From 958de668197651bbf2b4b9528f204ab5a0f1af65 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Oct 2019 21:07:31 +0200 Subject: module: Remove set_all_modules_text_*() Now that there are no users of set_all_modules_text_*() left, remove it. While it appears nds32 uses it, it does not have STRICT_MODULE_RWX and therefore ends up with the NOP stubs. Tested-by: Alexei Starovoitov Tested-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Greentime Hu Cc: H. Peter Anvin Cc: Jessica Yu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vincent Chen Link: https://lkml.kernel.org/r/20191111132458.284298307@infradead.org Signed-off-by: Ingo Molnar --- include/linux/module.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 6d20895e7739..daae84705040 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -846,13 +846,9 @@ extern int module_sysfs_initialized; #define __MODULE_STRING(x) __stringify(x) #ifdef CONFIG_STRICT_MODULE_RWX -extern void set_all_modules_text_rw(void); -extern void set_all_modules_text_ro(void); extern void module_enable_ro(const struct module *mod, bool after_init); extern void module_disable_ro(const struct module *mod); #else -static inline void set_all_modules_text_rw(void) { } -static inline void set_all_modules_text_ro(void) { } static inline void module_enable_ro(const struct module *mod, bool after_init) { } static inline void module_disable_ro(const struct module *mod) { } #endif -- cgit v1.2.3 From 04ae87a52074e2d448fc66143f1bd2c7d694d2b9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Oct 2019 22:26:59 +0200 Subject: ftrace: Rework event_create_dir() Rework event_create_dir() to use an array of static data instead of function pointers where possible. 
The problem is that it would call the function pointer on module load before parse_args(), possibly even before jump_labels were initialized. Luckily the generated functions don't use jump_labels but it still seems fragile. It also gets in the way of changing when we make the module map executable. The generated functions are basically calling trace_define_field() with a bunch of static arguments. So instead of a function, capture these arguments in a static array, avoiding the function call. Now there are a number of cases where the fields are dynamic (syscall arguments, kprobes and uprobes), in which case a static array does not work; for these we preserve the function call. Luckily all these cases are not related to modules and so we can retain the function call for them. Also fix up all broken tracepoint definitions that now generate a compile error. Tested-by: Alexei Starovoitov Tested-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Acked-by: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191111132458.342979914@infradead.org Signed-off-by: Ingo Molnar --- include/linux/trace_events.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 30a8cdcfd4a4..a379255c14a9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -187,6 +187,22 @@ enum trace_reg { struct trace_event_call; +#define TRACE_FUNCTION_TYPE ((const char *)~0UL) + +struct trace_event_fields { + const char *type; + union { + struct { + const char *name; + const int size; + const int align; + const int is_signed; + const int filter_type; + }; + int (*define_fields)(struct trace_event_call *); + }; +}; + struct trace_event_class { const char *system; void *probe; @@ -195,7 +211,7 @@ struct trace_event_class { #endif int (*reg)(struct trace_event_call *event, enum trace_reg type, void *data); - int (*define_fields)(struct trace_event_call *); + struct trace_event_fields *fields_array; struct list_head *(*get_fields)(struct trace_event_call *); struct list_head fields; int (*raw_init)(struct trace_event_call *); -- cgit v1.2.3 From 2496396fcb44404ead24b578c583d5286886e857 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Oct 2019 21:18:10 +0200 Subject: sched/rt, fs: Use CONFIG_PREEMPTION MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same functionality which today depends on CONFIG_PREEMPT. Switch the i_size() and part_nr_sects_…() code over to use CONFIG_PREEMPTION. Update the comment for fsstack_copy_inode_size() also to refer to CONFIG_PREEMPTION. 
[bigeasy: +PREEMPT comments] Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Alexander Viro Cc: Linus Torvalds Cc: Peter Zijlstra Cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20191015191821.11479-24-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/fs.h | 4 ++-- include/linux/genhd.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 98e0349adb52..dddfcbb140a7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -855,7 +855,7 @@ static inline loff_t i_size_read(const struct inode *inode) i_size = inode->i_size; } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); return i_size; -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) loff_t i_size; preempt_disable(); @@ -880,7 +880,7 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) inode->i_size = i_size; write_seqcount_end(&inode->i_size_seqcount); preempt_enable(); -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) preempt_disable(); inode->i_size = i_size; preempt_enable(); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 8bb63027e4d6..a927829bb73a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -718,7 +718,7 @@ static inline void hd_free_part(struct hd_struct *part) * accessor function. * * Code written along the lines of i_size_read() and i_size_write(). - * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption + * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption * on. 
*/ static inline sector_t part_nr_sects_read(struct hd_struct *part) @@ -731,7 +731,7 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part) nr_sects = part->nr_sects; } while (read_seqcount_retry(&part->nr_sects_seq, seq)); return nr_sects; -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) sector_t nr_sects; preempt_disable(); @@ -754,7 +754,7 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) write_seqcount_begin(&part->nr_sects_seq); part->nr_sects = size; write_seqcount_end(&part->nr_sects_seq); -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) preempt_disable(); part->nr_sects = size; preempt_enable(); -- cgit v1.2.3 From b50b0580d27bc45a0637aefc8bac4d31aa85771a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Nov 2019 14:48:57 +0100 Subject: net: add queue argument to __skb_wait_for_more_packets and __skb_{,try_}recv_datagram This will be used by ESP over TCP to handle the queue of IKE messages. Signed-off-by: Sabrina Dubroca Acked-by: David S. 
Miller Signed-off-by: Steffen Klassert --- include/linux/skbuff.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e9133bcf0544..49a10f9cc538 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3459,7 +3459,8 @@ static inline void skb_frag_list_init(struct sk_buff *skb) for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) -int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, +int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, + int *err, long *timeo_p, const struct sk_buff *skb); struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, struct sk_buff_head *queue, @@ -3468,12 +3469,16 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, struct sk_buff *skb), int *off, int *err, struct sk_buff **last); -struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, +struct sk_buff *__skb_try_recv_datagram(struct sock *sk, + struct sk_buff_head *queue, + unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), int *off, int *err, struct sk_buff **last); -struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, +struct sk_buff *__skb_recv_datagram(struct sock *sk, + struct sk_buff_head *sk_queue, + unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), int *off, int *err); -- cgit v1.2.3 From df1e849ae4559544ff00ff5052eefe2479750539 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 27 Nov 2019 16:36:45 -0800 Subject: rcu: Enable tick for nohz_full CPUs slow to provide expedited QS An expedited grace period can be stalled by a nohz_full CPU looping in kernel context. This possibility is currently handled by some carefully crafted checks in rcu_read_unlock_special() that enlist help from ksoftirqd when permitted by the scheduler. 
However, it is exactly these checks that require the scheduler avoid holding any of its rq or pi locks across rcu_read_unlock() without also having held them across the entire RCU read-side critical section. It would therefore be very nice if expedited grace periods could handle nohz_full CPUs looping in kernel context without such checks. This commit therefore adds code to the expedited grace period's wait and cleanup code that forces the scheduler-clock interrupt on for CPUs that fail to quickly supply a quiescent state. "Quickly" is currently a hard-coded single-jiffy delay. Signed-off-by: Paul E. McKenney --- include/linux/tick.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 7896f792d3b0..7340613c7eff 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -109,8 +109,10 @@ enum tick_dep_bits { TICK_DEP_BIT_PERF_EVENTS = 1, TICK_DEP_BIT_SCHED = 2, TICK_DEP_BIT_CLOCK_UNSTABLE = 3, - TICK_DEP_BIT_RCU = 4 + TICK_DEP_BIT_RCU = 4, + TICK_DEP_BIT_RCU_EXP = 5 }; +#define TICK_DEP_BIT_MAX TICK_DEP_BIT_RCU_EXP #define TICK_DEP_MASK_NONE 0 #define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER) @@ -118,6 +120,7 @@ enum tick_dep_bits { #define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED) #define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE) #define TICK_DEP_MASK_RCU (1 << TICK_DEP_BIT_RCU) +#define TICK_DEP_MASK_RCU_EXP (1 << TICK_DEP_BIT_RCU_EXP) #ifdef CONFIG_NO_HZ_COMMON extern bool tick_nohz_enabled; -- cgit v1.2.3 From f452ee096d95482892b101bde4fd037fa025d3cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 4 Oct 2019 23:54:02 +0200 Subject: rculist: Describe variadic macro argument in a Sphinx-compatible way MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this patch, Sphinx shows "variable arguments" as the description of the cond argument, rather 
than the intended description, and prints the following warnings: ./include/linux/rculist.h:374: warning: Excess function parameter 'cond' description in 'list_for_each_entry_rcu' ./include/linux/rculist.h:651: warning: Excess function parameter 'cond' description in 'hlist_for_each_entry_rcu' Signed-off-by: Jonathan Neuschäfer Acked-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4158b7212936..61c6728a71f7 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -361,7 +361,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. - * @cond: optional lockdep expression if called from non-RCU protection. + * @cond...: optional lockdep expression if called from non-RCU protection. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as list_add_rcu() @@ -636,7 +636,7 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. - * @cond: optional lockdep expression if called from non-RCU protection. + * @cond...: optional lockdep expression if called from non-RCU protection. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() -- cgit v1.2.3 From c54a2744497db4b6887b9c905ef7aa0b3620c956 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 11:37:37 -0800 Subject: list: Add hlist_unhashed_lockless() We would like to use hlist_unhashed() from timer_pending(), which runs without protection of a lock. 
Note that other callers might also want to use this variant. Instead of forcing a READ_ONCE() for all hlist_unhashed() callers, add a new helper with an explicit _lockless suffix in the name to better document what is going on. Also add various WRITE_ONCE() in __hlist_del(), hlist_add_head() and hlist_add_before()/hlist_add_behind() to pair with the READ_ONCE(). Signed-off-by: Eric Dumazet Cc: Thomas Gleixner [ paulmck: Also add WRITE_ONCE() to rculist.h. ] Signed-off-by: Paul E. McKenney --- include/linux/list.h | 32 +++++++++++++++++++++----------- include/linux/rculist.h | 24 ++++++++++++------------ 2 files changed, 33 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 85c92555e31f..61f5aaf96192 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -749,6 +749,16 @@ static inline int hlist_unhashed(const struct hlist_node *h) return !h->pprev; } +/* This variant of hlist_unhashed() must be used in lockless contexts + * to avoid potential load-tearing. + * The READ_ONCE() is paired with the various WRITE_ONCE() in hlist + * helpers that are defined below. 
+ */ +static inline int hlist_unhashed_lockless(const struct hlist_node *h) +{ + return !READ_ONCE(h->pprev); +} + static inline int hlist_empty(const struct hlist_head *h) { return !READ_ONCE(h->first); @@ -761,7 +771,7 @@ static inline void __hlist_del(struct hlist_node *n) WRITE_ONCE(*pprev, next); if (next) - next->pprev = pprev; + WRITE_ONCE(next->pprev, pprev); } static inline void hlist_del(struct hlist_node *n) @@ -782,32 +792,32 @@ static inline void hlist_del_init(struct hlist_node *n) static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; - n->next = first; + WRITE_ONCE(n->next, first); if (first) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); WRITE_ONCE(h->first, n); - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); } /* next must be != NULL */ static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next) { - n->pprev = next->pprev; - n->next = next; - next->pprev = &n->next; + WRITE_ONCE(n->pprev, next->pprev); + WRITE_ONCE(n->next, next); + WRITE_ONCE(next->pprev, &n->next); WRITE_ONCE(*(n->pprev), n); } static inline void hlist_add_behind(struct hlist_node *n, struct hlist_node *prev) { - n->next = prev->next; - prev->next = n; - n->pprev = &prev->next; + WRITE_ONCE(n->next, prev->next); + WRITE_ONCE(prev->next, n); + WRITE_ONCE(n->pprev, &prev->next); if (n->next) - n->next->pprev = &n->next; + WRITE_ONCE(n->next->pprev, &n->next); } /* after that we'll appear to be on some hlist and hlist_del will work */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 61c6728a71f7..4b7ae1bf50b3 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -173,7 +173,7 @@ static inline void hlist_del_init_rcu(struct hlist_node *n) { if (!hlist_unhashed(n)) { __hlist_del(n); - n->pprev = NULL; + WRITE_ONCE(n->pprev, NULL); } } @@ -473,7 +473,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, static inline 
void hlist_del_rcu(struct hlist_node *n) { __hlist_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** @@ -489,11 +489,11 @@ static inline void hlist_replace_rcu(struct hlist_node *old, struct hlist_node *next = old->next; new->next = next; - new->pprev = old->pprev; + WRITE_ONCE(new->pprev, old->pprev); rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); if (next) - new->next->pprev = &new->next; - old->pprev = LIST_POISON2; + WRITE_ONCE(new->next->pprev, &new->next); + WRITE_ONCE(old->pprev, LIST_POISON2); } /* @@ -528,10 +528,10 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, struct hlist_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_first_rcu(h), n); if (first) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } /** @@ -564,7 +564,7 @@ static inline void hlist_add_tail_rcu(struct hlist_node *n, if (last) { n->next = last->next; - n->pprev = &last->next; + WRITE_ONCE(n->pprev, &last->next); rcu_assign_pointer(hlist_next_rcu(last), n); } else { hlist_add_head_rcu(n, h); @@ -592,10 +592,10 @@ static inline void hlist_add_tail_rcu(struct hlist_node *n, static inline void hlist_add_before_rcu(struct hlist_node *n, struct hlist_node *next) { - n->pprev = next->pprev; + WRITE_ONCE(n->pprev, next->pprev); n->next = next; rcu_assign_pointer(hlist_pprev_rcu(n), n); - next->pprev = &n->next; + WRITE_ONCE(next->pprev, &n->next); } /** @@ -620,10 +620,10 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, struct hlist_node *prev) { n->next = prev->next; - n->pprev = &prev->next; + WRITE_ONCE(n->pprev, &prev->next); rcu_assign_pointer(hlist_next_rcu(prev), n); if (n->next) - n->next->pprev = &n->next; + WRITE_ONCE(n->next->pprev, &n->next); } #define __hlist_for_each_rcu(pos, head) \ -- cgit v1.2.3 From b3e627d3d5092a87fc9b9e37e341610cfecfbfdc Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Oct 2019 
02:55:57 +0000 Subject: rcu: Make PREEMPT_RCU be a modifier to TREE_RCU Currently PREEMPT_RCU and TREE_RCU are mutually exclusive Kconfig options. But PREEMPT_RCU actually specifies a kind of TREE_RCU, namely a preemptible TREE_RCU. This commit therefore makes PREEMPT_RCU be a modifier to the TREE_RCU Kconfig option. This has the benefit of simplifying several of the #if expressions that formerly needed to check both, but now need only check one or the other. Signed-off-by: Lai Jiangshan Signed-off-by: Lai Jiangshan Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 0b7506330c87..70a41cd8f58d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -167,7 +167,7 @@ do { \ * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. */ -#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) +#if defined(CONFIG_TREE_RCU) #include <linux/rcutree.h> #elif defined(CONFIG_TINY_RCU) #include <linux/rcutiny.h> @@ -601,7 +601,7 @@ do { \ * read-side critical section that would block in a !PREEMPT kernel. * But if you want the full story, read on! * - * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), + * In non-preemptible RCU implementations (pure TREE_RCU and TINY_RCU), * it is illegal to block while in an RCU read-side critical section. * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPTION * kernel builds, RCU read-side critical sections may be preempted, -- cgit v1.2.3 From 90326f0521a88004194f88f1b597b54347482b5c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 15 Oct 2019 21:18:14 +0200 Subject: rcu: Use CONFIG_PREEMPTION where appropriate The config option `CONFIG_PREEMPT' is used for the preemption model "Low-Latency Desktop". 
The config option `CONFIG_PREEMPTION' is enabled when kernel preemption is enabled which is true for the preemption model `CONFIG_PREEMPT' and `CONFIG_PREEMPT_RT'. Use `CONFIG_PREEMPTION' if it applies to both preemption models and not just to `CONFIG_PREEMPT'. Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Lai Jiangshan Cc: Joel Fernandes Cc: Davidlohr Bueso Cc: rcu@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 70a41cd8f58d..eb32fff81c30 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -154,7 +154,7 @@ static inline void exit_tasks_rcu_finish(void) { } * * This macro resembles cond_resched(), except that it is defined to * report potential quiescent states to RCU-tasks even if the cond_resched() - * machinery were to be shut off, as some advocate for PREEMPT kernels. + * machinery were to be shut off, as some advocate for PREEMPTION kernels. */ #define cond_resched_tasks_rcu_qs() \ do { \ @@ -598,7 +598,7 @@ do { \ * * You can avoid reading and understanding the next paragraph by * following this rule: don't put anything in an rcu_read_lock() RCU - * read-side critical section that would block in a !PREEMPT kernel. + * read-side critical section that would block in a !PREEMPTION kernel. * But if you want the full story, read on! 
 * * In non-preemptible RCU implementations (pure TREE_RCU and TINY_RCU), -- cgit v1.2.3 From 1f059dfdf5d170dccbac92193be2fee3c1763384 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 Nov 2019 08:19:36 +0100 Subject: mm/vmalloc: Add empty <asm/vmalloc.h> headers and use them from <linux/vmalloc.h> In the x86 MM code we'd like to untangle various types of historic header dependency spaghetti, but for this we'd need to pass to the generic vmalloc code various vmalloc related defines that customarily come via the <asm/page.h> low level arch header. Signed-off-by: Ingo Molnar --- include/linux/vmalloc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index a4b241102771..ec3813236699 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -10,6 +10,8 @@ #include <linux/rbtree.h> #include <linux/overflow.h> +#include <asm/vmalloc.h> + struct vm_area_struct; /* vma defining user mapping in mm_types.h */ struct notifier_block; /* in notifier.h */ -- cgit v1.2.3 From 186525bd6b83efc592672e2d6185e4d7c810d2b4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Nov 2019 08:17:25 +0100 Subject: mm, x86/mm: Untangle address space layout definitions from basic pgtable type definitions - Untangle the somewhat incestuous way of how VMALLOC_START is used all across the kernel, but is, on x86, defined deep inside one of the lowest level page table headers. It doesn't help that vmalloc.h only includes a single asm header: #include <asm/page.h> /* pgprot_t */ So there was no existing cross-arch way to decouple address layout definitions from page.h details. I used this: #ifndef VMALLOC_START # include <asm/vmalloc.h> #endif This way every architecture that wants to simplify page.h can do so. - Also on x86 we had a couple of LDT related inline functions that used the late-stage address space layout positions - but these could be uninlined without real trouble - the end result is cleaner this way as well. 
Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: Linus Torvalds Cc: Andrew Morton Cc: Rik van Riel Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- include/linux/mm.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c97ea3b694e6..fb8f9412e2cf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -625,24 +625,19 @@ unsigned long vmalloc_to_pfn(const void *addr); * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there * is no special casing required. */ -static inline bool is_vmalloc_addr(const void *x) -{ -#ifdef CONFIG_MMU - unsigned long addr = (unsigned long)x; - - return addr >= VMALLOC_START && addr < VMALLOC_END; -#else - return false; -#endif -} #ifndef is_ioremap_addr #define is_ioremap_addr(x) is_vmalloc_addr(x) #endif #ifdef CONFIG_MMU +extern bool is_vmalloc_addr(const void *x); extern int is_vmalloc_or_module_addr(const void *x); #else +static inline bool is_vmalloc_addr(const void *x) +{ + return false; +} static inline int is_vmalloc_or_module_addr(const void *x) { return 0; -- cgit v1.2.3 From bbefa1dd6a6d53537c11624752219e39959d04fb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 Nov 2019 15:58:45 +0800 Subject: crypto: pcrypt - Avoid deadlock by using per-instance padata queues If the pcrypt template is used multiple times in an algorithm, then a deadlock occurs because all pcrypt instances share the same padata_instance, which completes requests in the order submitted. That is, the inner pcrypt request waits for the outer pcrypt request while the outer request is already waiting for the inner. This patch fixes this by allocating a set of queues for each pcrypt instance instead of using two global queues. 
In order to maintain the existing user-space interface, the pinst structure remains global so any sysfs modifications will apply to every pcrypt instance. Note that when an update occurs we have to allocate memory for every pcrypt instance. Should one of the allocations fail we will abort the update without rolling back changes already made. The new per-instance data structure is called padata_shell and is essentially a wrapper around parallel_data. Reproducer: #include <linux/if_alg.h> #include <sys/socket.h> #include <unistd.h> int main() { struct sockaddr_alg addr = { .salg_type = "aead", .salg_name = "pcrypt(pcrypt(rfc4106-gcm-aesni))" }; int algfd, reqfd; char buf[32] = { 0 }; algfd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(algfd, (void *)&addr, sizeof(addr)); setsockopt(algfd, SOL_ALG, ALG_SET_KEY, buf, 20); reqfd = accept(algfd, 0, 0); write(reqfd, buf, 32); read(reqfd, buf, 16); } Reported-by: syzbot+56c7151cad94eec37c521f0e47d2eee53f9361c4@syzkaller.appspotmail.com Fixes: 5068c7a883d1 ("crypto: pcrypt - Add pcrypt crypto parallelization wrapper") Signed-off-by: Herbert Xu Tested-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/padata.h | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index 23717eeaad23..cccab7a59787 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -9,6 +9,7 @@ #ifndef PADATA_H #define PADATA_H +#include #include #include #include @@ -98,7 +99,7 @@ struct padata_cpumask { * struct parallel_data - Internal control structure, covers everything * that depends on the cpumask in use. * - * @pinst: padata instance. + * @ps: padata_shell object. * @pqueue: percpu padata queues used for parallelization. * @squeue: percpu padata queues used for serialuzation. * @reorder_objects: Number of objects waiting in the reorder queues. @@ -111,7 +112,7 @@ struct padata_cpumask { * @lock: Reorder lock. 
 */ struct parallel_data { - struct padata_instance *pinst; + struct padata_shell *ps; struct padata_parallel_queue __percpu *pqueue; struct padata_serial_queue __percpu *squeue; atomic_t reorder_objects; @@ -124,14 +125,33 @@ struct parallel_data { spinlock_t lock ____cacheline_aligned; }; +/** + * struct padata_shell - Wrapper around struct parallel_data, its + * purpose is to allow the underlying control structure to be replaced + * on the fly using RCU. + * + * @pinst: padata instance. + * @pd: Actual parallel_data structure which may be substituted on the fly. + * @opd: Pointer to old pd to be freed by padata_replace. + * @list: List entry in padata_instance list. + */ +struct padata_shell { + struct padata_instance *pinst; + struct parallel_data __rcu *pd; + struct parallel_data *opd; + struct list_head list; +}; + /** * struct padata_instance - The overall control structure. * * @cpu_notifier: cpu hotplug notifier. * @parallel_wq: The workqueue used for parallel work. * @serial_wq: The workqueue used for serial work. - * @pd: The internal control structure. + * @pslist: List of padata_shell objects attached to this instance. * @cpumask: User supplied cpumasks for parallel and serial works. + * @rcpumask: Actual cpumasks based on user cpumask and cpu_online_mask. + * @omask: Temporary storage used to compute the notification mask. * @cpumask_change_notifier: Notifiers chain for user-defined notify * callbacks that will be called when either @pcpu or @cbcpu * or both cpumasks change. 
@@ -143,8 +163,10 @@ struct padata_instance { struct hlist_node node; struct workqueue_struct *parallel_wq; struct workqueue_struct *serial_wq; - struct parallel_data *pd; + struct list_head pslist; struct padata_cpumask cpumask; + struct padata_cpumask rcpumask; + cpumask_var_t omask; struct blocking_notifier_head cpumask_change_notifier; struct kobject kobj; struct mutex lock; @@ -156,7 +178,9 @@ struct padata_instance { extern struct padata_instance *padata_alloc_possible(const char *name); extern void padata_free(struct padata_instance *pinst); -extern int padata_do_parallel(struct padata_instance *pinst, +extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst); +extern void padata_free_shell(struct padata_shell *ps); +extern int padata_do_parallel(struct padata_shell *ps, struct padata_priv *padata, int *cb_cpu); extern void padata_do_serial(struct padata_priv *padata); extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, -- cgit v1.2.3 From c441a909c68618ff64aa70394d0b270b0665a229 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 2 Dec 2019 13:42:29 -0800 Subject: crypto: compress - remove crt_u.compress (struct compress_tfm) crt_u.compress (struct compress_tfm) is pointless because its two fields, ->cot_compress() and ->cot_decompress(), always point to crypto_compress() and crypto_decompress(). Remove this pointless indirection, and just make crypto_comp_compress() and crypto_comp_decompress() be direct calls to what used to be crypto_compress() and crypto_decompress(). Also remove the unused function crypto_comp_cast(). 
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 43 ++++++------------------------------------- 1 file changed, 6 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 23365a9d062e..8f708564b98b 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -606,17 +606,7 @@ struct cipher_tfm { void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; -struct compress_tfm { - int (*cot_compress)(struct crypto_tfm *tfm, - const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen); - int (*cot_decompress)(struct crypto_tfm *tfm, - const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen); -}; - #define crt_cipher crt_u.cipher -#define crt_compress crt_u.compress struct crypto_tfm { @@ -624,7 +614,6 @@ struct crypto_tfm { union { struct cipher_tfm cipher; - struct compress_tfm compress; } crt_u; void (*exit)(struct crypto_tfm *tfm); @@ -928,13 +917,6 @@ static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm) return (struct crypto_comp *)tfm; } -static inline struct crypto_comp *crypto_comp_cast(struct crypto_tfm *tfm) -{ - BUG_ON((crypto_tfm_alg_type(tfm) ^ CRYPTO_ALG_TYPE_COMPRESS) & - CRYPTO_ALG_TYPE_MASK); - return __crypto_comp_cast(tfm); -} - static inline struct crypto_comp *crypto_alloc_comp(const char *alg_name, u32 type, u32 mask) { @@ -969,26 +951,13 @@ static inline const char *crypto_comp_name(struct crypto_comp *tfm) return crypto_tfm_alg_name(crypto_comp_tfm(tfm)); } -static inline struct compress_tfm *crypto_comp_crt(struct crypto_comp *tfm) -{ - return &crypto_comp_tfm(tfm)->crt_compress; -} - -static inline int crypto_comp_compress(struct crypto_comp *tfm, - const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen) -{ - return crypto_comp_crt(tfm)->cot_compress(crypto_comp_tfm(tfm), - src, slen, dst, dlen); -} +int crypto_comp_compress(struct crypto_comp *tfm, + const u8 *src, 
unsigned int slen, + u8 *dst, unsigned int *dlen); -static inline int crypto_comp_decompress(struct crypto_comp *tfm, - const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen) -{ - return crypto_comp_crt(tfm)->cot_decompress(crypto_comp_tfm(tfm), - src, slen, dst, dlen); -} +int crypto_comp_decompress(struct crypto_comp *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen); #endif /* _LINUX_CRYPTO_H */ -- cgit v1.2.3 From e8cfed5e4e2b5929371955f476a52a4c3398ead3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 2 Dec 2019 13:42:30 -0800 Subject: crypto: cipher - remove crt_u.cipher (struct cipher_tfm) Of the three fields in crt_u.cipher (struct cipher_tfm), ->cit_setkey() is pointless because it always points to setkey() in crypto/cipher.c. ->cit_decrypt_one() and ->cit_encrypt_one() are slightly less pointless, since if the algorithm doesn't have an alignmask, they are set directly to ->cia_encrypt() and ->cia_decrypt(). However, this "optimization" isn't worthwhile because: - The "cipher" algorithm type is the only algorithm still using crt_u, so it's bloating every struct crypto_tfm for every algorithm type. - If the algorithm has an alignmask, this "optimization" actually makes things slower, as it causes 2 indirect calls per block rather than 1. - It adds extra code complexity. - Some templates already call ->cia_encrypt()/->cia_decrypt() directly instead of going through ->cit_encrypt_one()/->cit_decrypt_one(). - The "cipher" algorithm type never gives optimal performance anyway. For that, a higher-level type such as skcipher needs to be used. Therefore, just remove the extra indirection, and make crypto_cipher_setkey(), crypto_cipher_encrypt_one(), and crypto_cipher_decrypt_one() be direct calls into crypto/cipher.c. Also remove the unused function crypto_cipher_cast(). 
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 48 ++++++------------------------------------------ 1 file changed, 6 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 8f708564b98b..c23f1eed7970 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -599,23 +599,10 @@ int crypto_has_alg(const char *name, u32 type, u32 mask); * crypto_free_*(), as well as the various helpers below. */ -struct cipher_tfm { - int (*cit_setkey)(struct crypto_tfm *tfm, - const u8 *key, unsigned int keylen); - void (*cit_encrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -}; - -#define crt_cipher crt_u.cipher - struct crypto_tfm { u32 crt_flags; - union { - struct cipher_tfm cipher; - } crt_u; - void (*exit)(struct crypto_tfm *tfm); struct crypto_alg *__crt_alg; @@ -752,12 +739,6 @@ static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm) return (struct crypto_cipher *)tfm; } -static inline struct crypto_cipher *crypto_cipher_cast(struct crypto_tfm *tfm) -{ - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); - return __crypto_cipher_cast(tfm); -} - /** * crypto_alloc_cipher() - allocate single block cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the @@ -815,11 +796,6 @@ static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask) return crypto_has_alg(alg_name, type, mask); } -static inline struct cipher_tfm *crypto_cipher_crt(struct crypto_cipher *tfm) -{ - return &crypto_cipher_tfm(tfm)->crt_cipher; -} - /** * crypto_cipher_blocksize() - obtain block size for cipher * @tfm: cipher handle @@ -873,12 +849,8 @@ static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm, * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ -static inline int 
crypto_cipher_setkey(struct crypto_cipher *tfm, - const u8 *key, unsigned int keylen) -{ - return crypto_cipher_crt(tfm)->cit_setkey(crypto_cipher_tfm(tfm), - key, keylen); -} +int crypto_cipher_setkey(struct crypto_cipher *tfm, + const u8 *key, unsigned int keylen); /** * crypto_cipher_encrypt_one() - encrypt one block of plaintext @@ -889,12 +861,8 @@ static inline int crypto_cipher_setkey(struct crypto_cipher *tfm, * Invoke the encryption operation of one block. The caller must ensure that * the plaintext and ciphertext buffers are at least one block in size. */ -static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, - u8 *dst, const u8 *src) -{ - crypto_cipher_crt(tfm)->cit_encrypt_one(crypto_cipher_tfm(tfm), - dst, src); -} +void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, + u8 *dst, const u8 *src); /** * crypto_cipher_decrypt_one() - decrypt one block of ciphertext @@ -905,12 +873,8 @@ static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, * Invoke the decryption operation of one block. The caller must ensure that * the plaintext and ciphertext buffers are at least one block in size. */ -static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, - u8 *dst, const u8 *src) -{ - crypto_cipher_crt(tfm)->cit_decrypt_one(crypto_cipher_tfm(tfm), - dst, src); -} +void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, + u8 *dst, const u8 *src); static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm) { -- cgit v1.2.3 From 894c9ef9780c5cf2f143415e867ee39a33ecb75d Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Tue, 3 Dec 2019 14:31:10 -0500 Subject: padata: validate cpumask without removed CPU during offline Configuring an instance's parallel mask without any online CPUs... 
echo 2 > /sys/kernel/pcrypt/pencrypt/parallel_cpumask echo 0 > /sys/devices/system/cpu/cpu1/online ...makes tcrypt mode=215 crash like this: divide error: 0000 [#1] SMP PTI CPU: 4 PID: 283 Comm: modprobe Not tainted 5.4.0-rc8-padata-doc-v2+ #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20191013_105130-anatol 04/01/2014 RIP: 0010:padata_do_parallel+0x114/0x300 Call Trace: pcrypt_aead_encrypt+0xc0/0xd0 [pcrypt] crypto_aead_encrypt+0x1f/0x30 do_mult_aead_op+0x4e/0xdf [tcrypt] test_mb_aead_speed.constprop.0.cold+0x226/0x564 [tcrypt] do_test+0x28c2/0x4d49 [tcrypt] tcrypt_mod_init+0x55/0x1000 [tcrypt] ... cpumask_weight() in padata_cpu_hash() returns 0 because the mask has no CPUs. The problem is __padata_remove_cpu() checks for valid masks too early and so doesn't mark the instance PADATA_INVALID as expected, which would have made padata_do_parallel() return error before doing the division. Fix by introducing a second padata CPU hotplug state before CPUHP_BRINGUP_CPU so that __padata_remove_cpu() sees the online mask without @cpu. No need for the second argument to padata_replace() since @cpu is now already missing from the online mask. 
Fixes: 33e54450683c ("padata: Handle empty padata cpumasks") Signed-off-by: Daniel Jordan Cc: Eric Biggers Cc: Herbert Xu Cc: Sebastian Andrzej Siewior Cc: Steffen Klassert Cc: Thomas Gleixner Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e51ee772b9f5..def48a583670 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -59,6 +59,7 @@ enum cpuhp_state { CPUHP_IOMMU_INTEL_DEAD, CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, + CPUHP_PADATA_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 91a71d612128f84f725022d7b7c5d5a741f6fdc7 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Tue, 3 Dec 2019 14:31:12 -0500 Subject: padata: remove cpumask change notifier Since commit 63d3578892dc ("crypto: pcrypt - remove padata cpumask notifier") this feature is unused, so get rid of it. Signed-off-by: Daniel Jordan Cc: Eric Biggers Cc: Herbert Xu Cc: Jonathan Corbet Cc: Steffen Klassert Cc: linux-crypto@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- include/linux/padata.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index cccab7a59787..178d5cc6b494 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #define PADATA_CPU_SERIAL 0x01 @@ -151,10 +150,6 @@ struct padata_shell { * @pslist: List of padata_shell objects attached to this instance. * @cpumask: User supplied cpumasks for parallel and serial works. * @rcpumask: Actual cpumasks based on user cpumask and cpu_online_mask. - * @omask: Temporary storage used to compute the notification mask. 
- * @cpumask_change_notifier: Notifiers chain for user-defined notify - * callbacks that will be called when either @pcpu or @cbcpu - * or both cpumasks change. * @kobj: padata instance kernel object. * @lock: padata instance lock. * @flags: padata flags. @@ -166,8 +161,6 @@ struct padata_instance { struct list_head pslist; struct padata_cpumask cpumask; struct padata_cpumask rcpumask; - cpumask_var_t omask; - struct blocking_notifier_head cpumask_change_notifier; struct kobject kobj; struct mutex lock; u8 flags; @@ -187,8 +180,4 @@ extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, cpumask_var_t cpumask); extern int padata_start(struct padata_instance *pinst); extern void padata_stop(struct padata_instance *pinst); -extern int padata_register_cpumask_notifier(struct padata_instance *pinst, - struct notifier_block *nblock); -extern int padata_unregister_cpumask_notifier(struct padata_instance *pinst, - struct notifier_block *nblock); #endif -- cgit v1.2.3 From 3facced7aeed131c1002b724e488d68ebe59c56f Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Tue, 3 Dec 2019 14:31:13 -0500 Subject: padata: remove reorder_objects reorder_objects is unused since the rework of padata's flushing, so remove it. Signed-off-by: Daniel Jordan Cc: Eric Biggers Cc: Herbert Xu Cc: Steffen Klassert Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- include/linux/padata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index 178d5cc6b494..faa2e36832f8 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -101,7 +101,6 @@ struct padata_cpumask { * @sh: padata_shell object. * @pqueue: percpu padata queues used for parallelization. * @squeue: percpu padata queues used for serialuzation. - * @reorder_objects: Number of objects waiting in the reorder queues. 
* @refcnt: Number of objects holding a reference on this parallel_data. * @max_seq_nr: Maximal used sequence number. * @processed: Number of already processed objects. @@ -114,7 +113,6 @@ struct parallel_data { struct padata_shell *ps; struct padata_parallel_queue __percpu *pqueue; struct padata_serial_queue __percpu *squeue; - atomic_t reorder_objects; atomic_t refcnt; atomic_t seq_nr; unsigned int processed; -- cgit v1.2.3 From bfcdcef8c8e3469f4d6c082a1da27a6ef77e5715 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Tue, 3 Dec 2019 14:31:14 -0500 Subject: padata: update documentation Remove references to unused functions, standardize language, update to reflect new functionality, migrate to rst format, and fix all kernel-doc warnings. Fixes: 815613da6a67 ("kernel/padata.c: removed unused code") Signed-off-by: Daniel Jordan Cc: Eric Biggers Cc: Herbert Xu Cc: Jonathan Corbet Cc: Steffen Klassert Cc: linux-crypto@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Daniel Jordan Signed-off-by: Herbert Xu --- include/linux/padata.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index faa2e36832f8..a0d8b41850b2 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -19,7 +19,7 @@ #define PADATA_CPU_PARALLEL 0x02 /** - * struct padata_priv - Embedded to the users data structure. + * struct padata_priv - Represents one job * * @list: List entry, to attach to the padata lists. * @pd: Pointer to the internal control structure. @@ -42,7 +42,7 @@ struct padata_priv { }; /** - * struct padata_list + * struct padata_list - one per work type per CPU * * @list: List head. * @lock: List lock. @@ -70,9 +70,6 @@ struct padata_serial_queue { * * @parallel: List to wait for parallelization. * @reorder: List to wait for reordering after parallel processing. - * @serial: List to wait for serialization after reordering. 
- * @pwork: work struct for parallelization. - * @swork: work struct for serialization. * @work: work struct for parallelization. * @num_obj: Number of objects that are processed by this cpu. */ @@ -98,11 +95,11 @@ struct padata_cpumask { * struct parallel_data - Internal control structure, covers everything * that depends on the cpumask in use. * - * @sh: padata_shell object. + * @ps: padata_shell object. * @pqueue: percpu padata queues used for parallelization. * @squeue: percpu padata queues used for serialuzation. * @refcnt: Number of objects holding a reference on this parallel_data. - * @max_seq_nr: Maximal used sequence number. + * @seq_nr: Sequence number of the parallelized data object. * @processed: Number of already processed objects. * @cpu: Next CPU to be processed. * @cpumask: The cpumasks in use for parallel and serial workers. @@ -119,7 +116,7 @@ struct parallel_data { int cpu; struct padata_cpumask cpumask; struct work_struct reorder_work; - spinlock_t lock ____cacheline_aligned; + spinlock_t ____cacheline_aligned lock; }; /** @@ -142,7 +139,7 @@ struct padata_shell { /** * struct padata_instance - The overall control structure. * - * @cpu_notifier: cpu hotplug notifier. + * @node: Used by CPU hotplug. * @parallel_wq: The workqueue used for parallel work. * @serial_wq: The workqueue used for serial work. * @pslist: List of padata_shell objects attached to this instance. -- cgit v1.2.3 From a4516c7053b96fed98a0439a9226983b5275474b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Dec 2019 10:55:59 +0000 Subject: net: sfp: derive interface mode from ethtool link modes We don't need the EEPROM ID to derive the phy interface mode as we can derive it merely from the ethtool link modes. Remove the EEPROM ID argument to sfp_select_interface(). Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. 
Miller --- include/linux/sfp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 487fd9412d10..8d7b98c214d7 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -504,7 +504,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); phy_interface_t sfp_select_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, unsigned long *link_modes); int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo); @@ -532,7 +531,6 @@ static inline void sfp_parse_support(struct sfp_bus *bus, } static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, unsigned long *link_modes) { return PHY_INTERFACE_MODE_NA; -- cgit v1.2.3 From 0fbd26a9fb6875b98fcfff523831fec47bc5e9a2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Dec 2019 10:56:04 +0000 Subject: net: sfp: add more extended compliance codes SFF-8024 is used to define various constants re-used in several SFF SFP-related specifications. Split these constants from the enum, and rename them to indicate that they're defined by SFF-8024. Add and use updated SFF-8024 extended compliance code definitions for 10GBASE-T, 5GBASE-T and 2.5GBASE-T modules. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. 
Miller --- include/linux/sfp.h | 82 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 8d7b98c214d7..373d8b67ea86 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -275,6 +275,61 @@ struct sfp_diag { __be16 cal_v_offset; } __packed; +/* SFF8024 defined constants */ +enum { + SFF8024_ID_UNK = 0x00, + SFF8024_ID_SFF_8472 = 0x02, + SFF8024_ID_SFP = 0x03, + SFF8024_ID_DWDM_SFP = 0x0b, + SFF8024_ID_QSFP_8438 = 0x0c, + SFF8024_ID_QSFP_8436_8636 = 0x0d, + SFF8024_ID_QSFP28_8636 = 0x11, + + SFF8024_ENCODING_UNSPEC = 0x00, + SFF8024_ENCODING_8B10B = 0x01, + SFF8024_ENCODING_4B5B = 0x02, + SFF8024_ENCODING_NRZ = 0x03, + SFF8024_ENCODING_8472_MANCHESTER= 0x04, + SFF8024_ENCODING_8472_SONET = 0x05, + SFF8024_ENCODING_8472_64B66B = 0x06, + SFF8024_ENCODING_8436_MANCHESTER= 0x06, + SFF8024_ENCODING_8436_SONET = 0x04, + SFF8024_ENCODING_8436_64B66B = 0x05, + SFF8024_ENCODING_256B257B = 0x07, + SFF8024_ENCODING_PAM4 = 0x08, + + SFF8024_CONNECTOR_UNSPEC = 0x00, + /* codes 01-05 not supportable on SFP, but some modules have single SC */ + SFF8024_CONNECTOR_SC = 0x01, + SFF8024_CONNECTOR_FIBERJACK = 0x06, + SFF8024_CONNECTOR_LC = 0x07, + SFF8024_CONNECTOR_MT_RJ = 0x08, + SFF8024_CONNECTOR_MU = 0x09, + SFF8024_CONNECTOR_SG = 0x0a, + SFF8024_CONNECTOR_OPTICAL_PIGTAIL= 0x0b, + SFF8024_CONNECTOR_MPO_1X12 = 0x0c, + SFF8024_CONNECTOR_MPO_2X16 = 0x0d, + SFF8024_CONNECTOR_HSSDC_II = 0x20, + SFF8024_CONNECTOR_COPPER_PIGTAIL= 0x21, + SFF8024_CONNECTOR_RJ45 = 0x22, + SFF8024_CONNECTOR_NOSEPARATE = 0x23, + SFF8024_CONNECTOR_MXC_2X16 = 0x24, + + SFF8024_ECC_UNSPEC = 0x00, + SFF8024_ECC_100G_25GAUI_C2M_AOC = 0x01, + SFF8024_ECC_100GBASE_SR4_25GBASE_SR = 0x02, + SFF8024_ECC_100GBASE_LR4_25GBASE_LR = 0x03, + SFF8024_ECC_100GBASE_ER4_25GBASE_ER = 0x04, + SFF8024_ECC_100GBASE_SR10 = 0x05, + SFF8024_ECC_100GBASE_CR4 = 0x0b, + SFF8024_ECC_25GBASE_CR_S 
= 0x0c, + SFF8024_ECC_25GBASE_CR_N = 0x0d, + SFF8024_ECC_10GBASE_T_SFI = 0x16, + SFF8024_ECC_10GBASE_T_SR = 0x1c, + SFF8024_ECC_5GBASE_T = 0x1d, + SFF8024_ECC_2_5GBASE_T = 0x1e, +}; + /* SFP EEPROM registers */ enum { SFP_PHYS_ID = 0x00, @@ -309,34 +364,7 @@ enum { SFP_SFF8472_COMPLIANCE = 0x5e, SFP_CC_EXT = 0x5f, - SFP_PHYS_ID_SFF = 0x02, - SFP_PHYS_ID_SFP = 0x03, SFP_PHYS_EXT_ID_SFP = 0x04, - SFP_CONNECTOR_UNSPEC = 0x00, - /* codes 01-05 not supportable on SFP, but some modules have single SC */ - SFP_CONNECTOR_SC = 0x01, - SFP_CONNECTOR_FIBERJACK = 0x06, - SFP_CONNECTOR_LC = 0x07, - SFP_CONNECTOR_MT_RJ = 0x08, - SFP_CONNECTOR_MU = 0x09, - SFP_CONNECTOR_SG = 0x0a, - SFP_CONNECTOR_OPTICAL_PIGTAIL = 0x0b, - SFP_CONNECTOR_MPO_1X12 = 0x0c, - SFP_CONNECTOR_MPO_2X16 = 0x0d, - SFP_CONNECTOR_HSSDC_II = 0x20, - SFP_CONNECTOR_COPPER_PIGTAIL = 0x21, - SFP_CONNECTOR_RJ45 = 0x22, - SFP_CONNECTOR_NOSEPARATE = 0x23, - SFP_CONNECTOR_MXC_2X16 = 0x24, - SFP_ENCODING_UNSPEC = 0x00, - SFP_ENCODING_8B10B = 0x01, - SFP_ENCODING_4B5B = 0x02, - SFP_ENCODING_NRZ = 0x03, - SFP_ENCODING_8472_MANCHESTER = 0x04, - SFP_ENCODING_8472_SONET = 0x05, - SFP_ENCODING_8472_64B66B = 0x06, - SFP_ENCODING_256B257B = 0x07, - SFP_ENCODING_PAM4 = 0x08, SFP_OPTIONS_HIGH_POWER_LEVEL = BIT(13), SFP_OPTIONS_PAGING_A2 = BIT(12), SFP_OPTIONS_RETIMER = BIT(11), -- cgit v1.2.3 From 74c551ca5a0edcc9cf66a3b73fd95b9a8615bfd0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Dec 2019 10:56:09 +0000 Subject: net: sfp: add module start/stop upstream notifications When dealing with some copper modules, we can't positively know what the module capabilities are until we have probed the PHY. Without the full capabilities, we may end up failing a module that we could otherwise drive with a restricted set of capabilities. An example of this would be a module with a NBASE-T PHY plugged into a host that supports phy interface modes 2500BASE-X and SGMII. 
The PHY supports 10GBASE-R, 5000BASE-X, 2500BASE-X, SGMII interface modes, which means a subset of the capabilities are compatible with the host. However, reading the module EEPROM leads us to believe that the module only supports ethtool link mode 10GBASE-T, which is incompatible with the host - and thus results in the module being rejected. This patch adds an extra notification which is triggered after the SFP module's PHY probe, and a corresponding notification just before the PHY is removed. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/sfp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 373d8b67ea86..66a56396e8e3 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -507,6 +507,8 @@ struct sfp_bus; * @module_insert: called after a module has been detected to determine * whether the module is supported for the upstream device. * @module_remove: called after the module has been removed. + * @module_start: called after the PHY probe step + * @module_stop: called before the PHY is removed * @link_down: called when the link is non-operational for whatever * reason. * @link_up: called when the link is operational. 
@@ -520,6 +522,8 @@ struct sfp_upstream_ops { void (*detach)(void *priv, struct sfp_bus *bus); int (*module_insert)(void *priv, const struct sfp_eeprom_id *id); void (*module_remove)(void *priv); + int (*module_start)(void *priv); + void (*module_stop)(void *priv); void (*link_down)(void *priv); void (*link_up)(void *priv); int (*connect_phy)(void *priv, struct phy_device *); -- cgit v1.2.3 From 52c956003a9d5bcae1f445f9dfd42b624adb6e87 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Dec 2019 10:56:45 +0000 Subject: net: phylink: delay MAC configuration for copper SFP modules Knowing whether we need to delay the MAC configuration because a module may have a PHY is useful to phylink to allow NBASE-T modules to work on systems supporting no more than 2.5G speeds. This commit allows us to delay such configuration until after the PHY has been probed by recording the parsed capabilities, and if the module may have a PHY, doing no more until the module_start() notification is called. At that point, we either have a PHY, or we don't. We move the PHY-based setup a little later, and use the PHYs support capabilities rather than the EEPROM parsed capabilities to determine whether we can support the PHY. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. 
Miller --- include/linux/sfp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 66a56396e8e3..38893e4dd0f0 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -533,6 +533,7 @@ struct sfp_upstream_ops { #if IS_ENABLED(CONFIG_SFP) int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); +bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id); void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); phy_interface_t sfp_select_interface(struct sfp_bus *bus, @@ -556,6 +557,12 @@ static inline int sfp_parse_port(struct sfp_bus *bus, return PORT_OTHER; } +static inline bool sfp_may_have_phy(struct sfp_bus *bus, + const struct sfp_eeprom_id *id) +{ + return false; +} + static inline void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support) -- cgit v1.2.3 From 528be501b7d4a64e04672a38ebfc9e19c555e770 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 11 Dec 2019 19:44:57 -0600 Subject: soundwire: sdw_slave: add probe_complete structure and new fields When a Slave device becomes synchronized with the bus, it may report its presence in PING frames, as well as optionally asserting an in-band PREQ signal. The bus driver will detect a new Device0, start the enumeration process and assign it a non-zero device number. The SoundWire enumeration provides an arbitration to deal with multiple Slaves reporting ATTACHED at the same time. The bus driver will also invoke the driver .probe() callback associated with this device. The probe() depends on the Linux device core, which handles the match operations and may result in modules being loaded. Once the non-zero device number is programmed, the Slave will report its new status in PING frames and the Master hardware will typically report this status change with an interrupt. 
At this point, the .update_status() callback of the codec driver will be invoked (usually from an interrupt thread or workqueue scheduled from the interrupt thread). The first race condition which can happen is between the .probe(), which allocates the resources, and .update_status() where initializations are typically handled. The .probe() is only called once during the initial boot, while .update_status() will be called for every bus hardware reset and if the Slave device loses synchronization (an unlikely event but with non-zero probability). The time difference between the end of the enumeration process and a change of status reported by the hardware may be as small as one SoundWire PING frame. The scheduling of the interrupt thread, which invokes .update_status() is not deterministic, but can be small enough to create a race condition. With a 48 kHz frame rate and ideal scheduling cases, the .probe() may be pre-empted within double-digit microseconds. Since there is no guarantee that the .probe() completes by the time .update_status() is invoked as a result of an interrupt, it's not unusual for the .update_status() to rely on data structures that have not been allocated yet, leading to kernel oopses. This patch adds a probe_complete utility, which is used in the sdw_update_slave_status() routine. The codec driver does not need to do anything and can safely assume all resources are allocated in its update_status() callback. 
Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191212014507.28050-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 28745b9ba279..cb1db4a7475d 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -547,6 +547,10 @@ struct sdw_slave_ops { * @node: node for bus list * @port_ready: Port ready completion flag for each Slave port * @dev_num: Device Number assigned by Bus + * @probed: boolean tracking driver state + * @probe_complete: completion utility to control potential races + * on startup between driver probe/initialization and SoundWire + * Slave state changes/implementation-defined interrupts */ struct sdw_slave { struct sdw_slave_id id; @@ -561,6 +565,8 @@ struct sdw_slave { struct list_head node; struct completion *port_ready; u16 dev_num; + bool probed; + struct completion probe_complete; }; #define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev) -- cgit v1.2.3 From fbbff36325079fd9d2fcd30063c84f4b38a0ad9b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 11 Dec 2019 19:44:58 -0600 Subject: soundwire: sdw_slave: add enumeration_complete structure When the Master starts the bus (be it during the initial boot or system resume), it usually performs a HardReset to make sure electrical levels are correct, then enables the control channel. While the PM framework guarantees that the Slave devices will only become 'active' once the Master completes the bus initialization, there is still a risk of a race condition: the Slave enumeration is handled in a separate interrupt thread triggered by hardware status changes, so the Slave device may not be ready to accept commands when the Slave driver tries to access the registers and restore settings in its resume or pm_runtime_resume callbacks. 
In those cases, any read/write commands from/to the Slave device will result in a timeout. This patch adds an enumeration_complete structure. When the bus goes through a HardReset sequence and is restarted, the Slave will be marked as UNATTACHED, which will result in a call to init_completion(). When the Slave reports its presence during PING frames as a non-zero Device, the Master hardware will issue an interrupt and the bus driver will invoke complete(). The order between init_completion()/complete() is predictable since this is a Master-initiated transition. The Slave driver may use wait_for_completion() in its resume callback. When regmap is used, the Slave driver will typically set its regmap in cache-only mode on suspend, then on resume block on wait_for_completion(&enumeration_complete) to guarantee it is safe to start read/write transactions. It may then exit the cache-only mode and use a regmap_sync to restore settings. All these steps are optional, their use completely depends on the Slave device capabilities and how the Slave driver is implemented. 
Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191212014507.28050-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index cb1db4a7475d..3fa8d875b16b 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -551,6 +551,9 @@ struct sdw_slave_ops { * @probe_complete: completion utility to control potential races * on startup between driver probe/initialization and SoundWire * Slave state changes/implementation-defined interrupts + * @enumeration_complete: completion utility to control potential races + * on startup between device enumeration and read/write access to the + * Slave device */ struct sdw_slave { struct sdw_slave_id id; @@ -567,6 +570,7 @@ struct sdw_slave { u16 dev_num; bool probed; struct completion probe_complete; + struct completion enumeration_complete; }; #define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev) -- cgit v1.2.3 From 7afc50e441af0afc8055920a64cff70b648e4b44 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 11 Dec 2019 19:44:59 -0600 Subject: soundwire: sdw_slave: add initialization_complete definition Slave drivers may have different ways of handling their settings, with or without regmap. During the integration of codec drivers, done in partnership between Intel and Realtek, it became desirable to implement a predictable order between low-level initializations performed in .update_status() (invoked by an interrupt thread) and the settings restored in the resume steps (invoked by the PM core). This patch builds on the previous solution to wait for the Slave device to be fully enumerated. The complete() in this case is signaled not before the .update_status() is called, but after .update_status() returns. 
Without this patch, the settings were not properly restored, leading to timing-dependent 'no sound after resume' or 'no headset detected after resume' bug reports. Depending on how initialization is handled, a Slave device driver may wait for enumeration_complete, or for initialization_complete, both are valid synchronization points. They are initialized at the same time, they only differ on when complete() is invoked. Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191212014507.28050-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 3fa8d875b16b..ed42cd79eab7 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -554,6 +554,8 @@ struct sdw_slave_ops { * @enumeration_complete: completion utility to control potential races * on startup between device enumeration and read/write access to the * Slave device + * @initialization_complete: completion utility to control potential races + * on startup between device enumeration and settings being restored */ struct sdw_slave { struct sdw_slave_id id; @@ -571,6 +573,7 @@ struct sdw_slave { bool probed; struct completion probe_complete; struct completion enumeration_complete; + struct completion initialization_complete; }; #define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev) -- cgit v1.2.3 From b2bd75f806c49929d7ab5a860c0a69b0a17c59d2 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 11 Dec 2019 19:45:00 -0600 Subject: soundwire: sdw_slave: track unattach_request to handle all init sequences The Slave device initialization can be split in 4 different cases: 1. Master-initiated hardware reset, system suspend-resume and pm_runtime based on clock-stop mode1. 
To avoid timeouts and a bad audio experience, the Slave device resume operations need to wait for the Slave device to be re-enumerated and its settings restored. 2. Exit from clock-stop mode0. In this case, the Slave device is required to remain enumerated and its context preserved while the clock is stopped, so no re-initialization or wait_for_completion() is necessary. 3. Slave-initiated pm_runtime D3 transition. With the parent child relationship, it is possible that a Slave device becomes 'suspended' while its parent is still 'active' with the bus clock still toggling. In this case, during the pm_runtime resume operation, there is no need to wait for any settings to be restored. 4. Slave reset (sync loss or implementation-defined). In that case the bus remains operational and the Slave device will be re-initialized when it becomes ATTACHED again. In previous patches, we suggested the use of wait_for_completion() to deal with the case #1, but case #2 and #3 do not need any wait. To account for those differences, this patch adds an unattach_request field. The field is explicitly set by the Master for the case #1, and if non-zero the Slave device shall wait on resume. In all other cases, the Slave resume operations can proceed without wait. The only request tracked so far is Master HardReset, but the request is declared as a bit mask for future extensions (if needed). 
The definition for this value is added in bus.h and does not need to be exposed in sdw.h Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191212014507.28050-5-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index ed42cd79eab7..b7c9eca4332a 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -556,6 +556,11 @@ struct sdw_slave_ops { * Slave device * @initialization_complete: completion utility to control potential races * on startup between device enumeration and settings being restored + * @unattach_request: mask field to keep track why the Slave re-attached and + * was re-initialized. This is useful to deal with potential race conditions + * between the Master suspending and the codec resuming, and make sure that + * when the Master triggered a reset the Slave is properly enumerated and + * initialized */ struct sdw_slave { struct sdw_slave_id id; @@ -574,6 +579,7 @@ struct sdw_slave { struct completion probe_complete; struct completion enumeration_complete; struct completion initialization_complete; + u32 unattach_request; }; #define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev) -- cgit v1.2.3 From f98f690fb03c2a8d21dfa31aa1042480cf6f7f9b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 11 Dec 2019 19:45:01 -0600 Subject: soundwire: intel: update interfaces between ASoC and SoundWire The current interfaces between ASoC and SoundWire are limited by the platform_device infrastructure to an init() and exit() (mapped to the platform driver.probe and .remove) To help with the platform detection, machine driver selection and management of power dependencies between DSP and SoundWire IP, the ASoC side requires: a) an ACPI scan helper, to report if any devices are exposed in the DSDT tables, and if 
any links are disabled by the BIOS. b) a probe helper that allocates the resources without actually starting the bus. c) a startup helper which does start the bus when all power dependencies are settled. d) an exit helper to free all resources e) an interrupt_enable/disable helper, typically invoked after the startup helper but also used in suspend routines. This patch moves all required interfaces to sdw_intel.h, mainly to allow SoundWire and ASoC parts to be merged separately once the header files are shared between trees. To avoid compilation issues, the conflicts in intel_init.c are blindly removed. This would in theory prevent the code from working, but since there are no users of the Intel Soundwire driver this has no impact. Functionality will be restored when the removal of platform devices is complete. Support for SoundWire + SOF builds will only be provided once all the required pieces are upstream. Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191212014507.28050-6-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_intel.h | 77 ++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index c9427cb6020b..034eca8df748 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -16,24 +16,91 @@ struct sdw_intel_ops { }; /** - * struct sdw_intel_res - Soundwire Intel resource structure + * struct sdw_intel_acpi_info - Soundwire Intel information found in ACPI tables + * @handle: ACPI controller handle + * @count: link count found with "sdw-master-count" property + * @link_mask: bit-wise mask listing links enabled by BIOS menu + * + * this structure could be expanded to e.g. provide all the _ADR + * information in case the link_mask is not sufficient to identify + * platform capabilities. 
+ */ +struct sdw_intel_acpi_info { + acpi_handle handle; + int count; + u32 link_mask; +}; + +struct sdw_intel_link_res; + +/** + * struct sdw_intel_ctx - context allocated by the controller + * driver probe + * @count: link count + * @mmio_base: mmio base of SoundWire registers, only used to check + * hardware capabilities after all power dependencies are settled. + * @link_mask: bit-wise mask listing SoundWire links reported by the + * Controller + * @handle: ACPI parent handle + * @links: information for each link (controller-specific and kept + * opaque here) + */ +struct sdw_intel_ctx { + int count; + void __iomem *mmio_base; + u32 link_mask; + acpi_handle handle; + struct sdw_intel_link_res *links; +}; + +/** + * struct sdw_intel_res - Soundwire Intel global resource structure, + * typically populated by the DSP driver + * + * @count: link count * @mmio_base: mmio base of SoundWire registers * @irq: interrupt number * @handle: ACPI parent handle * @parent: parent device * @ops: callback ops - * @arg: callback arg + * @dev: device implementing hwparams and free callbacks + * @link_mask: bit-wise mask listing links selected by the DSP driver + * This mask may be a subset of the one reported by the controller since + * machine-specific quirks are handled in the DSP driver. */ struct sdw_intel_res { + int count; void __iomem *mmio_base; int irq; acpi_handle handle; struct device *parent; const struct sdw_intel_ops *ops; - void *arg; + struct device *dev; + u32 link_mask; }; -void *sdw_intel_init(acpi_handle *parent_handle, struct sdw_intel_res *res); -void sdw_intel_exit(void *arg); +/* + * On Intel platforms, the SoundWire IP has dependencies on power + * rails shared with the DSP, and the initialization steps are split + * in three. First an ACPI scan to check what the firmware describes + * in DSDT tables, then an allocation step (with no hardware + * configuration but with all the relevant devices created) and last + * the actual hardware configuration. 
The final stage is a global + * interrupt enable which is controlled by the DSP driver. Splitting + * these phases helps simplify the boot flow and make early decisions + * on e.g. which machine driver to select (I2S mode, HDaudio or + * SoundWire). + */ +int sdw_intel_acpi_scan(acpi_handle *parent_handle, + struct sdw_intel_acpi_info *info); + +struct sdw_intel_ctx * +sdw_intel_probe(struct sdw_intel_res *res); + +int sdw_intel_startup(struct sdw_intel_ctx *ctx); + +void sdw_intel_exit(struct sdw_intel_ctx *ctx); + +void sdw_intel_enable_irq(void __iomem *mmio_base, bool enable); #endif -- cgit v1.2.3 From 4b206d34b92224496c42226c4b6d92719056c8b6 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Wed, 11 Dec 2019 19:45:02 -0600 Subject: soundwire: intel: update stream callbacks for hwparams/free stream operations The SoundWire DAIs for Intel platform are created in drivers/soundwire/intel.c, while the communication with the Intel DSP is all controlled in soc/sof/intel When the DAI status changes, a callback is used to bridge the gap between the two subsystems. The naming of the existing 'config_stream' callback does not map well with any of ALSA/ASoC concepts. This patch renames it as 'params_stream' to be more self-explanatory. A new 'free_stream' callback is added in case any resources allocated in the 'params_stream' stage need to be released. In the SOF implementation, this is used in the hw_free case to release the DMA channels over IPC. These two callbacks now rely on structures which expose the link_id and alh_stream_id (required by the firmware IPC), instead of a list of parameters. The 'void *' definitions are changed to use explicit types, as suggested on alsa-devel during earlier reviews. 
Signed-off-by: Rander Wang Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191212014507.28050-7-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_intel.h | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 034eca8df748..3ccb38d48eef 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -4,15 +4,39 @@ #ifndef __SDW_INTEL_H #define __SDW_INTEL_H +/** + * struct sdw_intel_stream_params_data: configuration passed during + * the @params_stream callback, e.g. for interaction with DSP + * firmware. + */ +struct sdw_intel_stream_params_data { + struct snd_pcm_substream *substream; + struct snd_soc_dai *dai; + struct snd_pcm_hw_params *hw_params; + int link_id; + int alh_stream_id; +}; + +/** + * struct sdw_intel_stream_free_data: configuration passed during + * the @free_stream callback, e.g. for interaction with DSP + * firmware. + */ +struct sdw_intel_stream_free_data { + struct snd_pcm_substream *substream; + struct snd_soc_dai *dai; + int link_id; +}; + /** * struct sdw_intel_ops: Intel audio driver callback ops * - * @config_stream: configure the stream with the hw_params - * the first argument containing the context is mandatory */ struct sdw_intel_ops { - int (*config_stream)(void *arg, void *substream, - void *dai, void *hw_params, int stream_num); + int (*params_stream)(struct device *dev, + struct sdw_intel_stream_params_data *params_data); + int (*free_stream)(struct device *dev, + struct sdw_intel_stream_free_data *free_data); }; /** -- cgit v1.2.3 From 6cd1d670bee641d5d10b11d58c7c99ac1ddf8068 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 11 Dec 2019 19:45:03 -0600 Subject: soundwire: intel: update headers for interrupts The existing use of 6 handlers is problematic in MSI mode. 
Update headers so that all shared interrupts can be handled with a single handler. Signed-off-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191212014507.28050-8-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_intel.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 3ccb38d48eef..2ce3e9ecc4b6 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -4,6 +4,8 @@ #ifndef __SDW_INTEL_H #define __SDW_INTEL_H +#include <linux/irqreturn.h> + /** * struct sdw_intel_stream_params_data: configuration passed during * the @params_stream callback, e.g. for interaction with DSP @@ -127,4 +129,6 @@ void sdw_intel_exit(struct sdw_intel_ctx *ctx); void sdw_intel_enable_irq(void __iomem *mmio_base, bool enable); +irqreturn_t sdw_intel_thread(int irq, void *dev_id); + #endif -- cgit v1.2.3 From eae0b60d64834c75a460d96b1d1e0e187381e341 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 11 Dec 2019 19:45:04 -0600 Subject: soundwire: intel: add link_list to handle interrupts with a single thread In MSI mode, the use of separate handlers and threads for the Intel IPC, stream and SoundWire shared interrupt leads to timeouts and lost interrupts. The solution is to merge all interrupt handling across all links with a single thread function. The use of a linked list enables this thread function to walk through all contexts and figure out which link needs attention.
Signed-off-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191212014507.28050-9-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_intel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 2ce3e9ecc4b6..2a56180bc9dc 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -70,6 +70,7 @@ struct sdw_intel_link_res; * @handle: ACPI parent handle * @links: information for each link (controller-specific and kept * opaque here) + * @link_list: list to handle interrupts across all links */ struct sdw_intel_ctx { int count; @@ -77,6 +78,7 @@ struct sdw_intel_ctx { u32 link_mask; acpi_handle handle; struct sdw_intel_link_res *links; + struct list_head link_list; }; /** -- cgit v1.2.3 From 905b5a81afe15e8252e5892b8ca1ff1c1adfb79d Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Wed, 11 Dec 2019 19:45:05 -0600 Subject: soundwire: intel: add prototype for WAKEEN interrupt processing In ClockStop mode, the PCI device will be notified of a wake, which will be handled from an interrupt thread. 
Signed-off-by: Rander Wang Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191212014507.28050-10-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_intel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 2a56180bc9dc..073121c49695 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -122,6 +122,8 @@ struct sdw_intel_res { int sdw_intel_acpi_scan(acpi_handle *parent_handle, struct sdw_intel_acpi_info *info); +void sdw_intel_process_wakeen_event(struct sdw_intel_ctx *ctx); + struct sdw_intel_ctx * sdw_intel_probe(struct sdw_intel_res *res); -- cgit v1.2.3 From 4da0680f24c9af2de8406ded68c4ef967f448de3 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 11 Dec 2019 19:45:06 -0600 Subject: soundwire: intel: add mutex for shared SHIM register access Some of the Intel SoundWire SHIM registers contain fields for different links. Without protection, the master drivers for the different links will access these shared registers, leading to invalid configurations and timeouts (specifically when changing CPA/SPA power-related registers and polling for the changes to be applied). A mutex is added to make sure all read-modify-write (rmw) accesses to those registers are serialized.
Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191212014507.28050-11-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_intel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 073121c49695..45fa6d93197f 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -71,6 +71,7 @@ struct sdw_intel_link_res; * @links: information for each link (controller-specific and kept * opaque here) * @link_list: list to handle interrupts across all links + * @shim_lock: mutex to handle concurrent rmw access to shared SHIM registers. */ struct sdw_intel_ctx { int count; @@ -79,6 +80,7 @@ struct sdw_intel_ctx { acpi_handle handle; struct sdw_intel_link_res *links; struct list_head link_list; + struct mutex shim_lock; /* lock for access to shared SHIM registers */ }; /** -- cgit v1.2.3 From 09f6a72d014386939d21899921dd379006471a4b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 11 Dec 2019 19:45:07 -0600 Subject: soundwire: intel: add clock stop quirks Due to power rail dependencies, the SoundWire Master driver cannot make decisions on its own when entering pm runtime suspend. Add quirk mask for each link, so that the SOF parent driver can inform the SoundWire master driver of the desired behavior: a) leave clock on b) power-off instead of clock stop c) power-off if all devices cannot generate wakes d) force bus reset on clock restart Note that for now the interface with the SOF driver relies on a single mask for all links. If needed, the interface might be modified at a later point to provide more freedom. The code at the lower level does not assume any commonality between links. 
Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191212014507.28050-12-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_intel.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 45fa6d93197f..93b83bdf8035 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -59,6 +59,40 @@ struct sdw_intel_acpi_info { struct sdw_intel_link_res; +/* Intel clock-stop/pm_runtime quirk definitions */ + +/* + * Force the clock to remain on during pm_runtime suspend. This might + * be needed if Slave devices do not have an alternate clock source or + * if the latency requirements are very strict. + */ +#define SDW_INTEL_CLK_STOP_NOT_ALLOWED BIT(0) + +/* + * Stop the bus during pm_runtime suspend. If set, a complete bus + * reset and re-enumeration will be performed when the bus + * restarts. This mode shall not be used if Slave devices can generate + * in-band wakes. + */ +#define SDW_INTEL_CLK_STOP_TEARDOWN BIT(1) + +/* + * Stop the bus during pm_suspend if Slaves are not wake capable + * (e.g. speaker amplifiers). The clock-stop mode is typically + * slightly higher power than when the IP is completely powered-off. + */ +#define SDW_INTEL_CLK_STOP_WAKE_CAPABLE_ONLY BIT(2) + +/* + * Require a bus reset (and complete re-enumeration) when exiting + * clock stop modes. This may be needed if the controller power was + * turned off and all context lost. This quirk shall not be used if a + * Slave device needs to remain enumerated and keep its context, + * e.g. to provide the reasons for the wake, report acoustic events or + * pass a history buffer. 
+ */ +#define SDW_INTEL_CLK_STOP_BUS_RESET BIT(3) + /** * struct sdw_intel_ctx - context allocated by the controller * driver probe @@ -97,6 +131,8 @@ struct sdw_intel_ctx { * @link_mask: bit-wise mask listing links selected by the DSP driver * This mask may be a subset of the one reported by the controller since * machine-specific quirks are handled in the DSP driver. + * @clock_stop_quirks: mask array of possible behaviors requested by the + * DSP driver. The quirks are common for all links for now. */ struct sdw_intel_res { int count; @@ -107,6 +143,7 @@ struct sdw_intel_res { const struct sdw_intel_ops *ops; struct device *dev; u32 link_mask; + u32 clock_stop_quirks; }; /* -- cgit v1.2.3 From cb1aa3823c9280f2bb8218cdb5cb05721e0376b1 Mon Sep 17 00:00:00 2001 From: Lakshmi Ramasubramanian Date: Wed, 11 Dec 2019 08:47:05 -0800 Subject: KEYS: Call the IMA hook to measure keys Call the IMA hook from key_create_or_update() function to measure the payload when a new key is created or an existing key is updated. This patch adds the call to the IMA hook from key_create_or_update() function to measure the key on key create or update. 
Signed-off-by: Lakshmi Ramasubramanian Cc: David Howells Cc: Jarkko Sakkinen Signed-off-by: Mimi Zohar --- include/linux/ima.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 6d904754d858..3b89136bc218 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -101,6 +101,20 @@ static inline void ima_add_kexec_buffer(struct kimage *image) {} #endif +#if defined(CONFIG_IMA) && defined(CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE) +extern void ima_post_key_create_or_update(struct key *keyring, + struct key *key, + const void *payload, size_t plen, + unsigned long flags, bool create); +#else +static inline void ima_post_key_create_or_update(struct key *keyring, + struct key *key, + const void *payload, + size_t plen, + unsigned long flags, + bool create) {} +#endif /* CONFIG_IMA && CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE */ + #ifdef CONFIG_IMA_APPRAISE extern bool is_ima_appraise_enabled(void); extern void ima_inode_post_setattr(struct dentry *dentry); -- cgit v1.2.3 From 4414abf89158d734a83c99f6504f648417bd9550 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 23 Sep 2019 16:31:42 -0700 Subject: rcu: Remove rcu_swap_protected() Now that the calls to rcu_swap_protected() have been replaced by rcu_replace_pointer(), this commit removes rcu_swap_protected(). Link: https://lore.kernel.org/lkml/CAHk-=wiAsJLw1egFEE=Z7-GGtM6wcvtyytXZA1+BHqta4gg6Hw@mail.gmail.com/ Reported-by: Linus Torvalds Signed-off-by: Paul E. McKenney Cc: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Shane M Seymour Cc: Martin K. 
Petersen --- include/linux/rcupdate.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 0b7506330c87..fe470243acdd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -400,22 +400,6 @@ do { \ __tmp; \ }) -/** - * rcu_swap_protected() - swap an RCU and a regular pointer - * @rcu_ptr: RCU pointer - * @ptr: regular pointer - * @c: the conditions under which the dereference will take place - * - * Perform swap(@rcu_ptr, @ptr) where @rcu_ptr is an RCU-annotated pointer and - * @c is the argument that is passed to the rcu_dereference_protected() call - * used to read that pointer. - */ -#define rcu_swap_protected(rcu_ptr, ptr, c) do { \ - typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c)); \ - rcu_assign_pointer((rcu_ptr), (ptr)); \ - (ptr) = __tmp; \ -} while (0) - /** * rcu_access_pointer() - fetch RCU pointer with no dereferencing * @p: The pointer to read -- cgit v1.2.3 From b4653342b1514cb11f25b727c689451aff02996d Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 9 Dec 2019 13:03:40 +0300 Subject: net: Allow to show socket-specific information in /proc/[pid]/fdinfo/[fd] This adds .show_fdinfo to socket_file_ops, so protocols will be able to print their specific data in fdinfo. Signed-off-by: Kirill Tkhai Signed-off-by: David S. 
Miller --- include/linux/net.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 9cafb5f353a9..6451425e828f 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -171,6 +171,7 @@ struct proto_ops { int (*compat_getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); #endif + void (*show_fdinfo)(struct seq_file *m, struct socket *sock); int (*sendmsg) (struct socket *sock, struct msghdr *m, size_t total_len); /* Notes for implementing recvmsg: -- cgit v1.2.3 From 0290bd291cc0e0488e35e66bf39efcd7d9d9122b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 10 Dec 2019 09:23:51 -0500 Subject: netdev: pass the stuck queue to the timeout handler This allows incrementing the correct timeout statistic without any mess. Down the road, devices can learn to reset just the specific queue. The patch was generated with the following script: use strict; use warnings; our $^I = '.bak'; my @work = ( ["arch/m68k/emu/nfeth.c", "nfeth_tx_timeout"], ["arch/um/drivers/net_kern.c", "uml_net_tx_timeout"], ["arch/um/drivers/vector_kern.c", "vector_net_tx_timeout"], ["arch/xtensa/platforms/iss/network.c", "iss_net_tx_timeout"], ["drivers/char/pcmcia/synclink_cs.c", "hdlcdev_tx_timeout"], ["drivers/infiniband/ulp/ipoib/ipoib_main.c", "ipoib_timeout"], ["drivers/infiniband/ulp/ipoib/ipoib_main.c", "ipoib_timeout"], ["drivers/message/fusion/mptlan.c", "mpt_lan_tx_timeout"], ["drivers/misc/sgi-xp/xpnet.c", "xpnet_dev_tx_timeout"], ["drivers/net/appletalk/cops.c", "cops_timeout"], ["drivers/net/arcnet/arcdevice.h", "arcnet_timeout"], ["drivers/net/arcnet/arcnet.c", "arcnet_timeout"], ["drivers/net/arcnet/com20020.c", "arcnet_timeout"], ["drivers/net/ethernet/3com/3c509.c", "el3_tx_timeout"], ["drivers/net/ethernet/3com/3c515.c", "corkscrew_timeout"], ["drivers/net/ethernet/3com/3c574_cs.c", "el3_tx_timeout"], ["drivers/net/ethernet/3com/3c589_cs.c", 
"el3_tx_timeout"], ["drivers/net/ethernet/3com/3c59x.c", "vortex_tx_timeout"], ["drivers/net/ethernet/3com/3c59x.c", "vortex_tx_timeout"], ["drivers/net/ethernet/3com/typhoon.c", "typhoon_tx_timeout"], ["drivers/net/ethernet/8390/8390.h", "ei_tx_timeout"], ["drivers/net/ethernet/8390/8390.h", "eip_tx_timeout"], ["drivers/net/ethernet/8390/8390.c", "ei_tx_timeout"], ["drivers/net/ethernet/8390/8390p.c", "eip_tx_timeout"], ["drivers/net/ethernet/8390/ax88796.c", "ax_ei_tx_timeout"], ["drivers/net/ethernet/8390/axnet_cs.c", "axnet_tx_timeout"], ["drivers/net/ethernet/8390/etherh.c", "__ei_tx_timeout"], ["drivers/net/ethernet/8390/hydra.c", "__ei_tx_timeout"], ["drivers/net/ethernet/8390/mac8390.c", "__ei_tx_timeout"], ["drivers/net/ethernet/8390/mcf8390.c", "__ei_tx_timeout"], ["drivers/net/ethernet/8390/lib8390.c", "__ei_tx_timeout"], ["drivers/net/ethernet/8390/ne2k-pci.c", "ei_tx_timeout"], ["drivers/net/ethernet/8390/pcnet_cs.c", "ei_tx_timeout"], ["drivers/net/ethernet/8390/smc-ultra.c", "ei_tx_timeout"], ["drivers/net/ethernet/8390/wd.c", "ei_tx_timeout"], ["drivers/net/ethernet/8390/zorro8390.c", "__ei_tx_timeout"], ["drivers/net/ethernet/adaptec/starfire.c", "tx_timeout"], ["drivers/net/ethernet/agere/et131x.c", "et131x_tx_timeout"], ["drivers/net/ethernet/allwinner/sun4i-emac.c", "emac_timeout"], ["drivers/net/ethernet/alteon/acenic.c", "ace_watchdog"], ["drivers/net/ethernet/amazon/ena/ena_netdev.c", "ena_tx_timeout"], ["drivers/net/ethernet/amd/7990.h", "lance_tx_timeout"], ["drivers/net/ethernet/amd/7990.c", "lance_tx_timeout"], ["drivers/net/ethernet/amd/a2065.c", "lance_tx_timeout"], ["drivers/net/ethernet/amd/am79c961a.c", "am79c961_timeout"], ["drivers/net/ethernet/amd/amd8111e.c", "amd8111e_tx_timeout"], ["drivers/net/ethernet/amd/ariadne.c", "ariadne_tx_timeout"], ["drivers/net/ethernet/amd/atarilance.c", "lance_tx_timeout"], ["drivers/net/ethernet/amd/au1000_eth.c", "au1000_tx_timeout"], ["drivers/net/ethernet/amd/declance.c", "lance_tx_timeout"], 
["drivers/net/ethernet/amd/lance.c", "lance_tx_timeout"], ["drivers/net/ethernet/amd/mvme147.c", "lance_tx_timeout"], ["drivers/net/ethernet/amd/ni65.c", "ni65_timeout"], ["drivers/net/ethernet/amd/nmclan_cs.c", "mace_tx_timeout"], ["drivers/net/ethernet/amd/pcnet32.c", "pcnet32_tx_timeout"], ["drivers/net/ethernet/amd/sunlance.c", "lance_tx_timeout"], ["drivers/net/ethernet/amd/xgbe/xgbe-drv.c", "xgbe_tx_timeout"], ["drivers/net/ethernet/apm/xgene-v2/main.c", "xge_timeout"], ["drivers/net/ethernet/apm/xgene/xgene_enet_main.c", "xgene_enet_timeout"], ["drivers/net/ethernet/apple/macmace.c", "mace_tx_timeout"], ["drivers/net/ethernet/atheros/ag71xx.c", "ag71xx_tx_timeout"], ["drivers/net/ethernet/atheros/alx/main.c", "alx_tx_timeout"], ["drivers/net/ethernet/atheros/atl1c/atl1c_main.c", "atl1c_tx_timeout"], ["drivers/net/ethernet/atheros/atl1e/atl1e_main.c", "atl1e_tx_timeout"], ["drivers/net/ethernet/atheros/atlx/atl.c", "atlx_tx_timeout"], ["drivers/net/ethernet/atheros/atlx/atl1.c", "atlx_tx_timeout"], ["drivers/net/ethernet/atheros/atlx/atl2.c", "atl2_tx_timeout"], ["drivers/net/ethernet/broadcom/b44.c", "b44_tx_timeout"], ["drivers/net/ethernet/broadcom/bcmsysport.c", "bcm_sysport_tx_timeout"], ["drivers/net/ethernet/broadcom/bnx2.c", "bnx2_tx_timeout"], ["drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h", "bnx2x_tx_timeout"], ["drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c", "bnx2x_tx_timeout"], ["drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c", "bnx2x_tx_timeout"], ["drivers/net/ethernet/broadcom/bnxt/bnxt.c", "bnxt_tx_timeout"], ["drivers/net/ethernet/broadcom/genet/bcmgenet.c", "bcmgenet_timeout"], ["drivers/net/ethernet/broadcom/sb1250-mac.c", "sbmac_tx_timeout"], ["drivers/net/ethernet/broadcom/tg3.c", "tg3_tx_timeout"], ["drivers/net/ethernet/calxeda/xgmac.c", "xgmac_tx_timeout"], ["drivers/net/ethernet/cavium/liquidio/lio_main.c", "liquidio_tx_timeout"], ["drivers/net/ethernet/cavium/liquidio/lio_vf_main.c", "liquidio_tx_timeout"], 
["drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c", "lio_vf_rep_tx_timeout"], ["drivers/net/ethernet/cavium/thunder/nicvf_main.c", "nicvf_tx_timeout"], ["drivers/net/ethernet/cirrus/cs89x0.c", "net_timeout"], ["drivers/net/ethernet/cisco/enic/enic_main.c", "enic_tx_timeout"], ["drivers/net/ethernet/cisco/enic/enic_main.c", "enic_tx_timeout"], ["drivers/net/ethernet/cortina/gemini.c", "gmac_tx_timeout"], ["drivers/net/ethernet/davicom/dm9000.c", "dm9000_timeout"], ["drivers/net/ethernet/dec/tulip/de2104x.c", "de_tx_timeout"], ["drivers/net/ethernet/dec/tulip/tulip_core.c", "tulip_tx_timeout"], ["drivers/net/ethernet/dec/tulip/winbond-840.c", "tx_timeout"], ["drivers/net/ethernet/dlink/dl2k.c", "rio_tx_timeout"], ["drivers/net/ethernet/dlink/sundance.c", "tx_timeout"], ["drivers/net/ethernet/emulex/benet/be_main.c", "be_tx_timeout"], ["drivers/net/ethernet/ethoc.c", "ethoc_tx_timeout"], ["drivers/net/ethernet/faraday/ftgmac100.c", "ftgmac100_tx_timeout"], ["drivers/net/ethernet/fealnx.c", "fealnx_tx_timeout"], ["drivers/net/ethernet/freescale/dpaa/dpaa_eth.c", "dpaa_tx_timeout"], ["drivers/net/ethernet/freescale/fec_main.c", "fec_timeout"], ["drivers/net/ethernet/freescale/fec_mpc52xx.c", "mpc52xx_fec_tx_timeout"], ["drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c", "fs_timeout"], ["drivers/net/ethernet/freescale/gianfar.c", "gfar_timeout"], ["drivers/net/ethernet/freescale/ucc_geth.c", "ucc_geth_timeout"], ["drivers/net/ethernet/fujitsu/fmvj18x_cs.c", "fjn_tx_timeout"], ["drivers/net/ethernet/google/gve/gve_main.c", "gve_tx_timeout"], ["drivers/net/ethernet/hisilicon/hip04_eth.c", "hip04_timeout"], ["drivers/net/ethernet/hisilicon/hix5hd2_gmac.c", "hix5hd2_net_timeout"], ["drivers/net/ethernet/hisilicon/hns/hns_enet.c", "hns_nic_net_timeout"], ["drivers/net/ethernet/hisilicon/hns3/hns3_enet.c", "hns3_nic_net_timeout"], ["drivers/net/ethernet/huawei/hinic/hinic_main.c", "hinic_tx_timeout"], ["drivers/net/ethernet/i825xx/82596.c", "i596_tx_timeout"], 
["drivers/net/ethernet/i825xx/ether1.c", "ether1_timeout"], ["drivers/net/ethernet/i825xx/lib82596.c", "i596_tx_timeout"], ["drivers/net/ethernet/i825xx/sun3_82586.c", "sun3_82586_timeout"], ["drivers/net/ethernet/ibm/ehea/ehea_main.c", "ehea_tx_watchdog"], ["drivers/net/ethernet/ibm/emac/core.c", "emac_tx_timeout"], ["drivers/net/ethernet/ibm/emac/core.c", "emac_tx_timeout"], ["drivers/net/ethernet/ibm/ibmvnic.c", "ibmvnic_tx_timeout"], ["drivers/net/ethernet/intel/e100.c", "e100_tx_timeout"], ["drivers/net/ethernet/intel/e1000/e1000_main.c", "e1000_tx_timeout"], ["drivers/net/ethernet/intel/e1000e/netdev.c", "e1000_tx_timeout"], ["drivers/net/ethernet/intel/fm10k/fm10k_netdev.c", "fm10k_tx_timeout"], ["drivers/net/ethernet/intel/i40e/i40e_main.c", "i40e_tx_timeout"], ["drivers/net/ethernet/intel/iavf/iavf_main.c", "iavf_tx_timeout"], ["drivers/net/ethernet/intel/ice/ice_main.c", "ice_tx_timeout"], ["drivers/net/ethernet/intel/ice/ice_main.c", "ice_tx_timeout"], ["drivers/net/ethernet/intel/igb/igb_main.c", "igb_tx_timeout"], ["drivers/net/ethernet/intel/igbvf/netdev.c", "igbvf_tx_timeout"], ["drivers/net/ethernet/intel/ixgb/ixgb_main.c", "ixgb_tx_timeout"], ["drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c", "adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev);"], ["drivers/net/ethernet/intel/ixgbe/ixgbe_main.c", "ixgbe_tx_timeout"], ["drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c", "ixgbevf_tx_timeout"], ["drivers/net/ethernet/jme.c", "jme_tx_timeout"], ["drivers/net/ethernet/korina.c", "korina_tx_timeout"], ["drivers/net/ethernet/lantiq_etop.c", "ltq_etop_tx_timeout"], ["drivers/net/ethernet/marvell/mv643xx_eth.c", "mv643xx_eth_tx_timeout"], ["drivers/net/ethernet/marvell/pxa168_eth.c", "pxa168_eth_tx_timeout"], ["drivers/net/ethernet/marvell/skge.c", "skge_tx_timeout"], ["drivers/net/ethernet/marvell/sky2.c", "sky2_tx_timeout"], ["drivers/net/ethernet/marvell/sky2.c", "sky2_tx_timeout"], ["drivers/net/ethernet/mediatek/mtk_eth_soc.c", 
"mtk_tx_timeout"], ["drivers/net/ethernet/mellanox/mlx4/en_netdev.c", "mlx4_en_tx_timeout"], ["drivers/net/ethernet/mellanox/mlx4/en_netdev.c", "mlx4_en_tx_timeout"], ["drivers/net/ethernet/mellanox/mlx5/core/en_main.c", "mlx5e_tx_timeout"], ["drivers/net/ethernet/micrel/ks8842.c", "ks8842_tx_timeout"], ["drivers/net/ethernet/micrel/ksz884x.c", "netdev_tx_timeout"], ["drivers/net/ethernet/microchip/enc28j60.c", "enc28j60_tx_timeout"], ["drivers/net/ethernet/microchip/encx24j600.c", "encx24j600_tx_timeout"], ["drivers/net/ethernet/natsemi/sonic.h", "sonic_tx_timeout"], ["drivers/net/ethernet/natsemi/sonic.c", "sonic_tx_timeout"], ["drivers/net/ethernet/natsemi/jazzsonic.c", "sonic_tx_timeout"], ["drivers/net/ethernet/natsemi/macsonic.c", "sonic_tx_timeout"], ["drivers/net/ethernet/natsemi/natsemi.c", "ns_tx_timeout"], ["drivers/net/ethernet/natsemi/ns83820.c", "ns83820_tx_timeout"], ["drivers/net/ethernet/natsemi/xtsonic.c", "sonic_tx_timeout"], ["drivers/net/ethernet/neterion/s2io.h", "s2io_tx_watchdog"], ["drivers/net/ethernet/neterion/s2io.c", "s2io_tx_watchdog"], ["drivers/net/ethernet/neterion/vxge/vxge-main.c", "vxge_tx_watchdog"], ["drivers/net/ethernet/netronome/nfp/nfp_net_common.c", "nfp_net_tx_timeout"], ["drivers/net/ethernet/nvidia/forcedeth.c", "nv_tx_timeout"], ["drivers/net/ethernet/nvidia/forcedeth.c", "nv_tx_timeout"], ["drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c", "pch_gbe_tx_timeout"], ["drivers/net/ethernet/packetengines/hamachi.c", "hamachi_tx_timeout"], ["drivers/net/ethernet/packetengines/yellowfin.c", "yellowfin_tx_timeout"], ["drivers/net/ethernet/pensando/ionic/ionic_lif.c", "ionic_tx_timeout"], ["drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c", "netxen_tx_timeout"], ["drivers/net/ethernet/qlogic/qla3xxx.c", "ql3xxx_tx_timeout"], ["drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c", "qlcnic_tx_timeout"], ["drivers/net/ethernet/qualcomm/emac/emac.c", "emac_tx_timeout"], ["drivers/net/ethernet/qualcomm/qca_spi.c", 
"qcaspi_netdev_tx_timeout"], ["drivers/net/ethernet/qualcomm/qca_uart.c", "qcauart_netdev_tx_timeout"], ["drivers/net/ethernet/rdc/r6040.c", "r6040_tx_timeout"], ["drivers/net/ethernet/realtek/8139cp.c", "cp_tx_timeout"], ["drivers/net/ethernet/realtek/8139too.c", "rtl8139_tx_timeout"], ["drivers/net/ethernet/realtek/atp.c", "tx_timeout"], ["drivers/net/ethernet/realtek/r8169_main.c", "rtl8169_tx_timeout"], ["drivers/net/ethernet/renesas/ravb_main.c", "ravb_tx_timeout"], ["drivers/net/ethernet/renesas/sh_eth.c", "sh_eth_tx_timeout"], ["drivers/net/ethernet/renesas/sh_eth.c", "sh_eth_tx_timeout"], ["drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c", "sxgbe_tx_timeout"], ["drivers/net/ethernet/seeq/ether3.c", "ether3_timeout"], ["drivers/net/ethernet/seeq/sgiseeq.c", "timeout"], ["drivers/net/ethernet/sfc/efx.c", "efx_watchdog"], ["drivers/net/ethernet/sfc/falcon/efx.c", "ef4_watchdog"], ["drivers/net/ethernet/sgi/ioc3-eth.c", "ioc3_timeout"], ["drivers/net/ethernet/sgi/meth.c", "meth_tx_timeout"], ["drivers/net/ethernet/silan/sc92031.c", "sc92031_tx_timeout"], ["drivers/net/ethernet/sis/sis190.c", "sis190_tx_timeout"], ["drivers/net/ethernet/sis/sis900.c", "sis900_tx_timeout"], ["drivers/net/ethernet/smsc/epic100.c", "epic_tx_timeout"], ["drivers/net/ethernet/smsc/smc911x.c", "smc911x_timeout"], ["drivers/net/ethernet/smsc/smc9194.c", "smc_timeout"], ["drivers/net/ethernet/smsc/smc91c92_cs.c", "smc_tx_timeout"], ["drivers/net/ethernet/smsc/smc91x.c", "smc_timeout"], ["drivers/net/ethernet/stmicro/stmmac/stmmac_main.c", "stmmac_tx_timeout"], ["drivers/net/ethernet/sun/cassini.c", "cas_tx_timeout"], ["drivers/net/ethernet/sun/ldmvsw.c", "sunvnet_tx_timeout_common"], ["drivers/net/ethernet/sun/niu.c", "niu_tx_timeout"], ["drivers/net/ethernet/sun/sunbmac.c", "bigmac_tx_timeout"], ["drivers/net/ethernet/sun/sungem.c", "gem_tx_timeout"], ["drivers/net/ethernet/sun/sunhme.c", "happy_meal_tx_timeout"], ["drivers/net/ethernet/sun/sunqe.c", "qe_tx_timeout"], 
["drivers/net/ethernet/sun/sunvnet.c", "sunvnet_tx_timeout_common"], ["drivers/net/ethernet/sun/sunvnet_common.c", "sunvnet_tx_timeout_common"], ["drivers/net/ethernet/sun/sunvnet_common.h", "sunvnet_tx_timeout_common"], ["drivers/net/ethernet/synopsys/dwc-xlgmac-net.c", "xlgmac_tx_timeout"], ["drivers/net/ethernet/ti/cpmac.c", "cpmac_tx_timeout"], ["drivers/net/ethernet/ti/cpsw.c", "cpsw_ndo_tx_timeout"], ["drivers/net/ethernet/ti/cpsw_priv.c", "cpsw_ndo_tx_timeout"], ["drivers/net/ethernet/ti/cpsw_priv.h", "cpsw_ndo_tx_timeout"], ["drivers/net/ethernet/ti/davinci_emac.c", "emac_dev_tx_timeout"], ["drivers/net/ethernet/ti/netcp_core.c", "netcp_ndo_tx_timeout"], ["drivers/net/ethernet/ti/tlan.c", "tlan_tx_timeout"], ["drivers/net/ethernet/toshiba/ps3_gelic_net.h", "gelic_net_tx_timeout"], ["drivers/net/ethernet/toshiba/ps3_gelic_net.c", "gelic_net_tx_timeout"], ["drivers/net/ethernet/toshiba/ps3_gelic_wireless.c", "gelic_net_tx_timeout"], ["drivers/net/ethernet/toshiba/spider_net.c", "spider_net_tx_timeout"], ["drivers/net/ethernet/toshiba/tc35815.c", "tc35815_tx_timeout"], ["drivers/net/ethernet/via/via-rhine.c", "rhine_tx_timeout"], ["drivers/net/ethernet/wiznet/w5100.c", "w5100_tx_timeout"], ["drivers/net/ethernet/wiznet/w5300.c", "w5300_tx_timeout"], ["drivers/net/ethernet/xilinx/xilinx_emaclite.c", "xemaclite_tx_timeout"], ["drivers/net/ethernet/xircom/xirc2ps_cs.c", "xirc_tx_timeout"], ["drivers/net/fjes/fjes_main.c", "fjes_tx_retry"], ["drivers/net/slip/slip.c", "sl_tx_timeout"], ["include/linux/usb/usbnet.h", "usbnet_tx_timeout"], ["drivers/net/usb/aqc111.c", "usbnet_tx_timeout"], ["drivers/net/usb/asix_devices.c", "usbnet_tx_timeout"], ["drivers/net/usb/asix_devices.c", "usbnet_tx_timeout"], ["drivers/net/usb/asix_devices.c", "usbnet_tx_timeout"], ["drivers/net/usb/ax88172a.c", "usbnet_tx_timeout"], ["drivers/net/usb/ax88179_178a.c", "usbnet_tx_timeout"], ["drivers/net/usb/catc.c", "catc_tx_timeout"], ["drivers/net/usb/cdc_mbim.c", "usbnet_tx_timeout"], 
["drivers/net/usb/cdc_ncm.c", "usbnet_tx_timeout"], ["drivers/net/usb/dm9601.c", "usbnet_tx_timeout"], ["drivers/net/usb/hso.c", "hso_net_tx_timeout"], ["drivers/net/usb/int51x1.c", "usbnet_tx_timeout"], ["drivers/net/usb/ipheth.c", "ipheth_tx_timeout"], ["drivers/net/usb/kaweth.c", "kaweth_tx_timeout"], ["drivers/net/usb/lan78xx.c", "lan78xx_tx_timeout"], ["drivers/net/usb/mcs7830.c", "usbnet_tx_timeout"], ["drivers/net/usb/pegasus.c", "pegasus_tx_timeout"], ["drivers/net/usb/qmi_wwan.c", "usbnet_tx_timeout"], ["drivers/net/usb/r8152.c", "rtl8152_tx_timeout"], ["drivers/net/usb/rndis_host.c", "usbnet_tx_timeout"], ["drivers/net/usb/rtl8150.c", "rtl8150_tx_timeout"], ["drivers/net/usb/sierra_net.c", "usbnet_tx_timeout"], ["drivers/net/usb/smsc75xx.c", "usbnet_tx_timeout"], ["drivers/net/usb/smsc95xx.c", "usbnet_tx_timeout"], ["drivers/net/usb/sr9700.c", "usbnet_tx_timeout"], ["drivers/net/usb/sr9800.c", "usbnet_tx_timeout"], ["drivers/net/usb/usbnet.c", "usbnet_tx_timeout"], ["drivers/net/vmxnet3/vmxnet3_drv.c", "vmxnet3_tx_timeout"], ["drivers/net/wan/cosa.c", "cosa_net_timeout"], ["drivers/net/wan/farsync.c", "fst_tx_timeout"], ["drivers/net/wan/fsl_ucc_hdlc.c", "uhdlc_tx_timeout"], ["drivers/net/wan/lmc/lmc_main.c", "lmc_driver_timeout"], ["drivers/net/wan/x25_asy.c", "x25_asy_timeout"], ["drivers/net/wimax/i2400m/netdev.c", "i2400m_tx_timeout"], ["drivers/net/wireless/intel/ipw2x00/ipw2100.c", "ipw2100_tx_timeout"], ["drivers/net/wireless/intersil/hostap/hostap_main.c", "prism2_tx_timeout"], ["drivers/net/wireless/intersil/hostap/hostap_main.c", "prism2_tx_timeout"], ["drivers/net/wireless/intersil/hostap/hostap_main.c", "prism2_tx_timeout"], ["drivers/net/wireless/intersil/orinoco/main.c", "orinoco_tx_timeout"], ["drivers/net/wireless/intersil/orinoco/orinoco_usb.c", "orinoco_tx_timeout"], ["drivers/net/wireless/intersil/orinoco/orinoco.h", "orinoco_tx_timeout"], ["drivers/net/wireless/intersil/prism54/islpci_dev.c", "islpci_eth_tx_timeout"], 
["drivers/net/wireless/intersil/prism54/islpci_eth.c", "islpci_eth_tx_timeout"], ["drivers/net/wireless/intersil/prism54/islpci_eth.h", "islpci_eth_tx_timeout"], ["drivers/net/wireless/marvell/mwifiex/main.c", "mwifiex_tx_timeout"], ["drivers/net/wireless/quantenna/qtnfmac/core.c", "qtnf_netdev_tx_timeout"], ["drivers/net/wireless/quantenna/qtnfmac/core.h", "qtnf_netdev_tx_timeout"], ["drivers/net/wireless/rndis_wlan.c", "usbnet_tx_timeout"], ["drivers/net/wireless/wl3501_cs.c", "wl3501_tx_timeout"], ["drivers/net/wireless/zydas/zd1201.c", "zd1201_tx_timeout"], ["drivers/s390/net/qeth_core.h", "qeth_tx_timeout"], ["drivers/s390/net/qeth_core_main.c", "qeth_tx_timeout"], ["drivers/s390/net/qeth_l2_main.c", "qeth_tx_timeout"], ["drivers/s390/net/qeth_l2_main.c", "qeth_tx_timeout"], ["drivers/s390/net/qeth_l3_main.c", "qeth_tx_timeout"], ["drivers/s390/net/qeth_l3_main.c", "qeth_tx_timeout"], ["drivers/staging/ks7010/ks_wlan_net.c", "ks_wlan_tx_timeout"], ["drivers/staging/qlge/qlge_main.c", "qlge_tx_timeout"], ["drivers/staging/rtl8192e/rtl8192e/rtl_core.c", "_rtl92e_tx_timeout"], ["drivers/staging/rtl8192u/r8192U_core.c", "tx_timeout"], ["drivers/staging/unisys/visornic/visornic_main.c", "visornic_xmit_timeout"], ["drivers/staging/wlan-ng/p80211netdev.c", "p80211knetdev_tx_timeout"], ["drivers/tty/n_gsm.c", "gsm_mux_net_tx_timeout"], ["drivers/tty/synclink.c", "hdlcdev_tx_timeout"], ["drivers/tty/synclink_gt.c", "hdlcdev_tx_timeout"], ["drivers/tty/synclinkmp.c", "hdlcdev_tx_timeout"], ["net/atm/lec.c", "lec_tx_timeout"], ["net/bluetooth/bnep/netdev.c", "bnep_net_timeout"] ); for my $p (@work) { my @pair = @$p; my $file = $pair[0]; my $func = $pair[1]; print STDERR $file , ": ", $func,"\n"; our @ARGV = ($file); while () { if (m/($func\s*\(struct\s+net_device\s+\*[A-Za-z_]?[A-Za-z-0-9_]*)(\))/) { print STDERR "found $1+$2 in $file\n"; } if (s/($func\s*\(struct\s+net_device\s+\*[A-Za-z_]?[A-Za-z-0-9_]*)(\))/$1, unsigned int txqueue$2/) { print STDERR "$func found in 
$file\n"; } print; } } where the list of files and functions is simply from: git grep ndo_tx_timeout, with manual addition of headers in the rare cases where the function is from a header, then manually changing the few places which actually call ndo_tx_timeout. Signed-off-by: Michael S. Tsirkin Acked-by: Heiner Kallweit Acked-by: Jakub Kicinski Acked-by: Shannon Nelson Reviewed-by: Martin Habets changes from v9: fixup a forward declaration changes from v9: more leftovers from v3 change changes from v8: fix up a missing direct call to timeout rebased on net-next changes from v7: fixup leftovers from v3 change changes from v6: fix typo in rtl driver changes from v5: add missing files (allow any net device argument name) changes from v4: add a missing driver header changes from v3: change queue # to unsigned Changes from v2: added headers Changes from v1: Fix errors found by kbuild: generalize the pattern a bit, to pick up a couple of instances missed by the previous version. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++-- include/linux/usb/usbnet.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9ef20389622d..30745068fb39 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1014,7 +1014,7 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); * Called when a user wants to change the Maximum Transfer Unit * of a device. * - * void (*ndo_tx_timeout)(struct net_device *dev); + * void (*ndo_tx_timeout)(struct net_device *dev, unsigned int txqueue); * Callback used when the transmitter has not made any progress * for dev->watchdog ticks. 
* @@ -1281,7 +1281,8 @@ struct net_device_ops { int new_mtu); int (*ndo_neigh_setup)(struct net_device *dev, struct neigh_parms *); - void (*ndo_tx_timeout) (struct net_device *dev); + void (*ndo_tx_timeout) (struct net_device *dev, + unsigned int txqueue); void (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *storage); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index d8860f2d0976..b0bff3083278 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -253,7 +253,7 @@ extern int usbnet_open(struct net_device *net); extern int usbnet_stop(struct net_device *net); extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb, struct net_device *net); -extern void usbnet_tx_timeout(struct net_device *net); +extern void usbnet_tx_timeout(struct net_device *net, unsigned int txqueue); extern int usbnet_change_mtu(struct net_device *net, int new_mtu); extern int usbnet_get_endpoints(struct usbnet *, struct usb_interface *); -- cgit v1.2.3 From 98e8627efcada18ac043a77b9101b4b4c768090b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 13 Dec 2019 18:51:07 +0100 Subject: bpf: Move trampoline JIT image allocation to a function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor the image allocation in the BPF trampoline code into a separate function, so it can be shared with the BPF dispatcher in upcoming commits. 
Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191213175112.30208-2-bjorn.topel@gmail.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 35903f148be5..5d744828b399 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -475,6 +475,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); +void *bpf_jit_alloc_exec_page(void); #else static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { -- cgit v1.2.3 From 75ccbef6369e94ecac696a152a998a978d41376b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 13 Dec 2019 18:51:08 +0100 Subject: bpf: Introduce BPF dispatcher MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BPF dispatcher is a multi-way branch code generator, mainly targeted for XDP programs. When an XDP program is executed via the bpf_prog_run_xdp(), it is invoked via an indirect call. The indirect call has a substantial performance impact, when retpolines are enabled. The dispatcher transforms indirect calls to direct calls, and therefore avoids the retpoline. The dispatcher is generated using the BPF JIT, and relies on text poking provided by bpf_arch_text_poke(). The dispatcher hijacks a trampoline function via the __fentry__ nop of the trampoline. One dispatcher instance currently supports up to 64 dispatch points. A user creates a dispatcher with its corresponding trampoline with the DEFINE_BPF_DISPATCHER macro. 
Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191213175112.30208-3-bjorn.topel@gmail.com --- include/linux/bpf.h | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5d744828b399..53ae4a50abe4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -470,12 +470,61 @@ struct bpf_trampoline { void *image; u64 selector; }; + +#define BPF_DISPATCHER_MAX 64 /* Fits in 2048B */ + +struct bpf_dispatcher_prog { + struct bpf_prog *prog; + refcount_t users; +}; + +struct bpf_dispatcher { + /* dispatcher mutex */ + struct mutex mutex; + void *func; + struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX]; + int num_progs; + void *image; + u32 image_off; +}; + #ifdef CONFIG_BPF_JIT struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); void *bpf_jit_alloc_exec_page(void); +#define BPF_DISPATCHER_INIT(name) { \ + .mutex = __MUTEX_INITIALIZER(name.mutex), \ + .func = &name##func, \ + .progs = {}, \ + .num_progs = 0, \ + .image = NULL, \ + .image_off = 0 \ +} + +#define DEFINE_BPF_DISPATCHER(name) \ + noinline unsigned int name##func( \ + const void *ctx, \ + const struct bpf_insn *insnsi, \ + unsigned int (*bpf_func)(const void *, \ + const struct bpf_insn *)) \ + { \ + return bpf_func(ctx, insnsi); \ + } \ + EXPORT_SYMBOL(name##func); \ + struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name); +#define DECLARE_BPF_DISPATCHER(name) \ + unsigned int name##func( \ + const void *ctx, \ + const struct bpf_insn *insnsi, \ + unsigned int (*bpf_func)(const void *, \ + const struct bpf_insn *)); \ + extern struct bpf_dispatcher name; +#define BPF_DISPATCHER_FUNC(name) name##func +#define BPF_DISPATCHER_PTR(name) (&name) +void 
bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, + struct bpf_prog *to); #else static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { @@ -490,6 +539,13 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) return -ENOTSUPP; } static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} +#define DEFINE_BPF_DISPATCHER(name) +#define DECLARE_BPF_DISPATCHER(name) +#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc +#define BPF_DISPATCHER_PTR(name) NULL +static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, + struct bpf_prog *from, + struct bpf_prog *to) {} #endif struct bpf_func_info_aux { -- cgit v1.2.3 From 7e6897f95935973c3253fd756135b5ea58043dc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 13 Dec 2019 18:51:09 +0100 Subject: bpf, xdp: Start using the BPF dispatcher for XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds a BPF dispatcher for XDP. The dispatcher is updated from the XDP control-path, dev_xdp_install(), and used when an XDP program is run via bpf_prog_run_xdp(). 
Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191213175112.30208-4-bjorn.topel@gmail.com --- include/linux/bpf.h | 15 +++++++++++++++ include/linux/filter.h | 40 ++++++++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 53ae4a50abe4..5970989b99d1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -488,6 +488,14 @@ struct bpf_dispatcher { u32 image_off; }; +static __always_inline unsigned int bpf_dispatcher_nopfunc( + const void *ctx, + const struct bpf_insn *insnsi, + unsigned int (*bpf_func)(const void *, + const struct bpf_insn *)) +{ + return bpf_func(ctx, insnsi); +} #ifdef CONFIG_BPF_JIT struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); @@ -997,6 +1005,8 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog); +struct bpf_prog *bpf_prog_by_id(u32 id); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1128,6 +1138,11 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, static inline void bpf_map_put(struct bpf_map *map) { } + +static inline struct bpf_prog *bpf_prog_by_id(u32 id) +{ + return ERR_PTR(-ENOTSUPP); +} #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, diff --git a/include/linux/filter.h b/include/linux/filter.h index a141cb07e76a..37ac7025031d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -559,23 +559,26 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); -#define BPF_PROG_RUN(prog, ctx) ({ \ - u32 ret; \ - cant_sleep(); \ - if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ - struct bpf_prog_stats *stats; \ - u64 start = sched_clock(); \ - ret = (*(prog)->bpf_func)(ctx, 
(prog)->insnsi); \ - stats = this_cpu_ptr(prog->aux->stats); \ - u64_stats_update_begin(&stats->syncp); \ - stats->cnt++; \ - stats->nsecs += sched_clock() - start; \ - u64_stats_update_end(&stats->syncp); \ - } else { \ - ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \ - } \ +#define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \ + u32 ret; \ + cant_sleep(); \ + if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ + struct bpf_prog_stats *stats; \ + u64 start = sched_clock(); \ + ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + stats = this_cpu_ptr(prog->aux->stats); \ + u64_stats_update_begin(&stats->syncp); \ + stats->cnt++; \ + stats->nsecs += sched_clock() - start; \ + u64_stats_update_end(&stats->syncp); \ + } else { \ + ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + } \ ret; }) +#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx, \ + bpf_dispatcher_nopfunc) + #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN struct bpf_skb_data_end { @@ -699,6 +702,8 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, return res; } +DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp) + static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, struct xdp_buff *xdp) { @@ -708,9 +713,12 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, * already takes rcu_read_lock() when fetching the program, so * it's not necessary here anymore. 
*/ - return BPF_PROG_RUN(prog, xdp); + return __BPF_PROG_RUN(prog, xdp, + BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp)); } +void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); + static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) { return prog->len * sizeof(struct bpf_insn); -- cgit v1.2.3 From 116eb788f57c9c35c40b29cfaa2607020de99a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 13 Dec 2019 18:51:12 +0100 Subject: bpf, x86: Align dispatcher branch targets to 16B MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From Intel 64 and IA-32 Architectures Optimization Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler Coding Rule 11: All branch targets should be 16-byte aligned. This commit aligns branch targets according to the Intel manual. The nops used to align branch targets make the dispatcher larger, and therefore the number of supported dispatch points/programs is decreased from 64 to 48. Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191213175112.30208-7-bjorn.topel@gmail.com --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5970989b99d1..d467983e61bb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -471,7 +471,7 @@ struct bpf_trampoline { u64 selector; }; -#define BPF_DISPATCHER_MAX 64 /* Fits in 2048B */ +#define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */ struct bpf_dispatcher_prog { struct bpf_prog *prog; -- cgit v1.2.3 From 9429439f59cd3b82a3e2732ead5363578de97a84 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Thu, 12 Dec 2019 18:08:05 +0800 Subject: ptp_qoriq: export extts_clean_up() function Export extts_clean_up() function so that dpaa2-ptp driver is able to reuse it. Signed-off-by: Yangbo Lu Signed-off-by: David S. 
Miller --- include/linux/fsl/ptp_qoriq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 992bf9fa1729..b0b743563f43 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -192,6 +192,7 @@ int ptp_qoriq_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts); int ptp_qoriq_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on); +int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, bool update_event); #ifdef CONFIG_DEBUG_FS void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq); void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq); -- cgit v1.2.3 From a5e37de90e67ac1072a9a44bd0cec9f5e98ded08 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 14 Dec 2019 19:51:07 +0000 Subject: stop_machine: remove try_stop_cpus helper try_stop_cpus is not used after this: commit c190c3b16c0f ("rcu: Switch synchronize_sched_expedited() to stop_one_cpu()") So remove it. 
Signed-off-by: Yangtao Li Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191214195107.26480-1-tiny.windzz@gmail.com --- include/linux/stop_machine.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index f9a0c6189852..648298f877da 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -33,7 +33,6 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf); int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); -int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); void stop_machine_park(int cpu); void stop_machine_unpark(int cpu); void stop_machine_yield(const struct cpumask *cpumask); @@ -90,12 +89,6 @@ static inline int stop_cpus(const struct cpumask *cpumask, return -ENOENT; } -static inline int try_stop_cpus(const struct cpumask *cpumask, - cpu_stop_fn_t fn, void *arg) -{ - return stop_cpus(cpumask, fn, arg); -} - #endif /* CONFIG_SMP */ /* -- cgit v1.2.3 From 2f5e70c8ce47396bfa8f5c437574b569c02597bb Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Nov 2019 12:33:59 +0100 Subject: netfilter: Document ingress hook Amend kerneldoc of struct net_device to fix a "make htmldocs" warning: include/linux/netdevice.h:2045: warning: Function parameter or member 'nf_hooks_ingress' not described in 'net_device' Reported-by: kbuild test robot Signed-off-by: Lukas Wunner Cc: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 30745068fb39..0b097bbd3663 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1708,6 +1708,7 @@ enum netdev_priv_flags { * @miniq_ingress: 
ingress/clsact qdisc specific data for * ingress processing * @ingress_queue: XXX: need comments on this one + * @nf_hooks_ingress: netfilter hooks executed for ingress packets * @broadcast: hw bcast address * * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts, -- cgit v1.2.3 From d4aef159394d5940bd7158ab789969dab82f7c76 Mon Sep 17 00:00:00 2001 From: Soeren Moch Date: Thu, 12 Dec 2019 00:52:49 +0100 Subject: brcmfmac: add support for BCM4359 SDIO chipset BCM4359 is a 2x2 802.11 abgn+ac Dual-Band HT80 combo chip and it supports Real Simultaneous Dual Band feature. Based on a similar patch by: Wright Feng Signed-off-by: Soeren Moch Acked-by: Chi-Hsien Lin Acked-by: Ulf Hansson Signed-off-by: Kalle Valo --- include/linux/mmc/sdio_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 08b25c02b5a1..2e9a6e4634eb 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -41,8 +41,10 @@ #define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 #define SDIO_DEVICE_ID_BROADCOM_4356 0x4356 +#define SDIO_DEVICE_ID_BROADCOM_4359 0x4359 #define SDIO_DEVICE_ID_CYPRESS_4373 0x4373 #define SDIO_DEVICE_ID_CYPRESS_43012 43012 +#define SDIO_DEVICE_ID_CYPRESS_89359 0x4355 #define SDIO_VENDOR_ID_INTEL 0x0089 #define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX 0x1402 -- cgit v1.2.3 From 504723af0d85434be5fb6f2dde0b62644a7f1ead Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 18 Dec 2019 11:33:05 +0100 Subject: net: stmmac: Add basic EST support for GMAC5+ Adds the support for EST in GMAC5+ cores. This feature allows to offload scheduling of queues opening time to the IP. Signed-off-by: Jose Abreu Signed-off-by: David S. 
Miller --- include/linux/stmmac.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index d4bcd9387136..0531afa9b21e 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -109,6 +109,18 @@ struct stmmac_axi { bool axi_rb; }; +#define EST_GCL 1024 +struct stmmac_est { + int enable; + u32 btr_offset[2]; + u32 btr[2]; + u32 ctr[2]; + u32 ter; + u32 gcl_unaligned[EST_GCL]; + u32 gcl[EST_GCL]; + u32 gcl_size; +}; + struct stmmac_rxq_cfg { u8 mode_to_use; u32 chan; @@ -139,6 +151,7 @@ struct plat_stmmacenet_data { struct device_node *phylink_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; + struct stmmac_est *est; int clk_csr; int has_gmac; int enh_desc; -- cgit v1.2.3 From d35eb52bd2ac7557b62bda52668f2e64dde2cf90 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 18 Dec 2019 14:55:15 +0000 Subject: net: sch_ets: Make the ETS qdisc offloadable Add hooks at appropriate points to make it possible to offload the ETS Qdisc. Signed-off-by: Petr Machata Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 30745068fb39..7a8ed11f5d45 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -849,6 +849,7 @@ enum tc_setup_type { TC_SETUP_QDISC_GRED, TC_SETUP_QDISC_TAPRIO, TC_SETUP_FT, + TC_SETUP_QDISC_ETS, }; /* These structures hold the attributes of bpf state that are being passed -- cgit v1.2.3 From 2a10ab043ac5a658225ee77852db7942de9ac4c5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 17 Dec 2019 13:39:11 +0000 Subject: net: phy: add genphy_check_and_restart_aneg() Add a helper for restarting autonegotiation(), similar to the clause 45 variant. 
Use it in __genphy_config_aneg() Signed-off-by: Russell King Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 5032d453ac66..1c4f97d2631d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1094,6 +1094,7 @@ void phy_attached_info(struct phy_device *phydev); int genphy_read_abilities(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); +int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart); int genphy_config_eee_advert(struct phy_device *phydev); int __genphy_config_aneg(struct phy_device *phydev, bool changed); int genphy_aneg_done(struct phy_device *phydev); -- cgit v1.2.3 From 0efc286a923874f0c243e5766cce54e9429ed949 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 17 Dec 2019 13:39:16 +0000 Subject: net: phy: provide and use genphy_read_status_fixed() There are two drivers and generic code which contain exactly the same code to read the status of a PHY operating without autonegotiation enabled. Rather than duplicate this code, provide a helper to read this information. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 1c4f97d2631d..b2105e0d72d3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1100,6 +1100,7 @@ int __genphy_config_aneg(struct phy_device *phydev, bool changed); int genphy_aneg_done(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); int genphy_read_lpa(struct phy_device *phydev); +int genphy_read_status_fixed(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); -- cgit v1.2.3 From 96360004b8628541f5d05a845ea213267db0b1a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 19 Dec 2019 07:10:03 +0100 Subject: xdp: Make devmap flush_list common for all map instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The devmap flush list is used to track entries that need to be flushed via the xdp_do_flush_map() function. This list used to be per-map, but there is really no reason for that. Instead make the flush list global for all devmaps, which simplifies __dev_map_flush() and dev_map_init_map(). 
Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20191219061006.21980-6-bjorn.topel@gmail.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d467983e61bb..31191804ca09 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -959,7 +959,7 @@ struct sk_buff; struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); -void __dev_map_flush(struct bpf_map *map); +void __dev_map_flush(void); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, @@ -1068,7 +1068,7 @@ static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map return NULL; } -static inline void __dev_map_flush(struct bpf_map *map) +static inline void __dev_map_flush(void) { } -- cgit v1.2.3 From cdfafe98cabefeedbbc65af5c191c59745c03298 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 19 Dec 2019 07:10:04 +0100 Subject: xdp: Make cpumap flush_list common for all map instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cpumap flush list is used to track entries that need to be flushed via the xdp_do_flush_map() function. This list used to be per-map, but there is really no reason for that. Instead make the flush list global for all cpumaps, which simplifies __cpu_map_flush() and cpu_map_alloc(). 
Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20191219061006.21980-7-bjorn.topel@gmail.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 31191804ca09..8f3e00c84f39 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -966,7 +966,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); -void __cpu_map_flush(struct bpf_map *map); +void __cpu_map_flush(void); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -1097,7 +1097,7 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) return NULL; } -static inline void __cpu_map_flush(struct bpf_map *map) +static inline void __cpu_map_flush(void) { } -- cgit v1.2.3 From 332f22a60e4c3492d4953cd6f7aaa4e8bd0bba97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 19 Dec 2019 07:10:05 +0100 Subject: xdp: Remove map_to_flush and map swap detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that all XDP maps that can be used with bpf_redirect_map() track entries to be flushed in a global fashion, there is no need to track that the map has changed and flush from xdp_do_generic_map() anymore. All entries will be flushed in xdp_do_flush_map(). This means that the map_to_flush can be removed, and the corresponding checks. Moving the flush logic to one place, xdp_do_flush_map(), gives a bulking behavior and performance boost. 
Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20191219061006.21980-8-bjorn.topel@gmail.com --- include/linux/filter.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 37ac7025031d..69d6706fc889 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -592,7 +592,6 @@ struct bpf_redirect_info { u32 tgt_index; void *tgt_value; struct bpf_map *map; - struct bpf_map *map_to_flush; u32 kern_flags; }; -- cgit v1.2.3 From 7dd68b3279f1792103d12e69933db3128c6d416e Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 18 Dec 2019 23:44:35 -0800 Subject: bpf: Support replacing cgroup-bpf program in MULTI mode The common use-case in production is to have multiple cgroup-bpf programs per attach type that cover multiple use-cases. Such programs are attached with BPF_F_ALLOW_MULTI and can be maintained by different people. Order of programs usually matters, for example imagine two egress programs: the first one drops packets and the second one counts packets. If they're swapped the result of counting program will be different. It brings operational challenges with updating cgroup-bpf program(s) attached with BPF_F_ALLOW_MULTI since there is no way to replace a program: * One way to update is to detach all programs first and then attach the new version(s) again in the right order. This introduces an interruption in the work a program is doing and may not be acceptable (e.g. if it's egress firewall); * Another way is attach the new version of a program first and only then detach the old version. This introduces the time interval when two versions of the same program are working, which may not be acceptable if a program is not idempotent. It also imposes additional burden on program developers to make sure that two versions of their program can co-exist. 
Solve the problem by introducing a "replace" mode in BPF_PROG_ATTACH command for cgroup-bpf programs being attached with BPF_F_ALLOW_MULTI flag. This mode is enabled by newly introduced BPF_F_REPLACE attach flag and bpf_attr.replace_bpf_fd attribute to pass fd of the old program to replace. That way user can replace any program among those attached with BPF_F_ALLOW_MULTI flag without the problems described above. Details of the new API: * If BPF_F_REPLACE is set but replace_bpf_fd doesn't have valid descriptor of BPF program, BPF_PROG_ATTACH will return corresponding error (EINVAL or EBADF). * If replace_bpf_fd has valid descriptor of BPF program but such a program is not attached to specified cgroup, BPF_PROG_ATTACH will return ENOENT. BPF_F_REPLACE is introduced to make the user intent clear, since replace_bpf_fd alone can't be used for this (its default value, 0, is a valid fd). BPF_F_REPLACE also makes it possible to extend the API in the future (e.g. add BPF_F_BEFORE and BPF_F_AFTER if needed). 
Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: Andrii Narkyiko Link: https://lore.kernel.org/bpf/30cd850044a0057bdfcaaf154b7d2f39850ba813.1576741281.git.rdna@fb.com --- include/linux/bpf-cgroup.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 169fd25f6bc2..18f6a6da7c3c 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -85,6 +85,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp); void cgroup_bpf_offline(struct cgroup *cgrp); int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + struct bpf_prog *replace_prog, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); @@ -93,7 +94,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags); + struct bpf_prog *replace_prog, enum bpf_attach_type type, + u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, -- cgit v1.2.3 From 632b0b5301f67ce54b840d55950707003a489151 Mon Sep 17 00:00:00 2001 From: Rijo Thomas Date: Wed, 4 Dec 2019 11:49:03 +0530 Subject: crypto: ccp - provide in-kernel API to submit TEE commands Extend the functionality of AMD Secure Processor (SP) driver by providing an in-kernel API to submit commands to TEE ring buffer for processing by Trusted OS running on AMD Secure Processor. 
Following TEE commands are supported by Trusted OS: * TEE_CMD_ID_LOAD_TA : Load Trusted Application (TA) binary into TEE environment * TEE_CMD_ID_UNLOAD_TA : Unload TA binary from TEE environment * TEE_CMD_ID_OPEN_SESSION : Open session with loaded TA * TEE_CMD_ID_CLOSE_SESSION : Close session with loaded TA * TEE_CMD_ID_INVOKE_CMD : Invoke a command with loaded TA * TEE_CMD_ID_MAP_SHARED_MEM : Map shared memory * TEE_CMD_ID_UNMAP_SHARED_MEM : Unmap shared memory Linux AMD-TEE driver will use this API to submit command buffers for processing in Trusted Execution Environment. The AMD-TEE driver shall be introduced in a separate patch. Cc: Jens Wiklander Cc: Tom Lendacky Cc: Ard Biesheuvel Co-developed-by: Devaraj Rangasamy Signed-off-by: Devaraj Rangasamy Signed-off-by: Rijo Thomas Acked-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/psp-tee.h | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 include/linux/psp-tee.h (limited to 'include/linux') diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h new file mode 100644 index 000000000000..63bb2212fce0 --- /dev/null +++ b/include/linux/psp-tee.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: MIT */ +/* + * AMD Trusted Execution Environment (TEE) interface + * + * Author: Rijo Thomas + * + * Copyright 2019 Advanced Micro Devices, Inc. + * + */ + +#ifndef __PSP_TEE_H_ +#define __PSP_TEE_H_ + +#include +#include + +/* This file defines the Trusted Execution Environment (TEE) interface commands + * and the API exported by AMD Secure Processor driver to communicate with + * AMD-TEE Trusted OS. 
+ */ + +/** + * enum tee_cmd_id - TEE Interface Command IDs + * @TEE_CMD_ID_LOAD_TA: Load Trusted Application (TA) binary into + * TEE environment + * @TEE_CMD_ID_UNLOAD_TA: Unload TA binary from TEE environment + * @TEE_CMD_ID_OPEN_SESSION: Open session with loaded TA + * @TEE_CMD_ID_CLOSE_SESSION: Close session with loaded TA + * @TEE_CMD_ID_INVOKE_CMD: Invoke a command with loaded TA + * @TEE_CMD_ID_MAP_SHARED_MEM: Map shared memory + * @TEE_CMD_ID_UNMAP_SHARED_MEM: Unmap shared memory + */ +enum tee_cmd_id { + TEE_CMD_ID_LOAD_TA = 1, + TEE_CMD_ID_UNLOAD_TA, + TEE_CMD_ID_OPEN_SESSION, + TEE_CMD_ID_CLOSE_SESSION, + TEE_CMD_ID_INVOKE_CMD, + TEE_CMD_ID_MAP_SHARED_MEM, + TEE_CMD_ID_UNMAP_SHARED_MEM, +}; + +#ifdef CONFIG_CRYPTO_DEV_SP_PSP +/** + * psp_tee_process_cmd() - Process command in Trusted Execution Environment + * @cmd_id: TEE command ID (&enum tee_cmd_id) + * @buf: Command buffer for TEE processing. On success, is updated + * with the response + * @len: Length of command buffer in bytes + * @status: On success, holds the TEE command execution status + * + * This function submits a command to the Trusted OS for processing in the + * TEE environment and waits for a response or until the command times out. 
+ * + * Returns: + * 0 if TEE successfully processed the command + * -%ENODEV if PSP device not available + * -%EINVAL if invalid input + * -%ETIMEDOUT if TEE command timed out + * -%EBUSY if PSP device is not responsive + */ +int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len, + u32 *status); + +#else /* !CONFIG_CRYPTO_DEV_SP_PSP */ + +static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, + size_t len, u32 *status) +{ + return -ENODEV; +} +#endif /* CONFIG_CRYPTO_DEV_SP_PSP */ +#endif /* __PSP_TEE_H_ */ -- cgit v1.2.3 From c6d633a927499f35a06455a960ad6b5a59c87c2c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 15 Dec 2019 15:51:19 -0800 Subject: crypto: algapi - make unregistration functions return void Some of the algorithm unregistration functions return -ENOENT when asked to unregister a non-registered algorithm, while others always return 0 or always return void. But no users check the return value, except for two of the bulk unregistration functions which print a message on error but still always return 0 to their caller, and crypto_del_alg() which calls crypto_unregister_instance() which always returns 0. Since unregistering a non-registered algorithm is always a kernel bug but there isn't anything callers should do to handle this situation at runtime, let's simplify things by making all the unregistration functions return void, and moving the error message into crypto_unregister_alg() and upgrading it to a WARN(). Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index c23f1eed7970..a905e524e332 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -584,9 +584,9 @@ static inline void crypto_init_wait(struct crypto_wait *wait) * Algorithm registration interface. 
*/ int crypto_register_alg(struct crypto_alg *alg); -int crypto_unregister_alg(struct crypto_alg *alg); +void crypto_unregister_alg(struct crypto_alg *alg); int crypto_register_algs(struct crypto_alg *algs, int count); -int crypto_unregister_algs(struct crypto_alg *algs, int count); +void crypto_unregister_algs(struct crypto_alg *algs, int count); /* * Algorithm query interface. -- cgit v1.2.3 From 8d62af1778125bd674cc66e8432305cc6aac5d89 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 24 Dec 2019 16:10:01 +0100 Subject: efi/gop: Remove bogus packed attribute from GOP structures EFI structures are not packed, they follow natural alignment. The packed attribute doesn't have any effect on the structure layout due to the types and order of the members, and we only ever get these structures as output from the EFI firmware so alignment issues have not come up. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-2-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index aa54586db7a5..83a62f5c3fd7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1429,7 +1429,7 @@ struct efi_graphics_output_mode_info { int pixel_format; struct efi_pixel_bitmask pixel_information; u32 pixels_per_scan_line; -} __packed; +}; struct efi_graphics_output_protocol_mode_32 { u32 max_mode; @@ -1438,7 +1438,7 @@ struct efi_graphics_output_protocol_mode_32 { u32 size_of_info; u64 frame_buffer_base; u32 frame_buffer_size; -} __packed; +}; struct efi_graphics_output_protocol_mode_64 { u32 max_mode; @@ -1447,7 +1447,7 @@ struct efi_graphics_output_protocol_mode_64 { u64 size_of_info; u64 frame_buffer_base; u64 frame_buffer_size; -} __packed; +}; struct 
efi_graphics_output_protocol_mode { u32 max_mode; @@ -1456,7 +1456,7 @@ struct efi_graphics_output_protocol_mode { unsigned long size_of_info; u64 frame_buffer_base; unsigned long frame_buffer_size; -} __packed; +}; struct efi_graphics_output_protocol_32 { u32 query_mode; -- cgit v1.2.3 From 6c895c2fca8a8d4e740b5498b48f81111569502a Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 24 Dec 2019 16:10:02 +0100 Subject: efi/gop: Remove unused typedef We have stopped using gop->query_mode(), so remove the unused typedef for the function prototype. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-3-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 83a62f5c3fd7..9ea81cfe1576 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1479,10 +1479,6 @@ struct efi_graphics_output_protocol { struct efi_graphics_output_protocol_mode *mode; }; -typedef efi_status_t (*efi_graphics_output_protocol_query_mode)( - struct efi_graphics_output_protocol *, u32, unsigned long *, - struct efi_graphics_output_mode_info **); - extern struct list_head efivar_sysfs_list; static inline void -- cgit v1.2.3 From 44c84b4ada73b8ff156181fcf6e320459b8daefd Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 24 Dec 2019 16:10:03 +0100 Subject: efi/gop: Convert GOP structures to typedef and clean up some types Use typedef for the GOP structures, in anticipation of unifying 32/64-bit code. Also use more appropriate types in the non-bitness specific structures for the framebuffer address and pointers. 
Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-4-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 9ea81cfe1576..561db9deedae 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1415,69 +1415,69 @@ struct efi_simple_text_output_protocol { #define PIXEL_BLT_ONLY 3 #define PIXEL_FORMAT_MAX 4 -struct efi_pixel_bitmask { +typedef struct { u32 red_mask; u32 green_mask; u32 blue_mask; u32 reserved_mask; -}; +} efi_pixel_bitmask_t; -struct efi_graphics_output_mode_info { +typedef struct { u32 version; u32 horizontal_resolution; u32 vertical_resolution; int pixel_format; - struct efi_pixel_bitmask pixel_information; + efi_pixel_bitmask_t pixel_information; u32 pixels_per_scan_line; -}; +} efi_graphics_output_mode_info_t; -struct efi_graphics_output_protocol_mode_32 { +typedef struct { u32 max_mode; u32 mode; u32 info; u32 size_of_info; u64 frame_buffer_base; u32 frame_buffer_size; -}; +} efi_graphics_output_protocol_mode_32_t; -struct efi_graphics_output_protocol_mode_64 { +typedef struct { u32 max_mode; u32 mode; u64 info; u64 size_of_info; u64 frame_buffer_base; u64 frame_buffer_size; -}; +} efi_graphics_output_protocol_mode_64_t; -struct efi_graphics_output_protocol_mode { +typedef struct { u32 max_mode; u32 mode; - unsigned long info; + efi_graphics_output_mode_info_t *info; unsigned long size_of_info; - u64 frame_buffer_base; + efi_physical_addr_t frame_buffer_base; unsigned long frame_buffer_size; -}; +} efi_graphics_output_protocol_mode_t; -struct efi_graphics_output_protocol_32 { +typedef struct { u32 query_mode; u32 set_mode; u32 blt; u32 mode; -}; +} 
efi_graphics_output_protocol_32_t; -struct efi_graphics_output_protocol_64 { +typedef struct { u64 query_mode; u64 set_mode; u64 blt; u64 mode; -}; +} efi_graphics_output_protocol_64_t; -struct efi_graphics_output_protocol { - unsigned long query_mode; - unsigned long set_mode; - unsigned long blt; - struct efi_graphics_output_protocol_mode *mode; -}; +typedef struct { + void *query_mode; + void *set_mode; + void *blt; + efi_graphics_output_protocol_mode_t *mode; +} efi_graphics_output_protocol_t; extern struct list_head efivar_sysfs_list; -- cgit v1.2.3 From 2732ea0d5c0a67ec86bfbde2bd68b6152e23ec4e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:07 +0100 Subject: efi/libstub: Use a helper to iterate over a EFI handle array Iterating over a EFI handle array is a bit finicky, since we have to take mixed mode into account, where handles are only 32-bit while the native efi_handle_t type is 64-bit. So introduce a helper, and replace the various occurrences of this pattern. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-8-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 561db9deedae..8d267715ce22 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -48,6 +48,19 @@ typedef u16 efi_char16_t; /* UNICODE character */ typedef u64 efi_physical_addr_t; typedef void *efi_handle_t; +#define efi_get_handle_at(array, idx) \ + (efi_is_64bit() ? (efi_handle_t)(unsigned long)((u64 *)(array))[idx] \ + : (efi_handle_t)(unsigned long)((u32 *)(array))[idx]) + +#define efi_get_handle_num(size) \ + ((size) / (efi_is_64bit() ? 
sizeof(u64) : sizeof(u32))) + +#define for_each_efi_handle(handle, array, size, i) \ + for (i = 0; \ + i < efi_get_handle_num(size) && \ + ((handle = efi_get_handle_at((array), i)) || true); \ + i++) + /* * The UEFI spec and EDK2 reference implementation both define EFI_GUID as * struct { u32 a; u16; b; u16 c; u8 d[8]; }; and so the implied alignment -- cgit v1.2.3 From 1786e83011644e18732ed006413339d5323766e9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:08 +0100 Subject: efi/libstub: Extend native protocol definitions with mixed_mode aliases In preparation of moving to a native vs. mixed mode split rather than a 32 vs. 64 bit split when it comes to invoking EFI firmware services, update all the native protocol definitions and redefine them as unions containing an anonymous struct for the native view and a struct called 'mixed_mode' describing the 32-bit view of the protocol when called from 64-bit code. While at it, flesh out some PCI I/O member definitions that we will be needing shortly. 
Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-9-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 496 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 330 insertions(+), 166 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 8d267715ce22..5a220af263b1 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -315,55 +315,58 @@ typedef struct { /* * EFI Boot Services table */ -typedef struct { - efi_table_hdr_t hdr; - void *raise_tpl; - void *restore_tpl; - efi_status_t (*allocate_pages)(int, int, unsigned long, - efi_physical_addr_t *); - efi_status_t (*free_pages)(efi_physical_addr_t, unsigned long); - efi_status_t (*get_memory_map)(unsigned long *, void *, unsigned long *, - unsigned long *, u32 *); - efi_status_t (*allocate_pool)(int, unsigned long, void **); - efi_status_t (*free_pool)(void *); - void *create_event; - void *set_timer; - void *wait_for_event; - void *signal_event; - void *close_event; - void *check_event; - void *install_protocol_interface; - void *reinstall_protocol_interface; - void *uninstall_protocol_interface; - efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **); - void *__reserved; - void *register_protocol_notify; - efi_status_t (*locate_handle)(int, efi_guid_t *, void *, - unsigned long *, efi_handle_t *); - void *locate_device_path; - efi_status_t (*install_configuration_table)(efi_guid_t *, void *); - void *load_image; - void *start_image; - void *exit; - void *unload_image; - efi_status_t (*exit_boot_services)(efi_handle_t, unsigned long); - void *get_next_monotonic_count; - void *stall; - void *set_watchdog_timer; - void *connect_controller; - void *disconnect_controller; - void *open_protocol; - void *close_protocol; - void *open_protocol_information; - void 
*protocols_per_handle; - void *locate_handle_buffer; - efi_status_t (*locate_protocol)(efi_guid_t *, void *, void **); - void *install_multiple_protocol_interfaces; - void *uninstall_multiple_protocol_interfaces; - void *calculate_crc32; - void *copy_mem; - void *set_mem; - void *create_event_ex; +typedef union { + struct { + efi_table_hdr_t hdr; + void *raise_tpl; + void *restore_tpl; + efi_status_t (*allocate_pages)(int, int, unsigned long, + efi_physical_addr_t *); + efi_status_t (*free_pages)(efi_physical_addr_t, unsigned long); + efi_status_t (*get_memory_map)(unsigned long *, void *, unsigned long *, + unsigned long *, u32 *); + efi_status_t (*allocate_pool)(int, unsigned long, void **); + efi_status_t (*free_pool)(void *); + void *create_event; + void *set_timer; + void *wait_for_event; + void *signal_event; + void *close_event; + void *check_event; + void *install_protocol_interface; + void *reinstall_protocol_interface; + void *uninstall_protocol_interface; + efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **); + void *__reserved; + void *register_protocol_notify; + efi_status_t (*locate_handle)(int, efi_guid_t *, void *, + unsigned long *, efi_handle_t *); + void *locate_device_path; + efi_status_t (*install_configuration_table)(efi_guid_t *, void *); + void *load_image; + void *start_image; + void *exit; + void *unload_image; + efi_status_t (*exit_boot_services)(efi_handle_t, unsigned long); + void *get_next_monotonic_count; + void *stall; + void *set_watchdog_timer; + void *connect_controller; + void *disconnect_controller; + void *open_protocol; + void *close_protocol; + void *open_protocol_information; + void *protocols_per_handle; + void *locate_handle_buffer; + efi_status_t (*locate_protocol)(efi_guid_t *, void *, void **); + void *install_multiple_protocol_interfaces; + void *uninstall_multiple_protocol_interfaces; + void *calculate_crc32; + void *copy_mem; + void *set_mem; + void *create_event_ex; + }; + efi_boot_services_32_t 
mixed_mode; } efi_boot_services_t; typedef enum { @@ -401,11 +404,24 @@ typedef struct { u64 write; } efi_pci_io_protocol_access_64_t; +typedef union efi_pci_io_protocol efi_pci_io_protocol_t; + +typedef +efi_status_t (*efi_pci_io_protocol_cfg_t)(efi_pci_io_protocol_t *, + EFI_PCI_IO_PROTOCOL_WIDTH, + u32 offset, unsigned long count, + void *buffer); + typedef struct { void *read; void *write; } efi_pci_io_protocol_access_t; +typedef struct { + efi_pci_io_protocol_cfg_t read; + efi_pci_io_protocol_cfg_t write; +} efi_pci_io_protocol_config_access_t; + typedef struct { u32 poll_mem; u32 poll_io; @@ -446,25 +462,46 @@ typedef struct { u64 romimage; } efi_pci_io_protocol_64_t; -typedef struct { - void *poll_mem; - void *poll_io; - efi_pci_io_protocol_access_t mem; - efi_pci_io_protocol_access_t io; - efi_pci_io_protocol_access_t pci; - void *copy_mem; - void *map; - void *unmap; - void *allocate_buffer; - void *free_buffer; - void *flush; - void *get_location; - void *attributes; - void *get_bar_attributes; - void *set_bar_attributes; - uint64_t romsize; - void *romimage; -} efi_pci_io_protocol_t; +union efi_pci_io_protocol { + struct { + void *poll_mem; + void *poll_io; + efi_pci_io_protocol_access_t mem; + efi_pci_io_protocol_access_t io; + efi_pci_io_protocol_config_access_t pci; + void *copy_mem; + void *map; + void *unmap; + void *allocate_buffer; + void *free_buffer; + void *flush; + void *get_location; + void *attributes; + void *get_bar_attributes; + void *set_bar_attributes; + uint64_t romsize; + void *romimage; + }; + struct { + u32 poll_mem; + u32 poll_io; + efi_pci_io_protocol_access_32_t mem; + efi_pci_io_protocol_access_32_t io; + efi_pci_io_protocol_access_32_t pci; + u32 copy_mem; + u32 map; + u32 unmap; + u32 allocate_buffer; + u32 free_buffer; + u32 flush; + u32 get_location; + u32 attributes; + u32 get_bar_attributes; + u32 set_bar_attributes; + u64 romsize; + u32 romimage; + } mixed_mode; +}; #define EFI_PCI_IO_ATTRIBUTE_ISA_MOTHERBOARD_IO 0x0001 
#define EFI_PCI_IO_ATTRIBUTE_ISA_IO 0x0002 @@ -502,6 +539,33 @@ typedef struct { u64 get_all; } apple_properties_protocol_64_t; +struct efi_dev_path; + +typedef union apple_properties_protocol apple_properties_protocol_t; + +union apple_properties_protocol { + struct { + unsigned long version; + efi_status_t (*get)(apple_properties_protocol_t *, + struct efi_dev_path *, efi_char16_t *, + void *, u32 *); + efi_status_t (*set)(apple_properties_protocol_t *, + struct efi_dev_path *, efi_char16_t *, + void *, u32); + efi_status_t (*del)(apple_properties_protocol_t *, + struct efi_dev_path *, efi_char16_t *); + efi_status_t (*get_all)(apple_properties_protocol_t *, + void *buffer, u32 *); + }; + struct { + u32 version; + u32 get; + u32 set; + u32 del; + u32 get_all; + } mixed_mode; +}; + typedef struct { u32 get_capability; u32 get_event_log; @@ -524,16 +588,32 @@ typedef struct { typedef u32 efi_tcg2_event_log_format; -typedef struct { - void *get_capability; - efi_status_t (*get_event_log)(efi_handle_t, efi_tcg2_event_log_format, - efi_physical_addr_t *, efi_physical_addr_t *, efi_bool_t *); - void *hash_log_extend_event; - void *submit_command; - void *get_active_pcr_banks; - void *set_active_pcr_banks; - void *get_result_of_set_active_pcr_banks; -} efi_tcg2_protocol_t; +typedef union efi_tcg2_protocol efi_tcg2_protocol_t; + +union efi_tcg2_protocol { + struct { + void *get_capability; + efi_status_t (*get_event_log)(efi_handle_t, + efi_tcg2_event_log_format, + efi_physical_addr_t *, + efi_physical_addr_t *, + efi_bool_t *); + void *hash_log_extend_event; + void *submit_command; + void *get_active_pcr_banks; + void *set_active_pcr_banks; + void *get_result_of_set_active_pcr_banks; + }; + struct { + u32 get_capability; + u32 get_event_log; + u32 hash_log_extend_event; + u32 submit_command; + u32 get_active_pcr_banks; + u32 set_active_pcr_banks; + u32 get_result_of_set_active_pcr_banks; + } mixed_mode; +}; /* * Types and defines for EFI ResetSystem @@ -618,22 +698,25 
@@ typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long size, bool nonblocking); -typedef struct { - efi_table_hdr_t hdr; - efi_get_time_t *get_time; - efi_set_time_t *set_time; - efi_get_wakeup_time_t *get_wakeup_time; - efi_set_wakeup_time_t *set_wakeup_time; - efi_set_virtual_address_map_t *set_virtual_address_map; - void *convert_pointer; - efi_get_variable_t *get_variable; - efi_get_next_variable_t *get_next_variable; - efi_set_variable_t *set_variable; - efi_get_next_high_mono_count_t *get_next_high_mono_count; - efi_reset_system_t *reset_system; - efi_update_capsule_t *update_capsule; - efi_query_capsule_caps_t *query_capsule_caps; - efi_query_variable_info_t *query_variable_info; +typedef union { + struct { + efi_table_hdr_t hdr; + efi_get_time_t *get_time; + efi_set_time_t *set_time; + efi_get_wakeup_time_t *get_wakeup_time; + efi_set_wakeup_time_t *set_wakeup_time; + efi_set_virtual_address_map_t *set_virtual_address_map; + void *convert_pointer; + efi_get_variable_t *get_variable; + efi_get_next_variable_t *get_next_variable; + efi_set_variable_t *set_variable; + efi_get_next_high_mono_count_t *get_next_high_mono_count; + efi_reset_system_t *reset_system; + efi_update_capsule_t *update_capsule; + efi_query_capsule_caps_t *query_capsule_caps; + efi_query_variable_info_t *query_variable_info; + }; + efi_runtime_services_32_t mixed_mode; } efi_runtime_services_t; void efi_native_runtime_setup(void); @@ -719,9 +802,12 @@ typedef struct { u32 table; } efi_config_table_32_t; -typedef struct { - efi_guid_t guid; - unsigned long table; +typedef union { + struct { + efi_guid_t guid; + unsigned long table; + }; + efi_config_table_32_t mixed_mode; } efi_config_table_t; typedef struct { @@ -773,20 +859,23 @@ typedef struct { u32 tables; } efi_system_table_32_t; -typedef struct { - efi_table_hdr_t hdr; - unsigned long fw_vendor; /* physical addr of CHAR16 vendor string */ - u32 fw_revision; - unsigned long con_in_handle; - unsigned long 
con_in; - unsigned long con_out_handle; - unsigned long con_out; - unsigned long stderr_handle; - unsigned long stderr; - efi_runtime_services_t *runtime; - efi_boot_services_t *boottime; - unsigned long nr_tables; - unsigned long tables; +typedef union { + struct { + efi_table_hdr_t hdr; + unsigned long fw_vendor; /* physical addr of CHAR16 vendor string */ + u32 fw_revision; + unsigned long con_in_handle; + unsigned long con_in; + unsigned long con_out_handle; + unsigned long con_out; + unsigned long stderr_handle; + unsigned long stderr; + efi_runtime_services_t *runtime; + efi_boot_services_t *boottime; + unsigned long nr_tables; + unsigned long tables; + }; + efi_system_table_32_t mixed_mode; } efi_system_table_t; /* @@ -856,22 +945,40 @@ typedef struct { u64 unload; } efi_loaded_image_64_t; -typedef struct { - u32 revision; - efi_handle_t parent_handle; - efi_system_table_t *system_table; - efi_handle_t device_handle; - void *file_path; - void *reserved; - u32 load_options_size; - void *load_options; - void *image_base; - __aligned_u64 image_size; - unsigned int image_code_type; - unsigned int image_data_type; - efi_status_t (*unload)(efi_handle_t image_handle); -} efi_loaded_image_t; +typedef union efi_loaded_image efi_loaded_image_t; +union efi_loaded_image { + struct { + u32 revision; + efi_handle_t parent_handle; + efi_system_table_t *system_table; + efi_handle_t device_handle; + void *file_path; + void *reserved; + u32 load_options_size; + void *load_options; + void *image_base; + __aligned_u64 image_size; + unsigned int image_code_type; + unsigned int image_data_type; + efi_status_t (*unload)(efi_handle_t image_handle); + }; + struct { + u32 revision; + u32 parent_handle; + u32 system_table; + u32 device_handle; + u32 file_path; + u32 reserved; + u32 load_options_size; + u32 load_options; + u32 image_base; + __aligned_u64 image_size; + unsigned int image_code_type; + unsigned int image_data_type; + u32 unload; + } mixed_mode; +}; typedef struct { u64 
size; @@ -912,23 +1019,40 @@ typedef struct { u64 flush; } efi_file_handle_64_t; -typedef struct _efi_file_handle { - u64 revision; - efi_status_t (*open)(struct _efi_file_handle *, - struct _efi_file_handle **, - efi_char16_t *, u64, u64); - efi_status_t (*close)(struct _efi_file_handle *); - void *delete; - efi_status_t (*read)(struct _efi_file_handle *, unsigned long *, - void *); - void *write; - void *get_position; - void *set_position; - efi_status_t (*get_info)(struct _efi_file_handle *, efi_guid_t *, - unsigned long *, void *); - void *set_info; - void *flush; -} efi_file_handle_t; +typedef union efi_file_handle efi_file_handle_t; + +union efi_file_handle { + struct { + u64 revision; + efi_status_t (*open)(efi_file_handle_t *, + efi_file_handle_t **, + efi_char16_t *, u64, u64); + efi_status_t (*close)(efi_file_handle_t *); + void *delete; + efi_status_t (*read)(efi_file_handle_t *, unsigned long *, + void *); + void *write; + void *get_position; + void *set_position; + efi_status_t (*get_info)(efi_file_handle_t *, efi_guid_t *, + unsigned long *, void *); + void *set_info; + void *flush; + }; + struct { + u64 revision; + u32 open; + u32 close; + u32 delete; + u32 read; + u32 write; + u32 get_position; + u32 set_position; + u32 get_info; + u32 set_info; + u32 flush; + } mixed_mode; +}; typedef struct { u64 revision; @@ -940,11 +1064,19 @@ typedef struct { u64 open_volume; } efi_file_io_interface_64_t; -typedef struct _efi_file_io_interface { - u64 revision; - int (*open_volume)(struct _efi_file_io_interface *, - efi_file_handle_t **); -} efi_file_io_interface_t; +typedef union efi_file_io_interface efi_file_io_interface_t; + +union efi_file_io_interface { + struct { + u64 revision; + int (*open_volume)(efi_file_io_interface_t *, + efi_file_handle_t **); + }; + struct { + u64 revision; + u32 open_volume; + } mixed_mode; +} ; #define EFI_FILE_MODE_READ 0x0000000000000001 #define EFI_FILE_MODE_WRITE 0x0000000000000002 @@ -1416,10 +1548,20 @@ typedef struct { 
u64 test_string; } efi_simple_text_output_protocol_64_t; -struct efi_simple_text_output_protocol { - void *reset; - efi_status_t (*output_string)(void *, void *); - void *test_string; +typedef union efi_simple_text_output_protocol efi_simple_text_output_protocol_t; + +union efi_simple_text_output_protocol { + struct { + void *reset; + efi_status_t (*output_string)(efi_simple_text_output_protocol_t *, + efi_char16_t *); + void *test_string; + }; + struct { + u32 reset; + u32 output_string; + u32 test_string; + } mixed_mode; }; #define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 @@ -1462,14 +1604,26 @@ typedef struct { u64 frame_buffer_size; } efi_graphics_output_protocol_mode_64_t; -typedef struct { - u32 max_mode; - u32 mode; - efi_graphics_output_mode_info_t *info; - unsigned long size_of_info; - efi_physical_addr_t frame_buffer_base; - unsigned long frame_buffer_size; -} efi_graphics_output_protocol_mode_t; +typedef union efi_graphics_output_protocol_mode efi_graphics_output_protocol_mode_t; + +union efi_graphics_output_protocol_mode { + struct { + u32 max_mode; + u32 mode; + efi_graphics_output_mode_info_t *info; + unsigned long size_of_info; + efi_physical_addr_t frame_buffer_base; + unsigned long frame_buffer_size; + }; + struct { + u32 max_mode; + u32 mode; + u32 info; + u32 size_of_info; + u64 frame_buffer_base; + u32 frame_buffer_size; + } mixed_mode; +}; typedef struct { u32 query_mode; @@ -1485,12 +1639,22 @@ typedef struct { u64 mode; } efi_graphics_output_protocol_64_t; -typedef struct { - void *query_mode; - void *set_mode; - void *blt; - efi_graphics_output_protocol_mode_t *mode; -} efi_graphics_output_protocol_t; +typedef union efi_graphics_output_protocol efi_graphics_output_protocol_t; + +union efi_graphics_output_protocol { + struct { + void *query_mode; + void *set_mode; + void *blt; + efi_graphics_output_protocol_mode_t *mode; + }; + struct { + u32 query_mode; + u32 set_mode; + u32 blt; + u32 mode; + } mixed_mode; +}; extern struct list_head 
efivar_sysfs_list; -- cgit v1.2.3 From f958efe97596837f9504fc38d75ef8e284bc0ebd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:09 +0100 Subject: efi/libstub: Distinguish between native/mixed not 32/64 bit Currently, we support mixed mode by casting all boot time firmware calls to 64-bit explicitly on native 64-bit systems, and to 32-bit on 32-bit systems or 64-bit systems running with 32-bit firmware. Due to this explicit awareness of the bitness in the code, we do a lot of casting even on generic code that is shared with other architectures, where mixed mode does not even exist. This casting leads to loss of coverage of type checking by the compiler, which we should try to avoid. So instead of distinguishing between 32-bit vs 64-bit, distinguish between native vs mixed, and limit all the nasty casting and pointer mangling to the code that actually deals with mixed mode. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-10-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 5a220af263b1..e9d74e9667c0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -49,11 +49,11 @@ typedef u64 efi_physical_addr_t; typedef void *efi_handle_t; #define efi_get_handle_at(array, idx) \ - (efi_is_64bit() ? (efi_handle_t)(unsigned long)((u64 *)(array))[idx] \ + (efi_is_native() ? (array)[idx] \ : (efi_handle_t)(unsigned long)((u32 *)(array))[idx]) #define efi_get_handle_num(size) \ - ((size) / (efi_is_64bit() ? sizeof(u64) : sizeof(u32))) + ((size) / (efi_is_native() ? 
sizeof(efi_handle_t) : sizeof(u32))) #define for_each_efi_handle(handle, array, size, i) \ for (i = 0; \ @@ -805,7 +805,7 @@ typedef struct { typedef union { struct { efi_guid_t guid; - unsigned long table; + void *table; }; efi_config_table_32_t mixed_mode; } efi_config_table_t; -- cgit v1.2.3 From e8bd5ddf60eedd6d584fa1e98d0cfe45abe95043 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:10 +0100 Subject: efi/libstub: Drop explicit 32/64-bit protocol definitions Now that we have incorporated the mixed mode protocol definitions into the native ones using unions, we no longer need the separate 32/64 bit struct definitions, with the exception of the EFI system table definition and the boot services, runtime services and configuration table definitions. So drop the unused ones. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-11-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 245 +--------------------------------------------------- 1 file changed, 1 insertion(+), 244 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index e9d74e9667c0..d8e987910853 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -264,54 +264,6 @@ typedef struct { u32 create_event_ex; } __packed efi_boot_services_32_t; -typedef struct { - efi_table_hdr_t hdr; - u64 raise_tpl; - u64 restore_tpl; - u64 allocate_pages; - u64 free_pages; - u64 get_memory_map; - u64 allocate_pool; - u64 free_pool; - u64 create_event; - u64 set_timer; - u64 wait_for_event; - u64 signal_event; - u64 close_event; - u64 check_event; - u64 install_protocol_interface; - u64 reinstall_protocol_interface; - u64 uninstall_protocol_interface; - u64 handle_protocol; - u64 __reserved; - u64 register_protocol_notify; - u64 locate_handle; - u64 locate_device_path; - u64 
install_configuration_table; - u64 load_image; - u64 start_image; - u64 exit; - u64 unload_image; - u64 exit_boot_services; - u64 get_next_monotonic_count; - u64 stall; - u64 set_watchdog_timer; - u64 connect_controller; - u64 disconnect_controller; - u64 open_protocol; - u64 close_protocol; - u64 open_protocol_information; - u64 protocols_per_handle; - u64 locate_handle_buffer; - u64 locate_protocol; - u64 install_multiple_protocol_interfaces; - u64 uninstall_multiple_protocol_interfaces; - u64 calculate_crc32; - u64 copy_mem; - u64 set_mem; - u64 create_event_ex; -} __packed efi_boot_services_64_t; - /* * EFI Boot Services table */ @@ -399,11 +351,6 @@ typedef struct { u32 write; } efi_pci_io_protocol_access_32_t; -typedef struct { - u64 read; - u64 write; -} efi_pci_io_protocol_access_64_t; - typedef union efi_pci_io_protocol efi_pci_io_protocol_t; typedef @@ -422,46 +369,6 @@ typedef struct { efi_pci_io_protocol_cfg_t write; } efi_pci_io_protocol_config_access_t; -typedef struct { - u32 poll_mem; - u32 poll_io; - efi_pci_io_protocol_access_32_t mem; - efi_pci_io_protocol_access_32_t io; - efi_pci_io_protocol_access_32_t pci; - u32 copy_mem; - u32 map; - u32 unmap; - u32 allocate_buffer; - u32 free_buffer; - u32 flush; - u32 get_location; - u32 attributes; - u32 get_bar_attributes; - u32 set_bar_attributes; - u64 romsize; - u32 romimage; -} efi_pci_io_protocol_32_t; - -typedef struct { - u64 poll_mem; - u64 poll_io; - efi_pci_io_protocol_access_64_t mem; - efi_pci_io_protocol_access_64_t io; - efi_pci_io_protocol_access_64_t pci; - u64 copy_mem; - u64 map; - u64 unmap; - u64 allocate_buffer; - u64 free_buffer; - u64 flush; - u64 get_location; - u64 attributes; - u64 get_bar_attributes; - u64 set_bar_attributes; - u64 romsize; - u64 romimage; -} efi_pci_io_protocol_64_t; - union efi_pci_io_protocol { struct { void *poll_mem; @@ -523,22 +430,6 @@ union efi_pci_io_protocol { #define EFI_PCI_IO_ATTRIBUTE_VGA_PALETTE_IO_16 0x20000 #define 
EFI_PCI_IO_ATTRIBUTE_VGA_IO_16 0x40000 -typedef struct { - u32 version; - u32 get; - u32 set; - u32 del; - u32 get_all; -} apple_properties_protocol_32_t; - -typedef struct { - u64 version; - u64 get; - u64 set; - u64 del; - u64 get_all; -} apple_properties_protocol_64_t; - struct efi_dev_path; typedef union apple_properties_protocol apple_properties_protocol_t; @@ -566,26 +457,6 @@ union apple_properties_protocol { } mixed_mode; }; -typedef struct { - u32 get_capability; - u32 get_event_log; - u32 hash_log_extend_event; - u32 submit_command; - u32 get_active_pcr_banks; - u32 set_active_pcr_banks; - u32 get_result_of_set_active_pcr_banks; -} efi_tcg2_protocol_32_t; - -typedef struct { - u64 get_capability; - u64 get_event_log; - u64 hash_log_extend_event; - u64 submit_command; - u64 get_active_pcr_banks; - u64 set_active_pcr_banks; - u64 get_result_of_set_active_pcr_banks; -} efi_tcg2_protocol_64_t; - typedef u32 efi_tcg2_event_log_format; typedef union efi_tcg2_protocol efi_tcg2_protocol_t; @@ -913,38 +784,6 @@ struct efi_fdt_params { u32 desc_ver; }; -typedef struct { - u32 revision; - u32 parent_handle; - u32 system_table; - u32 device_handle; - u32 file_path; - u32 reserved; - u32 load_options_size; - u32 load_options; - u32 image_base; - __aligned_u64 image_size; - unsigned int image_code_type; - unsigned int image_data_type; - u32 unload; -} efi_loaded_image_32_t; - -typedef struct { - u32 revision; - u64 parent_handle; - u64 system_table; - u64 device_handle; - u64 file_path; - u64 reserved; - u32 load_options_size; - u64 load_options; - u64 image_base; - __aligned_u64 image_size; - unsigned int image_code_type; - unsigned int image_data_type; - u64 unload; -} efi_loaded_image_64_t; - typedef union efi_loaded_image efi_loaded_image_t; union efi_loaded_image { @@ -991,34 +830,6 @@ typedef struct { efi_char16_t filename[1]; } efi_file_info_t; -typedef struct { - u64 revision; - u32 open; - u32 close; - u32 delete; - u32 read; - u32 write; - u32 get_position; - 
u32 set_position; - u32 get_info; - u32 set_info; - u32 flush; -} efi_file_handle_32_t; - -typedef struct { - u64 revision; - u64 open; - u64 close; - u64 delete; - u64 read; - u64 write; - u64 get_position; - u64 set_position; - u64 get_info; - u64 set_info; - u64 flush; -} efi_file_handle_64_t; - typedef union efi_file_handle efi_file_handle_t; union efi_file_handle { @@ -1054,16 +865,6 @@ union efi_file_handle { } mixed_mode; }; -typedef struct { - u64 revision; - u32 open_volume; -} efi_file_io_interface_32_t; - -typedef struct { - u64 revision; - u64 open_volume; -} efi_file_io_interface_64_t; - typedef union efi_file_io_interface efi_file_io_interface_t; union efi_file_io_interface { @@ -1076,7 +877,7 @@ union efi_file_io_interface { u64 revision; u32 open_volume; } mixed_mode; -} ; +}; #define EFI_FILE_MODE_READ 0x0000000000000001 #define EFI_FILE_MODE_WRITE 0x0000000000000002 @@ -1536,18 +1337,6 @@ struct efivar_entry { bool deleting; }; -typedef struct { - u32 reset; - u32 output_string; - u32 test_string; -} efi_simple_text_output_protocol_32_t; - -typedef struct { - u64 reset; - u64 output_string; - u64 test_string; -} efi_simple_text_output_protocol_64_t; - typedef union efi_simple_text_output_protocol efi_simple_text_output_protocol_t; union efi_simple_text_output_protocol { @@ -1586,24 +1375,6 @@ typedef struct { u32 pixels_per_scan_line; } efi_graphics_output_mode_info_t; -typedef struct { - u32 max_mode; - u32 mode; - u32 info; - u32 size_of_info; - u64 frame_buffer_base; - u32 frame_buffer_size; -} efi_graphics_output_protocol_mode_32_t; - -typedef struct { - u32 max_mode; - u32 mode; - u64 info; - u64 size_of_info; - u64 frame_buffer_base; - u64 frame_buffer_size; -} efi_graphics_output_protocol_mode_64_t; - typedef union efi_graphics_output_protocol_mode efi_graphics_output_protocol_mode_t; union efi_graphics_output_protocol_mode { @@ -1625,20 +1396,6 @@ union efi_graphics_output_protocol_mode { } mixed_mode; }; -typedef struct { - u32 
query_mode; - u32 set_mode; - u32 blt; - u32 mode; -} efi_graphics_output_protocol_32_t; - -typedef struct { - u64 query_mode; - u64 set_mode; - u64 blt; - u64 mode; -} efi_graphics_output_protocol_64_t; - typedef union efi_graphics_output_protocol efi_graphics_output_protocol_t; union efi_graphics_output_protocol { -- cgit v1.2.3 From 960a8d01834eabc4549928c60f8ce0300ad08519 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:11 +0100 Subject: efi/libstub: Use stricter typing for firmware function pointers We will soon remove another level of pointer casting, so let's make sure all type handling involving firmware calls at boot time is correct. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-12-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index d8e987910853..880077639113 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -382,7 +382,11 @@ union efi_pci_io_protocol { void *allocate_buffer; void *free_buffer; void *flush; - void *get_location; + efi_status_t (*get_location)(efi_pci_io_protocol_t *, + unsigned long *segment_nr, + unsigned long *bus_nr, + unsigned long *device_nr, + unsigned long *function_nr); void *attributes; void *get_bar_attributes; void *set_bar_attributes; @@ -730,6 +734,8 @@ typedef struct { u32 tables; } efi_system_table_32_t; +typedef union efi_simple_text_output_protocol efi_simple_text_output_protocol_t; + typedef union { struct { efi_table_hdr_t hdr; @@ -738,7 +744,7 @@ typedef union { unsigned long con_in_handle; unsigned long con_in; unsigned long con_out_handle; - unsigned long con_out; + efi_simple_text_output_protocol_t *con_out; unsigned long stderr_handle; unsigned long 
stderr; efi_runtime_services_t *runtime; @@ -1337,8 +1343,6 @@ struct efivar_entry { bool deleting; }; -typedef union efi_simple_text_output_protocol efi_simple_text_output_protocol_t; - union efi_simple_text_output_protocol { struct { void *reset; -- cgit v1.2.3 From 8f24f8c2fc82f701866419dcb594e2cc1d3f46ba Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:12 +0100 Subject: efi/libstub: Annotate firmware routines as __efiapi Annotate all the firmware routines (boot services, runtime services and protocol methods) called in the boot context as __efiapi, and make it expand to __attribute__((ms_abi)) on 64-bit x86. This allows us to use the compiler to generate the calls into firmware that use the MS calling convention instead of the SysV one. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-13-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 147 +++++++++++++++++++++++++++++----------------------- 1 file changed, 82 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 880077639113..2074b737aa17 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -48,6 +48,12 @@ typedef u16 efi_char16_t; /* UNICODE character */ typedef u64 efi_physical_addr_t; typedef void *efi_handle_t; +#ifdef CONFIG_X86_64 +#define __efiapi __attribute__((ms_abi)) +#else +#define __efiapi +#endif + #define efi_get_handle_at(array, idx) \ (efi_is_native() ? 
(array)[idx] \ : (efi_handle_t)(unsigned long)((u32 *)(array))[idx]) @@ -272,13 +278,16 @@ typedef union { efi_table_hdr_t hdr; void *raise_tpl; void *restore_tpl; - efi_status_t (*allocate_pages)(int, int, unsigned long, - efi_physical_addr_t *); - efi_status_t (*free_pages)(efi_physical_addr_t, unsigned long); - efi_status_t (*get_memory_map)(unsigned long *, void *, unsigned long *, - unsigned long *, u32 *); - efi_status_t (*allocate_pool)(int, unsigned long, void **); - efi_status_t (*free_pool)(void *); + efi_status_t (__efiapi *allocate_pages)(int, int, unsigned long, + efi_physical_addr_t *); + efi_status_t (__efiapi *free_pages)(efi_physical_addr_t, + unsigned long); + efi_status_t (__efiapi *get_memory_map)(unsigned long *, void *, + unsigned long *, + unsigned long *, u32 *); + efi_status_t (__efiapi *allocate_pool)(int, unsigned long, + void **); + efi_status_t (__efiapi *free_pool)(void *); void *create_event; void *set_timer; void *wait_for_event; @@ -288,18 +297,22 @@ typedef union { void *install_protocol_interface; void *reinstall_protocol_interface; void *uninstall_protocol_interface; - efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **); + efi_status_t (__efiapi *handle_protocol)(efi_handle_t, + efi_guid_t *, void **); void *__reserved; void *register_protocol_notify; - efi_status_t (*locate_handle)(int, efi_guid_t *, void *, - unsigned long *, efi_handle_t *); + efi_status_t (__efiapi *locate_handle)(int, efi_guid_t *, + void *, unsigned long *, + efi_handle_t *); void *locate_device_path; - efi_status_t (*install_configuration_table)(efi_guid_t *, void *); + efi_status_t (__efiapi *install_configuration_table)(efi_guid_t *, + void *); void *load_image; void *start_image; void *exit; void *unload_image; - efi_status_t (*exit_boot_services)(efi_handle_t, unsigned long); + efi_status_t (__efiapi *exit_boot_services)(efi_handle_t, + unsigned long); void *get_next_monotonic_count; void *stall; void *set_watchdog_timer; @@ -310,7 
+323,8 @@ typedef union { void *open_protocol_information; void *protocols_per_handle; void *locate_handle_buffer; - efi_status_t (*locate_protocol)(efi_guid_t *, void *, void **); + efi_status_t (__efiapi *locate_protocol)(efi_guid_t *, void *, + void **); void *install_multiple_protocol_interfaces; void *uninstall_multiple_protocol_interfaces; void *calculate_crc32; @@ -354,10 +368,11 @@ typedef struct { typedef union efi_pci_io_protocol efi_pci_io_protocol_t; typedef -efi_status_t (*efi_pci_io_protocol_cfg_t)(efi_pci_io_protocol_t *, - EFI_PCI_IO_PROTOCOL_WIDTH, - u32 offset, unsigned long count, - void *buffer); +efi_status_t (__efiapi *efi_pci_io_protocol_cfg_t)(efi_pci_io_protocol_t *, + EFI_PCI_IO_PROTOCOL_WIDTH, + u32 offset, + unsigned long count, + void *buffer); typedef struct { void *read; @@ -382,11 +397,11 @@ union efi_pci_io_protocol { void *allocate_buffer; void *free_buffer; void *flush; - efi_status_t (*get_location)(efi_pci_io_protocol_t *, - unsigned long *segment_nr, - unsigned long *bus_nr, - unsigned long *device_nr, - unsigned long *function_nr); + efi_status_t (__efiapi *get_location)(efi_pci_io_protocol_t *, + unsigned long *segment_nr, + unsigned long *bus_nr, + unsigned long *device_nr, + unsigned long *func_nr); void *attributes; void *get_bar_attributes; void *set_bar_attributes; @@ -441,16 +456,17 @@ typedef union apple_properties_protocol apple_properties_protocol_t; union apple_properties_protocol { struct { unsigned long version; - efi_status_t (*get)(apple_properties_protocol_t *, - struct efi_dev_path *, efi_char16_t *, - void *, u32 *); - efi_status_t (*set)(apple_properties_protocol_t *, - struct efi_dev_path *, efi_char16_t *, - void *, u32); - efi_status_t (*del)(apple_properties_protocol_t *, - struct efi_dev_path *, efi_char16_t *); - efi_status_t (*get_all)(apple_properties_protocol_t *, - void *buffer, u32 *); + efi_status_t (__efiapi *get)(apple_properties_protocol_t *, + struct efi_dev_path *, + efi_char16_t *, void *, 
u32 *); + efi_status_t (__efiapi *set)(apple_properties_protocol_t *, + struct efi_dev_path *, + efi_char16_t *, void *, u32); + efi_status_t (__efiapi *del)(apple_properties_protocol_t *, + struct efi_dev_path *, + efi_char16_t *); + efi_status_t (__efiapi *get_all)(apple_properties_protocol_t *, + void *buffer, u32 *); }; struct { u32 version; @@ -468,11 +484,11 @@ typedef union efi_tcg2_protocol efi_tcg2_protocol_t; union efi_tcg2_protocol { struct { void *get_capability; - efi_status_t (*get_event_log)(efi_handle_t, - efi_tcg2_event_log_format, - efi_physical_addr_t *, - efi_physical_addr_t *, - efi_bool_t *); + efi_status_t (__efiapi *get_event_log)(efi_handle_t, + efi_tcg2_event_log_format, + efi_physical_addr_t *, + efi_physical_addr_t *, + efi_bool_t *); void *hash_log_extend_event; void *submit_command; void *get_active_pcr_banks; @@ -575,21 +591,21 @@ typedef efi_status_t efi_query_variable_store_t(u32 attributes, typedef union { struct { - efi_table_hdr_t hdr; - efi_get_time_t *get_time; - efi_set_time_t *set_time; - efi_get_wakeup_time_t *get_wakeup_time; - efi_set_wakeup_time_t *set_wakeup_time; - efi_set_virtual_address_map_t *set_virtual_address_map; - void *convert_pointer; - efi_get_variable_t *get_variable; - efi_get_next_variable_t *get_next_variable; - efi_set_variable_t *set_variable; - efi_get_next_high_mono_count_t *get_next_high_mono_count; - efi_reset_system_t *reset_system; - efi_update_capsule_t *update_capsule; - efi_query_capsule_caps_t *query_capsule_caps; - efi_query_variable_info_t *query_variable_info; + efi_table_hdr_t hdr; + efi_get_time_t __efiapi *get_time; + efi_set_time_t __efiapi *set_time; + efi_get_wakeup_time_t __efiapi *get_wakeup_time; + efi_set_wakeup_time_t __efiapi *set_wakeup_time; + efi_set_virtual_address_map_t __efiapi *set_virtual_address_map; + void *convert_pointer; + efi_get_variable_t __efiapi *get_variable; + efi_get_next_variable_t __efiapi *get_next_variable; + efi_set_variable_t __efiapi *set_variable; + 
efi_get_next_high_mono_count_t __efiapi *get_next_high_mono_count; + efi_reset_system_t __efiapi *reset_system; + efi_update_capsule_t __efiapi *update_capsule; + efi_query_capsule_caps_t __efiapi *query_capsule_caps; + efi_query_variable_info_t __efiapi *query_variable_info; }; efi_runtime_services_32_t mixed_mode; } efi_runtime_services_t; @@ -806,7 +822,7 @@ union efi_loaded_image { __aligned_u64 image_size; unsigned int image_code_type; unsigned int image_data_type; - efi_status_t (*unload)(efi_handle_t image_handle); + efi_status_t ( __efiapi *unload)(efi_handle_t image_handle); }; struct { u32 revision; @@ -841,18 +857,19 @@ typedef union efi_file_handle efi_file_handle_t; union efi_file_handle { struct { u64 revision; - efi_status_t (*open)(efi_file_handle_t *, - efi_file_handle_t **, - efi_char16_t *, u64, u64); - efi_status_t (*close)(efi_file_handle_t *); + efi_status_t (__efiapi *open)(efi_file_handle_t *, + efi_file_handle_t **, + efi_char16_t *, u64, u64); + efi_status_t (__efiapi *close)(efi_file_handle_t *); void *delete; - efi_status_t (*read)(efi_file_handle_t *, unsigned long *, - void *); + efi_status_t (__efiapi *read)(efi_file_handle_t *, + unsigned long *, void *); void *write; void *get_position; void *set_position; - efi_status_t (*get_info)(efi_file_handle_t *, efi_guid_t *, - unsigned long *, void *); + efi_status_t (__efiapi *get_info)(efi_file_handle_t *, + efi_guid_t *, unsigned long *, + void *); void *set_info; void *flush; }; @@ -876,8 +893,8 @@ typedef union efi_file_io_interface efi_file_io_interface_t; union efi_file_io_interface { struct { u64 revision; - int (*open_volume)(efi_file_io_interface_t *, - efi_file_handle_t **); + int (__efiapi *open_volume)(efi_file_io_interface_t *, + efi_file_handle_t **); }; struct { u64 revision; @@ -1346,8 +1363,8 @@ struct efivar_entry { union efi_simple_text_output_protocol { struct { void *reset; - efi_status_t (*output_string)(efi_simple_text_output_protocol_t *, - efi_char16_t *); + 
efi_status_t (__efiapi *output_string)(efi_simple_text_output_protocol_t *, + efi_char16_t *); void *test_string; }; struct { -- cgit v1.2.3 From 14e900c7e4033d6ee3398b9f133e1716cc072401 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:14 +0100 Subject: efi/libstub: Avoid protocol wrapper for file I/O routines The EFI file I/O routines built on top of the file I/O firmware services are incompatible with mixed mode, so there is no need to obfuscate them by using protocol wrappers whose only purpose is to hide the mixed mode handling. So let's switch to plain indirect calls instead. This also means we can drop the mixed_mode aliases from the various types involved. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-15-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 118 +++++++++++++++++----------------------------------- 1 file changed, 39 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 2074b737aa17..14dd08ecf8a7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -806,40 +806,21 @@ struct efi_fdt_params { u32 desc_ver; }; -typedef union efi_loaded_image efi_loaded_image_t; - -union efi_loaded_image { - struct { - u32 revision; - efi_handle_t parent_handle; - efi_system_table_t *system_table; - efi_handle_t device_handle; - void *file_path; - void *reserved; - u32 load_options_size; - void *load_options; - void *image_base; - __aligned_u64 image_size; - unsigned int image_code_type; - unsigned int image_data_type; - efi_status_t ( __efiapi *unload)(efi_handle_t image_handle); - }; - struct { - u32 revision; - u32 parent_handle; - u32 system_table; - u32 device_handle; - u32 file_path; - u32 reserved; - u32 load_options_size; - u32 load_options; - u32 image_base; - __aligned_u64 image_size; 
- unsigned int image_code_type; - unsigned int image_data_type; - u32 unload; - } mixed_mode; -}; +typedef struct { + u32 revision; + efi_handle_t parent_handle; + efi_system_table_t *system_table; + efi_handle_t device_handle; + void *file_path; + void *reserved; + u32 load_options_size; + void *load_options; + void *image_base; + __aligned_u64 image_size; + unsigned int image_code_type; + unsigned int image_data_type; + efi_status_t ( __efiapi *unload)(efi_handle_t image_handle); +} efi_loaded_image_t; typedef struct { u64 size; @@ -852,54 +833,33 @@ typedef struct { efi_char16_t filename[1]; } efi_file_info_t; -typedef union efi_file_handle efi_file_handle_t; - -union efi_file_handle { - struct { - u64 revision; - efi_status_t (__efiapi *open)(efi_file_handle_t *, - efi_file_handle_t **, - efi_char16_t *, u64, u64); - efi_status_t (__efiapi *close)(efi_file_handle_t *); - void *delete; - efi_status_t (__efiapi *read)(efi_file_handle_t *, - unsigned long *, void *); - void *write; - void *get_position; - void *set_position; - efi_status_t (__efiapi *get_info)(efi_file_handle_t *, - efi_guid_t *, unsigned long *, - void *); - void *set_info; - void *flush; - }; - struct { - u64 revision; - u32 open; - u32 close; - u32 delete; - u32 read; - u32 write; - u32 get_position; - u32 set_position; - u32 get_info; - u32 set_info; - u32 flush; - } mixed_mode; +typedef struct efi_file_handle efi_file_handle_t; + +struct efi_file_handle { + u64 revision; + efi_status_t (__efiapi *open)(efi_file_handle_t *, + efi_file_handle_t **, + efi_char16_t *, u64, u64); + efi_status_t (__efiapi *close)(efi_file_handle_t *); + void *delete; + efi_status_t (__efiapi *read)(efi_file_handle_t *, + unsigned long *, void *); + void *write; + void *get_position; + void *set_position; + efi_status_t (__efiapi *get_info)(efi_file_handle_t *, + efi_guid_t *, unsigned long *, + void *); + void *set_info; + void *flush; }; -typedef union efi_file_io_interface efi_file_io_interface_t; +typedef struct 
efi_file_io_interface efi_file_io_interface_t; -union efi_file_io_interface { - struct { - u64 revision; - int (__efiapi *open_volume)(efi_file_io_interface_t *, - efi_file_handle_t **); - }; - struct { - u64 revision; - u32 open_volume; - } mixed_mode; +struct efi_file_io_interface { + u64 revision; + int (__efiapi *open_volume)(efi_file_io_interface_t *, + efi_file_handle_t **); }; #define EFI_FILE_MODE_READ 0x0000000000000001 -- cgit v1.2.3 From 8173ec7905b5b07c989b06a105d171c169dde93b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:18 +0100 Subject: efi/libstub: Drop sys_table_arg from printk routines As a first step towards getting rid of the need to pass around a function parameter 'sys_table_arg' pointing to the EFI system table, remove the references to it in the printing code, which represents the majority of the use cases. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-19-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 14dd08ecf8a7..5b207db6ead0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1493,7 +1493,7 @@ static inline int efi_runtime_map_copy(void *buf, size_t bufsz) /* prototypes shared between arch specific and generic stub code */ -void efi_printk(efi_system_table_t *sys_table_arg, char *str); +void efi_printk(char *str); void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long addr); -- cgit v1.2.3 From cd33a5c1d53e43bef1683c70dc3b68b6d9e8eca6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:19 +0100 Subject: efi/libstub: Remove 'sys_table_arg' from all function prototypes We have a helper efi_system_table() that gives us the address of
the EFI system table in memory, so there is no longer point in passing it around from each function to the next. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-20-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 43 ++++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 5b207db6ead0..726673e98990 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1495,22 +1495,17 @@ static inline int efi_runtime_map_copy(void *buf, size_t bufsz) void efi_printk(char *str); -void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, - unsigned long addr); +void efi_free(unsigned long size, unsigned long addr); -char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, - efi_loaded_image_t *image, int *cmd_line_len); +char *efi_convert_cmdline(efi_loaded_image_t *image, int *cmd_line_len); -efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, - struct efi_boot_memmap *map); +efi_status_t efi_get_memory_map(struct efi_boot_memmap *map); -efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg, - unsigned long size, unsigned long align, +efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, unsigned long *addr, unsigned long min); static inline -efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, - unsigned long size, unsigned long align, +efi_status_t efi_low_alloc(unsigned long size, unsigned long align, unsigned long *addr) { /* @@ -1518,23 +1513,20 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, * checks pointers against NULL. Skip the first 8 * bytes so we start at a nice even number. 
*/ - return efi_low_alloc_above(sys_table_arg, size, align, addr, 0x8); + return efi_low_alloc_above(size, align, addr, 0x8); } -efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, - unsigned long size, unsigned long align, +efi_status_t efi_high_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long max); -efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, - unsigned long *image_addr, +efi_status_t efi_relocate_kernel(unsigned long *image_addr, unsigned long image_size, unsigned long alloc_size, unsigned long preferred_addr, unsigned long alignment, unsigned long min_addr); -efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, - efi_loaded_image_t *image, +efi_status_t handle_cmdline_files(efi_loaded_image_t *image, char *cmd_line, char *option_string, unsigned long max_addr, unsigned long *load_addr, @@ -1542,8 +1534,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, efi_status_t efi_parse_options(char const *cmdline); -efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, - struct screen_info *si, efi_guid_t *proto, +efi_status_t efi_setup_gop(struct screen_info *si, efi_guid_t *proto, unsigned long size); #ifdef CONFIG_EFI @@ -1561,18 +1552,18 @@ enum efi_secureboot_mode { efi_secureboot_mode_disabled, efi_secureboot_mode_enabled, }; -enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table); +enum efi_secureboot_mode efi_get_secureboot(void); #ifdef CONFIG_RESET_ATTACK_MITIGATION -void efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg); +void efi_enable_reset_attack_mitigation(void); #else static inline void -efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg) { } +efi_enable_reset_attack_mitigation(void) { } #endif -efi_status_t efi_random_get_seed(efi_system_table_t *sys_table_arg); +efi_status_t efi_random_get_seed(void); -void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table); +void 
efi_retrieve_tpm2_eventlog(void); /* * Arch code can implement the following three template macros, avoiding @@ -1624,12 +1615,10 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table); }) typedef efi_status_t (*efi_exit_boot_map_processing)( - efi_system_table_t *sys_table_arg, struct efi_boot_memmap *map, void *priv); -efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table, - void *handle, +efi_status_t efi_exit_boot_services(void *handle, struct efi_boot_memmap *map, void *priv, efi_exit_boot_map_processing priv_func); -- cgit v1.2.3 From 0e5dafc8a6e540c0145b61545c557c43be70af10 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 25 Dec 2019 18:16:09 -0800 Subject: net: phy: Introduce helper functions for time stamping support. Some parts of the networking stack and at least one driver test fields within the 'struct phy_device' in order to query time stamping capabilities and to invoke time stamping methods. This patch adds a functional interface around the time stamping fields. This will allow insulating the callers from future changes to the details of the time stamping implementation. Signed-off-by: Richard Cochran Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7d530b3f8855..0248f5e9939d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -936,6 +936,66 @@ static inline bool phy_polling_mode(struct phy_device *phydev) return phydev->irq == PHY_POLL; } +/** + * phy_has_hwtstamp - Tests whether a PHY time stamp configuration. + * @phydev: the phy_device struct + */ +static inline bool phy_has_hwtstamp(struct phy_device *phydev) +{ + return phydev && phydev->drv && phydev->drv->hwtstamp; +} + +/** + * phy_has_rxtstamp - Tests whether a PHY supports receive time stamping.
+ * @phydev: the phy_device struct + */ +static inline bool phy_has_rxtstamp(struct phy_device *phydev) +{ + return phydev && phydev->drv && phydev->drv->rxtstamp; +} + +/** + * phy_has_tsinfo - Tests whether a PHY reports time stamping and/or + * PTP hardware clock capabilities. + * @phydev: the phy_device struct + */ +static inline bool phy_has_tsinfo(struct phy_device *phydev) +{ + return phydev && phydev->drv && phydev->drv->ts_info; +} + +/** + * phy_has_txtstamp - Tests whether a PHY supports transmit time stamping. + * @phydev: the phy_device struct + */ +static inline bool phy_has_txtstamp(struct phy_device *phydev) +{ + return phydev && phydev->drv && phydev->drv->txtstamp; +} + +static inline int phy_hwtstamp(struct phy_device *phydev, struct ifreq *ifr) +{ + return phydev->drv->hwtstamp(phydev, ifr); +} + +static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb, + int type) +{ + return phydev->drv->rxtstamp(phydev, skb, type); +} + +static inline int phy_ts_info(struct phy_device *phydev, + struct ethtool_ts_info *tsinfo) +{ + return phydev->drv->ts_info(phydev, tsinfo); +} + +static inline void phy_txtstamp(struct phy_device *phydev, struct sk_buff *skb, + int type) +{ + phydev->drv->txtstamp(phydev, skb, type); +} + /** * phy_is_internal - Convenience function for testing if a PHY is internal * @phydev: the phy_device struct -- cgit v1.2.3 From 4715f65ffa0520af0680dbfbedbe349f175adaf4 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 25 Dec 2019 18:16:15 -0800 Subject: net: Introduce a new MII time stamping interface. Currently the stack supports time stamping in PHY devices. However, there are newer, non-PHY devices that can snoop an MII bus and provide time stamps. In order to support such devices, this patch introduces a new interface to be used by both PHY and non-PHY devices. In addition, the one and only user of the old PHY time stamping API is converted to the new interface. 
Signed-off-by: Richard Cochran Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mii_timestamper.h | 58 +++++++++++++++++++++++++++++++++++++++++ include/linux/phy.h | 41 +++++++---------------------- 2 files changed, 68 insertions(+), 31 deletions(-) create mode 100644 include/linux/mii_timestamper.h (limited to 'include/linux') diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h new file mode 100644 index 000000000000..36002386029c --- /dev/null +++ b/include/linux/mii_timestamper.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for generic time stamping devices on MII buses. + * Copyright (C) 2018 Richard Cochran + */ +#ifndef _LINUX_MII_TIMESTAMPER_H +#define _LINUX_MII_TIMESTAMPER_H + +#include +#include +#include + +struct phy_device; + +/** + * struct mii_timestamper - Callback interface to MII time stamping devices. + * + * @rxtstamp: Requests a Rx timestamp for 'skb'. If the skb is accepted, + * the MII time stamping device promises to deliver it using + * netif_rx() as soon as a timestamp becomes available. One of + * the PTP_CLASS_ values is passed in 'type'. The function + * must return true if the skb is accepted for delivery. + * + * @txtstamp: Requests a Tx timestamp for 'skb'. The MII time stamping + * device promises to deliver it using skb_complete_tx_timestamp() + * as soon as a timestamp becomes available. One of the PTP_CLASS_ + * values is passed in 'type'. + * + * @hwtstamp: Handles SIOCSHWTSTAMP ioctl for hardware time stamping. + * + * @link_state: Allows the device to respond to changes in the link + * state. The caller invokes this function while holding + * the phy_device mutex. + * + * @ts_info: Handles ethtool queries for hardware time stamping. + * + * Drivers for PHY time stamping devices should embed their + * mii_timestamper within a private structure, obtaining a reference + * to it using container_of(). 
+ */ +struct mii_timestamper { + bool (*rxtstamp)(struct mii_timestamper *mii_ts, + struct sk_buff *skb, int type); + + void (*txtstamp)(struct mii_timestamper *mii_ts, + struct sk_buff *skb, int type); + + int (*hwtstamp)(struct mii_timestamper *mii_ts, + struct ifreq *ifreq); + + void (*link_state)(struct mii_timestamper *mii_ts, + struct phy_device *phydev); + + int (*ts_info)(struct mii_timestamper *mii_ts, + struct ethtool_ts_info *ts_info); +}; + +#endif diff --git a/include/linux/phy.h b/include/linux/phy.h index 0248f5e9939d..30e599c454db 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -441,6 +442,7 @@ struct phy_device { struct sfp_bus *sfp_bus; struct phylink *phylink; struct net_device *attached_dev; + struct mii_timestamper *mii_ts; u8 mdix; u8 mdix_ctrl; @@ -546,29 +548,6 @@ struct phy_driver { */ int (*match_phy_device)(struct phy_device *phydev); - /* Handles ethtool queries for hardware time stamping. */ - int (*ts_info)(struct phy_device *phydev, struct ethtool_ts_info *ti); - - /* Handles SIOCSHWTSTAMP ioctl for hardware time stamping. */ - int (*hwtstamp)(struct phy_device *phydev, struct ifreq *ifr); - - /* - * Requests a Rx timestamp for 'skb'. If the skb is accepted, - * the phy driver promises to deliver it using netif_rx() as - * soon as a timestamp becomes available. One of the - * PTP_CLASS_ values is passed in 'type'. The function must - * return true if the skb is accepted for delivery. - */ - bool (*rxtstamp)(struct phy_device *dev, struct sk_buff *skb, int type); - - /* - * Requests a Tx timestamp for 'skb'. The phy driver promises - * to deliver it using skb_complete_tx_timestamp() as soon as a - * timestamp becomes available. One of the PTP_CLASS_ values - * is passed in 'type'. - */ - void (*txtstamp)(struct phy_device *dev, struct sk_buff *skb, int type); - /* Some devices (e.g. 
qnap TS-119P II) require PHY register changes to * enable Wake on LAN, so set_wol is provided to be called in the * ethernet driver's set_wol function. */ @@ -942,7 +921,7 @@ static inline bool phy_polling_mode(struct phy_device *phydev) */ static inline bool phy_has_hwtstamp(struct phy_device *phydev) { - return phydev && phydev->drv && phydev->drv->hwtstamp; + return phydev && phydev->mii_ts && phydev->mii_ts->hwtstamp; } /** @@ -951,7 +930,7 @@ static inline bool phy_has_hwtstamp(struct phy_device *phydev) */ static inline bool phy_has_rxtstamp(struct phy_device *phydev) { - return phydev && phydev->drv && phydev->drv->rxtstamp; + return phydev && phydev->mii_ts && phydev->mii_ts->rxtstamp; } /** @@ -961,7 +940,7 @@ static inline bool phy_has_rxtstamp(struct phy_device *phydev) */ static inline bool phy_has_tsinfo(struct phy_device *phydev) { - return phydev && phydev->drv && phydev->drv->ts_info; + return phydev && phydev->mii_ts && phydev->mii_ts->ts_info; } /** @@ -970,30 +949,30 @@ static inline bool phy_has_tsinfo(struct phy_device *phydev) */ static inline bool phy_has_txtstamp(struct phy_device *phydev) { - return phydev && phydev->drv && phydev->drv->txtstamp; + return phydev && phydev->mii_ts && phydev->mii_ts->txtstamp; } static inline int phy_hwtstamp(struct phy_device *phydev, struct ifreq *ifr) { - return phydev->drv->hwtstamp(phydev, ifr); + return phydev->mii_ts->hwtstamp(phydev->mii_ts, ifr); } static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb, int type) { - return phydev->drv->rxtstamp(phydev, skb, type); + return phydev->mii_ts->rxtstamp(phydev->mii_ts, skb, type); } static inline int phy_ts_info(struct phy_device *phydev, struct ethtool_ts_info *tsinfo) { - return phydev->drv->ts_info(phydev, tsinfo); + return phydev->mii_ts->ts_info(phydev->mii_ts, tsinfo); } static inline void phy_txtstamp(struct phy_device *phydev, struct sk_buff *skb, int type) { - phydev->drv->txtstamp(phydev, skb, type); + 
phydev->mii_ts->txtstamp(phydev->mii_ts, skb, type); } /** -- cgit v1.2.3 From 767ff483731502a0fc34f34a3a0851aca175eb71 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 25 Dec 2019 18:16:16 -0800 Subject: net: Add a layer for non-PHY MII time stamping drivers. While PHY time stamping drivers can simply attach their interface directly to the PHY instance, stand alone drivers require support in order to manage their services. Non-PHY MII time stamping drivers have a control interface over another bus like I2C, SPI, UART, or via a memory mapped peripheral. The controller device will be associated with one or more time stamping channels, each of which snoops in on a MII bus. This patch provides a glue layer that will enable time stamping channels to find their controlling device. Signed-off-by: Richard Cochran Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mii_timestamper.h | 63 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h index 36002386029c..fa940bbaf8ae 100644 --- a/include/linux/mii_timestamper.h +++ b/include/linux/mii_timestamper.h @@ -33,10 +33,15 @@ struct phy_device; * the phy_device mutex. * * @ts_info: Handles ethtool queries for hardware time stamping. + * @device: Remembers the device to which the instance belongs. * * Drivers for PHY time stamping devices should embed their * mii_timestamper within a private structure, obtaining a reference * to it using container_of(). + * + * Drivers for non-PHY time stamping devices should return a pointer + * to a mii_timestamper from the probe_channel() callback of their + * mii_timestamping_ctrl interface.
*/ struct mii_timestamper { bool (*rxtstamp)(struct mii_timestamper *mii_ts, @@ -53,6 +58,64 @@ struct mii_timestamper { int (*ts_info)(struct mii_timestamper *mii_ts, struct ethtool_ts_info *ts_info); + + struct device *device; +}; + +/** + * struct mii_timestamping_ctrl - MII time stamping controller interface. + * + * @probe_channel: Callback into the controller driver announcing the + * presence of the 'port' channel. The 'device' field + * had been passed to register_mii_tstamp_controller(). + * The driver must return either a pointer to a valid + * MII timestamper instance or PTR_ERR. + * + * @release_channel: Releases an instance obtained via .probe_channel. + */ +struct mii_timestamping_ctrl { + struct mii_timestamper *(*probe_channel)(struct device *device, + unsigned int port); + void (*release_channel)(struct device *device, + struct mii_timestamper *mii_ts); }; +#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING + +int register_mii_tstamp_controller(struct device *device, + struct mii_timestamping_ctrl *ctrl); + +void unregister_mii_tstamp_controller(struct device *device); + +struct mii_timestamper *register_mii_timestamper(struct device_node *node, + unsigned int port); + +void unregister_mii_timestamper(struct mii_timestamper *mii_ts); + +#else + +static inline +int register_mii_tstamp_controller(struct device *device, + struct mii_timestamping_ctrl *ctrl) +{ + return -EOPNOTSUPP; +} + +static inline void unregister_mii_tstamp_controller(struct device *device) +{ +} + +static inline +struct mii_timestamper *register_mii_timestamper(struct device_node *node, + unsigned int port) +{ + return NULL; +} + +static inline void unregister_mii_timestamper(struct mii_timestamper *mii_ts) +{ +} + +#endif + #endif -- cgit v1.2.3 From c782937e92826af464e65111e9f7a7a774b6c276 Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Mon, 16 Dec 2019 18:58:48 +0800 Subject: crypto: api - remove unneeded semicolon Fixes coccicheck warning: ./include/linux/crypto.h:573:2-3: Unneeded 
semicolon Signed-off-by: Chen Zhou Signed-off-by: Herbert Xu --- include/linux/crypto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index a905e524e332..8729f957f83c 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -570,7 +570,7 @@ static inline int crypto_wait_req(int err, struct crypto_wait *wait) reinit_completion(&wait->completion); err = wait->err; break; - }; + } return err; } -- cgit v1.2.3 From 2b4a8990b7df55875745a80a609a1ceaaf51f322 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Fri, 27 Dec 2019 15:55:18 +0100 Subject: ethtool: introduce ethtool netlink interface Basic genetlink and init infrastructure for the netlink interface, register genetlink family "ethtool". Add CONFIG_ETHTOOL_NETLINK Kconfig option to make the build optional. Add initial overall interface description into Documentation/networking/ethtool-netlink.rst, further patches will add more detailed information. Signed-off-by: Michal Kubecek Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/ethtool_netlink.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/linux/ethtool_netlink.h (limited to 'include/linux') diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h new file mode 100644 index 000000000000..f27e92b5f344 --- /dev/null +++ b/include/linux/ethtool_netlink.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _LINUX_ETHTOOL_NETLINK_H_ +#define _LINUX_ETHTOOL_NETLINK_H_ + +#include +#include + +#endif /* _LINUX_ETHTOOL_NETLINK_H_ */ -- cgit v1.2.3 From 6b08d6c146f4c5ed451c45339c10feb06d619db2 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Fri, 27 Dec 2019 15:55:33 +0100 Subject: ethtool: support for netlink notifications Add infrastructure for ethtool netlink notifications. 
There is only one multicast group "monitor" which is used to notify userspace about changes and actions performed. Notification messages (types using suffix _NTF) share the format with replies to GET requests. Notifications are supposed to be broadcasted on every configuration change, whether it is done using the netlink interface or ioctl one. Netlink SET requests only trigger a notification if some data is actually changed. To trigger an ethtool notification, both ethtool netlink and external code use ethtool_notify() helper. This helper requires RTNL to be held and may sleep. Handlers sending messages for specific notification message types are registered in ethnl_notify_handlers array. As notifications can be triggered from other code, ethnl_ok flag is used to prevent an attempt to send notification before genetlink family is registered. Signed-off-by: Michal Kubecek Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/ethtool_netlink.h | 5 +++++ include/linux/netdevice.h | 9 +++++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index f27e92b5f344..c98f6852c8eb 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -5,5 +5,10 @@ #include #include +#include + +enum ethtool_multicast_groups { + ETHNL_MCGRP_MONITOR, +}; #endif /* _LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 469a297b58c0..f007155ae8f4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4393,6 +4393,15 @@ struct netdev_notifier_bonding_info { void netdev_bonding_info_change(struct net_device *dev, struct netdev_bonding_info *bonding_info); +#if IS_ENABLED(CONFIG_ETHTOOL_NETLINK) +void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data); +#else +static inline void ethtool_notify(struct net_device *dev, unsigned int cmd, + const void *data) +{ +} 
+#endif + static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { -- cgit v1.2.3 From f625aa9be8c10f2e4dc677837e240730a25feda7 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Fri, 27 Dec 2019 15:56:08 +0100 Subject: ethtool: provide link mode information with LINKMODES_GET request Implement LINKMODES_GET netlink request to get link modes related information provided by ETHTOOL_GLINKSETTINGS and ETHTOOL_GSET ioctl commands. This request provides supported, advertised and peer advertised link modes, autonegotiation flag, speed and duplex. LINKMODES_GET request can be used with NLM_F_DUMP (without device identification) to request the information for all devices in current network namespace providing the data. Signed-off-by: Michal Kubecek Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/ethtool_netlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index c98f6852c8eb..d01b77887f82 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -7,6 +7,9 @@ #include #include +#define __ETHTOOL_LINK_MODE_MASK_NWORDS \ + DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) + enum ethtool_multicast_groups { ETHNL_MCGRP_MONITOR, }; -- cgit v1.2.3 From 544fed47af4d2174ac0b550e9c8da15c2dfdb117 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 27 Dec 2019 15:02:28 +0200 Subject: ptp: introduce ptp_cancel_worker_sync In order to effectively use the PTP kernel thread for tasks such as timestamping packets, allow the user control over stopping it, which is needed e.g. when the timestamping queues must be drained. Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/linux/ptp_clock_kernel.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 93cc4f1d444a..c64a1ef87240 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -243,6 +243,13 @@ int ptp_find_pin(struct ptp_clock *ptp, int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay); +/** + * ptp_cancel_worker_sync() - cancel ptp auxiliary clock + * + * @ptp: The clock obtained from ptp_clock_register(). + */ +void ptp_cancel_worker_sync(struct ptp_clock *ptp); + #else static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, struct device *parent) @@ -260,6 +267,8 @@ static inline int ptp_find_pin(struct ptp_clock *ptp, static inline int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) { return -EOPNOTSUPP; } +static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp) +{ } #endif -- cgit v1.2.3 From 1e762bd278d2a70bc74b9cbee7f1e93bd4704fe2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 27 Dec 2019 15:02:29 +0200 Subject: net: dsa: sja1105: Use PTP core's dedicated kernel thread for RX timestamping And move the queue of skb's waiting for RX timestamps into the ptp_data structure, since it isn't needed if PTP is not compiled. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 897e799dbcb9..c0b6a603ea8c 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -37,8 +37,6 @@ * the structure defined in struct sja1105_private. 
*/ struct sja1105_tagger_data { - struct sk_buff_head skb_rxtstamp_queue; - struct work_struct rxtstamp_work; struct sk_buff *stampable_skb; /* Protects concurrent access to the meta state machine * from taggers running on multiple ports on SMP systems -- cgit v1.2.3 From 2f004eea0fc8f86b45dfc2007add2d4986de8d02 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 19 Dec 2019 00:11:50 +0100 Subject: x86/kasan: Print original address on #GP Make #GP exceptions caused by out-of-bounds KASAN shadow accesses easier to understand by computing the address of the original access and printing that. More details are in the comments in the patch. This turns an error like this: kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault, probably for non-canonical address 0xe017577ddf75b7dd: 0000 [#1] PREEMPT SMP KASAN PTI into this: general protection fault, probably for non-canonical address 0xe017577ddf75b7dd: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: maybe wild-memory-access in range [0x00badbeefbadbee8-0x00badbeefbadbeef] The hook is placed in architecture-independent code, but is currently only wired up to the X86 exception handler because I'm not sufficiently familiar with the address space layout and exception handling mechanisms on other architectures. Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Reviewed-by: Dmitry Vyukov Cc: Alexander Potapenko Cc: Andrew Morton Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: kasan-dev@googlegroups.com Cc: linux-mm Cc: Peter Zijlstra Cc: Sean Christopherson Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20191218231150.12139-4-jannh@google.com --- include/linux/kasan.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index e18fe54969e9..5cde9e7c2664 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -228,4 +228,10 @@ static inline void kasan_release_vmalloc(unsigned long start, unsigned long free_region_end) {} #endif +#ifdef CONFIG_KASAN_INLINE +void kasan_non_canonical_hook(unsigned long addr); +#else /* CONFIG_KASAN_INLINE */ +static inline void kasan_non_canonical_hook(unsigned long addr) { } +#endif /* CONFIG_KASAN_INLINE */ + #endif /* LINUX_KASAN_H */ -- cgit v1.2.3 From 8a4ab0b866d8aba85b9899edebf14b87b25f817f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 15 Dec 2019 13:39:47 -0800 Subject: fscrypt: constify inode parameter to filename encryption functions Constify the struct inode parameter to fscrypt_fname_disk_to_usr() and the other filename encryption functions so that users don't have to pass in a non-const inode when they are dealing with a const one, as in [1]. 
[1] https://lkml.kernel.org/linux-ext4/20191203051049.44573-6-drosen@google.com/ Cc: Daniel Rosenberg Link: https://lore.kernel.org/r/20191215213947.9521-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 1a7bffe78ed5..6eaa729544a3 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -153,8 +153,10 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, struct fscrypt_str *); extern void fscrypt_fname_free_buffer(struct fscrypt_str *); -extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, - const struct fscrypt_str *, struct fscrypt_str *); +extern int fscrypt_fname_disk_to_usr(const struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname); #define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 @@ -438,7 +440,7 @@ static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) return; } -static inline int fscrypt_fname_disk_to_usr(struct inode *inode, +static inline int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname) -- cgit v1.2.3 From 56dce717950925f9d734b9e5621cbd41cbeb3e33 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 12:50:21 -0800 Subject: fscrypt: introduce fscrypt_needs_contents_encryption() Add a function fscrypt_needs_contents_encryption() which takes an inode and returns true if it's an encrypted regular file and the kernel was built with fscrypt support. This will allow replacing duplicated checks of IS_ENCRYPTED() && S_ISREG() on the I/O paths in ext4 and f2fs, while also optimizing out unneeded code when !CONFIG_FS_ENCRYPTION. 
Link: https://lore.kernel.org/r/20191209205021.231767-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 6eaa729544a3..6fe8d0f96a4a 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -72,6 +72,21 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return READ_ONCE(inode->i_crypt_info) != NULL; } +/** + * fscrypt_needs_contents_encryption() - check whether an inode needs + * contents encryption + * + * Return: %true iff the inode is an encrypted regular file and the kernel was + * built with fscrypt support. + * + * If you need to know whether the encrypt bit is set even when the kernel was + * built without fscrypt support, you must use IS_ENCRYPTED() directly instead. + */ +static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) +{ + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); +} + static inline bool fscrypt_dummy_context_enabled(struct inode *inode) { return inode->i_sb->s_cop->dummy_context && @@ -269,6 +284,11 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return false; } +static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) +{ + return false; +} + static inline bool fscrypt_dummy_context_enabled(struct inode *inode) { return false; -- cgit v1.2.3 From b39c78b2aa09cae05f3a48c11f67b3add0d604de Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 3 Jan 2020 11:51:00 +0800 Subject: net: remove the check argument from __skb_gro_checksum_convert The argument is always ignored, so remove it. Signed-off-by: Li RongQing Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2fd19fb8826d..2741aa35bec6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2826,16 +2826,16 @@ static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) } static inline void __skb_gro_checksum_convert(struct sk_buff *skb, - __sum16 check, __wsum pseudo) + __wsum pseudo) { NAPI_GRO_CB(skb)->csum = ~pseudo; NAPI_GRO_CB(skb)->csum_valid = 1; } -#define skb_gro_checksum_try_convert(skb, proto, check, compute_pseudo) \ +#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \ do { \ if (__skb_gro_checksum_convert_check(skb)) \ - __skb_gro_checksum_convert(skb, check, \ + __skb_gro_checksum_convert(skb, \ compute_pseudo(skb, proto)); \ } while (0) -- cgit v1.2.3 From bade7e1fbd34f46462e6eb1db5474832a4144ac2 Mon Sep 17 00:00:00 2001 From: Rijo Thomas Date: Fri, 27 Dec 2019 10:54:02 +0530 Subject: tee: amdtee: check TEE status during driver initialization The AMD-TEE driver should check if TEE is available before registering itself with TEE subsystem. This ensures that there is a TEE which the driver can talk to before proceeding with tee device node allocation. 
Cc: Ard Biesheuvel Cc: Tom Lendacky Acked-by: Jens Wiklander Co-developed-by: Devaraj Rangasamy Signed-off-by: Devaraj Rangasamy Signed-off-by: Rijo Thomas Reviewed-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/psp-tee.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h index 63bb2212fce0..cb0c95d6d76b 100644 --- a/include/linux/psp-tee.h +++ b/include/linux/psp-tee.h @@ -62,6 +62,19 @@ enum tee_cmd_id { int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len, u32 *status); +/** + * psp_check_tee_status() - Checks whether there is a TEE which a driver can + * talk to. + * + * This function can be used by AMD-TEE driver to query if there is TEE with + * which it can communicate. + * + * Returns: + * 0 if the device has TEE + * -%ENODEV if there is no TEE available + */ +int psp_check_tee_status(void); + #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, @@ -69,5 +82,10 @@ static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, { return -ENODEV; } + +static inline int psp_check_tee_status(void) +{ + return -ENODEV; +} #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_TEE_H_ */ -- cgit v1.2.3 From 14a65084f9310ba6a4017c365f9c9820b099dde5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 3 Jan 2020 18:11:30 +0100 Subject: net: ethernet: sxgbe: Rename Samsung to lowercase Fix up inconsistent usage of upper and lowercase letters in "Samsung" name. "SAMSUNG" is not an abbreviation but a regular trademarked name. Therefore it should be written with lowercase letters starting with capital letter. Although advertisement materials usually use uppercase "SAMSUNG", the lowercase version is used in all legal aspects (e.g. on Wikipedia and in privacy/legal statements on https://www.samsung.com/semiconductor/privacy-global/). 
Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/linux/sxgbe_platform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sxgbe_platform.h b/include/linux/sxgbe_platform.h index 85ec745767bd..966146f7267a 100644 --- a/include/linux/sxgbe_platform.h +++ b/include/linux/sxgbe_platform.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * 10G controller driver for Samsung EXYNOS SoCs + * 10G controller driver for Samsung Exynos SoCs * * Copyright (C) 2013 Samsung Electronics Co., Ltd. * http://www.samsung.com -- cgit v1.2.3 From c114574ebfdf42f826776f717c8056a00fa94881 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Jan 2020 20:43:17 +0000 Subject: net: phy: add PHY_INTERFACE_MODE_10GBASER Recent discussion has revealed that the use of PHY_INTERFACE_MODE_10GKR is incorrect. Add a 10GBASE-R definition, document both the -R and -KR versions, and the fact that 10GKR was used incorrectly. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. 
Miller --- include/linux/phy.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 30e599c454db..5932bb8e9c35 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -100,9 +100,11 @@ typedef enum { PHY_INTERFACE_MODE_2500BASEX, PHY_INTERFACE_MODE_RXAUI, PHY_INTERFACE_MODE_XAUI, - /* 10GBASE-KR, XFI, SFI - single lane 10G Serdes */ - PHY_INTERFACE_MODE_10GKR, + /* 10GBASE-R, XFI, SFI - single lane 10G Serdes */ + PHY_INTERFACE_MODE_10GBASER, PHY_INTERFACE_MODE_USXGMII, + /* 10GBASE-KR - with Clause 73 AN */ + PHY_INTERFACE_MODE_10GKR, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -176,10 +178,12 @@ static inline const char *phy_modes(phy_interface_t interface) return "rxaui"; case PHY_INTERFACE_MODE_XAUI: return "xaui"; - case PHY_INTERFACE_MODE_10GKR: - return "10gbase-kr"; + case PHY_INTERFACE_MODE_10GBASER: + return "10gbase-r"; case PHY_INTERFACE_MODE_USXGMII: return "usxgmii"; + case PHY_INTERFACE_MODE_10GKR: + return "10gbase-kr"; default: return "unknown"; } -- cgit v1.2.3 From 0a51826c6e05c5b6cc423b376b81c311e9e485b0 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 4 Jan 2020 02:37:09 +0200 Subject: net: dsa: sja1105: Always send through management routes in slot 0 I finally found out how the 4 management route slots are supposed to be used, but.. it's not worth it. The description from the comment I've just deleted in this commit is still true: when more than 1 management slot is active at the same time, the switch will match frames incoming [from the CPU port] on the lowest numbered management slot that matches the frame's DMAC. My issue was that one was not supposed to statically assign each port a slot. Yes, there are 4 slots and also 4 non-CPU ports, but that is a mere coincidence. 
Instead, the switch can be used like this: every management frame gets a slot at the right of the most recently assigned slot: Send mgmt frame 1 through S0: S0 x x x Send mgmt frame 2 through S1: S0 S1 x x Send mgmt frame 3 through S2: S0 S1 S2 x Send mgmt frame 4 through S3: S0 S1 S2 S3 The difference compared to the old usage is that the transmission of frames 1-4 doesn't need to wait until the completion of the management route. It is safe to use a slot to the right of the most recently used one, because by protocol nobody will program a slot to your left and "steal" your route towards the correct egress port. So there is a potential throughput benefit here. But mgmt frame 5 has no more free slot to use, so it has to wait until _all_ of S0, S1, S2, S3 are full, in order to use S0 again. And that's actually exactly the problem: I was looking for something that would bring more predictable transmission latency, but this is exactly the opposite: 3 out of 4 frames would be transmitted quicker, but the 4th would draw the short straw and have a worse worst-case latency than before. Useless. Things are made even worse by PTP TX timestamping, which is something I won't go deeply into here. Suffice to say that the fact there is a driver-level lock on the SPI bus offsets any potential throughput gains that parallelism might bring. So there's no going back to the multi-slot scheme, remove the "mgmt_slot" variable from sja1105_port and the dummy static assignment made at probe time. While passing by, also remove the assignment to casc_port altogether. Don't pretend that we support cascaded setups. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/dsa/sja1105.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index c0b6a603ea8c..317e05b2584b 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -56,7 +56,6 @@ struct sja1105_port { struct sja1105_tagger_data *data; struct dsa_port *dp; bool hwts_tx_en; - int mgmt_slot; }; #endif /* _NET_DSA_SJA1105_H */ -- cgit v1.2.3 From a68578c20a9667463ee3000402b21644ea62d753 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 4 Jan 2020 02:37:10 +0200 Subject: net: dsa: Make deferred_xmit private to sja1105 There are 3 things that are wrong with the DSA deferred xmit mechanism: 1. Its introduction has made the DSA hotpath ever so slightly more inefficient for everybody, since DSA_SKB_CB(skb)->deferred_xmit needs to be initialized to false for every transmitted frame, in order to figure out whether the driver requested deferral or not (a very rare occasion, rare even for the only driver that does use this mechanism: sja1105). That was necessary to avoid kfree_skb from freeing the skb. 2. Because L2 PTP is a link-local protocol like STP, it requires management routes and deferred xmit with this switch. But as opposed to STP, the deferred work mechanism needs to schedule the packet rather quickly for the TX timestamp to be collected in time and sent to user space. But there is no provision for controlling the scheduling priority of this deferred xmit workqueue. Too bad this is a rather specific requirement for a feature that nobody else uses (more below). 3. Perhaps most importantly, it makes the DSA core adhere a bit too much to the NXP company-wide policy "Innovate Where It Doesn't Matter". The sja1105 is probably the only DSA switch that requires some frames sent from the CPU to be routed to the slave port via an out-of-band configuration (register write) rather than in-band (DSA tag).
And there are indeed very good reasons to not want to do that: if that out-of-band register is at the other end of a slow bus such as SPI, then you limit that Ethernet flow's throughput to effectively the throughput of the SPI bus. So hardware vendors should definitely not be encouraged to design this way. We do _not_ want more widespread use of this mechanism. Luckily we have a solution for each of the 3 issues: For 1, we can just remove that variable in the skb->cb and counteract the effect of kfree_skb with skb_get, much to the same effect. The advantage, of course, being that anybody who doesn't use deferred xmit doesn't need to do any extra operation in the hotpath. For 2, we can create a kernel thread for each port's deferred xmit work. If the user switch ports are named swp0, swp1, swp2, the kernel threads will be named swp0_xmit, swp1_xmit, swp2_xmit (there appears to be a 15 character length limit on kernel thread names). With this, the user can change the scheduling priority with chrt $(pidof swp2_xmit). For 3, we can actually move the entire implementation to the sja1105 driver. So this patch deletes the generic implementation from the DSA core and adds a new one, more adequate to the requirements of PTP TX timestamping, in sja1105_main.c. Suggested-by: Florian Fainelli Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/dsa/sja1105.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 317e05b2584b..fa5735c353cd 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -53,6 +53,9 @@ struct sja1105_skb_cb { ((struct sja1105_skb_cb *)DSA_SKB_CB_PRIV(skb)) struct sja1105_port { + struct kthread_worker *xmit_worker; + struct kthread_work xmit_work; + struct sk_buff_head xmit_queue; struct sja1105_tagger_data *data; struct dsa_port *dp; bool hwts_tx_en; -- cgit v1.2.3 From 6c930994503d9b5bd34b1329427dd7d3d6d37cd4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 6 Jan 2020 03:34:09 +0200 Subject: mii: Add helpers for parsing SGMII auto-negotiation Typically a MAC PCS auto-configures itself after it receives the negotiated copper-side link settings from the PHY, but some MAC devices are more special and need manual interpretation of the SGMII AN result. In other cases, the PCS exposes the entire tx_config_reg base page as it is transmitted on the wire during auto-negotiation, so it makes sense to be able to decode the equivalent lp_advertised bit mask from the raw u16 (of course, "lp" considering the PCS to be the local PHY). Therefore, add the bit definitions for the SGMII registers 4 and 5 (local device ability, link partner ability), as well as a link_mode conversion helper that can be used to feed the AN results into phy_resolve_aneg_linkmode. Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/linux/mii.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 4ce8901a1af6..18c6208f56fc 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -372,6 +372,56 @@ static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa) return result | mii_adv_to_ethtool_adv_x(lpa); } +/** + * mii_lpa_mod_linkmode_adv_sgmii + * @lp_advertising: pointer to destination link mode. + * @lpa: value of the MII_LPA register + * + * A small helper function that translates MII_LPA bits to + * linkmode advertisement settings for SGMII. + * Leaves other bits unchanged. + */ +static inline void +mii_lpa_mod_linkmode_lpa_sgmii(unsigned long *lp_advertising, u32 lpa) +{ + u32 speed_duplex = lpa & LPA_SGMII_DPX_SPD_MASK; + + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, lp_advertising, + speed_duplex == LPA_SGMII_1000HALF); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, lp_advertising, + speed_duplex == LPA_SGMII_1000FULL); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, lp_advertising, + speed_duplex == LPA_SGMII_100HALF); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, lp_advertising, + speed_duplex == LPA_SGMII_100FULL); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, lp_advertising, + speed_duplex == LPA_SGMII_10HALF); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, lp_advertising, + speed_duplex == LPA_SGMII_10FULL); +} + +/** + * mii_lpa_to_linkmode_adv_sgmii + * @advertising: pointer to destination link mode. + * @lpa: value of the MII_LPA register + * + * A small helper function that translates MII_ADVERTISE bits + * to linkmode advertisement settings when in SGMII mode. + * Clears the old value of advertising. 
+ */ +static inline void mii_lpa_to_linkmode_lpa_sgmii(unsigned long *lp_advertising, + u32 lpa) +{ + linkmode_zero(lp_advertising); + + mii_lpa_mod_linkmode_lpa_sgmii(lp_advertising, lpa); +} + /** * mii_adv_mod_linkmode_adv_t * @advertising:pointer to destination link mode. -- cgit v1.2.3 From 1511ed0a0167f523a84b4e727372a5d2ce1b6c2f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 6 Jan 2020 03:34:11 +0200 Subject: net: phylink: add support for polling MAC PCS Some MAC PCS blocks are unable to provide interrupts when their status changes. As we already have support in phylink for polling status, use this to provide a hook for MACs to enable polling mode. The patch idea was picked up from Russell King's suggestion on the macb phylink patch thread here [0] but the implementation was changed. Instead of introducing a new phylink_start_poll() function, which would make the implementation cumbersome for common PHYLINK implementations for multiple types of devices, like DSA, just add a boolean property to the phylink_config structure, which is just as backwards-compatible. https://lkml.org/lkml/2019/12/16/603 Suggested-by: Russell King Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/linux/phylink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index fed5488e3c75..523209e70947 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -63,10 +63,12 @@ enum phylink_op_type { * struct phylink_config - PHYLINK configuration structure * @dev: a pointer to a struct device associated with the MAC * @type: operation type of PHYLINK instance + * @pcs_poll: MAC PCS cannot provide link change interrupt */ struct phylink_config { struct device *dev; enum phylink_op_type type; + bool pcs_poll; }; /** -- cgit v1.2.3 From 6517798dd3432a0002109809bf74e4fcf9bb0c7d Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Mon, 6 Jan 2020 03:34:13 +0200 Subject: enetc: Make MDIO accessors more generic and export to include/linux/fsl Within the LS1028A SoC, the register map for the ENETC MDIO controller is instantiated a few times: for the central (external) MDIO controller, for the internal bus of each standalone ENETC port, and for the internal bus of the Felix switch. Refactoring is needed to support multiple MDIO buses from multiple drivers. The enetc_hw structure is made an opaque type and a smaller enetc_mdio_priv is created. 'mdio_base' - MDIO registers base address - is being parameterized, to be able to work with different MDIO register bases. The ENETC MDIO bus operations are exported from the fsl-enetc-mdio kernel object, the same that registers the central MDIO controller (the dedicated PF). The ENETC main driver has been changed to select it, and use its exported helpers to further register its private MDIO bus. The DSA Felix driver will do the same. Signed-off-by: Claudiu Manoil Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- include/linux/fsl/enetc_mdio.h | 55 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 include/linux/fsl/enetc_mdio.h (limited to 'include/linux') diff --git a/include/linux/fsl/enetc_mdio.h b/include/linux/fsl/enetc_mdio.h new file mode 100644 index 000000000000..4875dd38af7e --- /dev/null +++ b/include/linux/fsl/enetc_mdio.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2019 NXP */ + +#ifndef _FSL_ENETC_MDIO_H_ +#define _FSL_ENETC_MDIO_H_ + +#include + +/* PCS registers */ +#define ENETC_PCS_LINK_TIMER1 0x12 +#define ENETC_PCS_LINK_TIMER1_VAL 0x06a0 +#define ENETC_PCS_LINK_TIMER2 0x13 +#define ENETC_PCS_LINK_TIMER2_VAL 0x0003 +#define ENETC_PCS_IF_MODE 0x14 +#define ENETC_PCS_IF_MODE_SGMII_EN BIT(0) +#define ENETC_PCS_IF_MODE_USE_SGMII_AN BIT(1) +#define ENETC_PCS_IF_MODE_SGMII_SPEED(x) (((x) << 2) & GENMASK(3, 2)) + +/* Not a mistake, the SerDes PLL needs to be set at 3.125 GHz by Reset + * Configuration Word (RCW, outside Linux control) for 2.5G SGMII mode. The PCS + * still thinks it's at gigabit. 
+ */ +enum enetc_pcs_speed { + ENETC_PCS_SPEED_10 = 0, + ENETC_PCS_SPEED_100 = 1, + ENETC_PCS_SPEED_1000 = 2, + ENETC_PCS_SPEED_2500 = 2, +}; + +struct enetc_hw; + +struct enetc_mdio_priv { + struct enetc_hw *hw; + int mdio_base; +}; + +#if IS_REACHABLE(CONFIG_FSL_ENETC_MDIO) + +int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum); +int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value); +struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs); + +#else + +static inline int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum) +{ return -EINVAL; } +static inline int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, + u16 value) +{ return -EINVAL; } +struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs) +{ return ERR_PTR(-EINVAL); } + +#endif + +#endif -- cgit v1.2.3 From 8007880a2ca97c34e7ccd1fcf12daf854b792544 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sat, 14 Dec 2019 10:51:17 +0200 Subject: net/mlx5: limit the function in local scope The function mlx5_buf_alloc_node is only used by the function in the local scope. So it is appropriate to limit this function in the local scope. 
Signed-off-by: Zhu Yanjun Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 27200dea0297..59cff380f41a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -928,8 +928,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); -int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, - struct mlx5_frag_buf *buf, int node); int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); -- cgit v1.2.3 From dcfea72e79b0aa7a057c8f6024169d86a1bbc84b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 8 Jan 2020 16:59:02 -0500 Subject: net: introduce skb_list_walk_safe for skb segment walking As part of the continual effort to remove direct usage of skb->next and skb->prev, this patch adds a helper for iterating through the singly-linked variant of skb lists, which are used for lists of GSO packets. The name "skb_list_..." has been chosen to match the existing function, "kfree_skb_list", which also operates on these singly-linked lists, and the "..._walk_safe" part is the same idiom as elsewhere in the kernel. This patch removes the helper from wireguard and puts it into linux/skbuff.h, while making it a bit more robust for general usage. In particular, parentheses are added around the macro argument usage, and it now accounts for trying to iterate through an already-null skb pointer, which will simply run the iteration zero times. This latter enhancement means it can be used to replace both do { ... } while and while (...) open-coded idioms.
This should take care of these three possible usages, which match all current methods of iterations. skb_list_walk_safe(segs, skb, next) { ... } skb_list_walk_safe(skb, skb, next) { ... } skb_list_walk_safe(segs, skb, segs) { ... } Gcc appears to generate efficient code for each of these. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e9133bcf0544..64e5b1be9ff5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1478,6 +1478,11 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb) skb->next = NULL; } +/* Iterate through singly-linked GSO fragments of an skb. */ +#define skb_list_walk_safe(first, skb, next) \ + for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \ + (skb) = (next), (next) = (skb) ? (skb)->next : NULL) + static inline void skb_list_del_init(struct sk_buff *skb) { __list_del_entry(&skb->list); -- cgit v1.2.3 From f9d89b853ec1709345c0e2f1f51ae53188eef981 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Dec 2019 21:19:34 -0600 Subject: crypto: remove unused tfm result flags The tfm result flags CRYPTO_TFM_RES_BAD_KEY_SCHED and CRYPTO_TFM_RES_BAD_FLAGS are never used, so remove them. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 8729f957f83c..950b592947b2 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -114,9 +114,7 @@ #define CRYPTO_TFM_REQ_MAY_BACKLOG 0x00000400 #define CRYPTO_TFM_RES_WEAK_KEY 0x00100000 #define CRYPTO_TFM_RES_BAD_KEY_LEN 0x00200000 -#define CRYPTO_TFM_RES_BAD_KEY_SCHED 0x00400000 #define CRYPTO_TFM_RES_BAD_BLOCK_LEN 0x00800000 -#define CRYPTO_TFM_RES_BAD_FLAGS 0x01000000 /* * Miscellaneous stuff. 
-- cgit v1.2.3 From 5c925e8b10a5f43f220755aceb9d5f14b2f4e2c5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Dec 2019 21:19:35 -0600 Subject: crypto: remove CRYPTO_TFM_RES_BAD_BLOCK_LEN The flag CRYPTO_TFM_RES_BAD_BLOCK_LEN is never checked for, and it's only set by one driver. And even that single driver's use is wrong because the driver is setting the flag from ->encrypt() and ->decrypt() with no locking, which is unsafe because ->encrypt() and ->decrypt() can be executed by many threads in parallel on the same tfm. Just remove this flag. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 950b592947b2..719a301af3f2 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -114,7 +114,6 @@ #define CRYPTO_TFM_REQ_MAY_BACKLOG 0x00000400 #define CRYPTO_TFM_RES_WEAK_KEY 0x00100000 #define CRYPTO_TFM_RES_BAD_KEY_LEN 0x00200000 -#define CRYPTO_TFM_RES_BAD_BLOCK_LEN 0x00800000 /* * Miscellaneous stuff. -- cgit v1.2.3 From 674f368a952c48ede71784935a799a5205b92b6c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Dec 2019 21:19:36 -0600 Subject: crypto: remove CRYPTO_TFM_RES_BAD_KEY_LEN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CRYPTO_TFM_RES_BAD_KEY_LEN flag was apparently meant as a way to make the ->setkey() functions provide more information about errors. However, no one actually checks for this flag, which makes it pointless. Also, many algorithms fail to set this flag when given a bad length key. Reviewing just the generic implementations, this is the case for aes-fixed-time, cbcmac, echainiv, nhpoly1305, pcrypt, rfc3686, rfc4309, rfc7539, rfc7539esp, salsa20, seqiv, and xcbc. But there are probably many more in arch/*/crypto/ and drivers/crypto/. Some algorithms can even set this flag when the key is the correct length. 
For example, authenc and authencesn set it when the key payload is malformed in any way (not just a bad length), the atmel-sha and ccree drivers can set it if a memory allocation fails, and the chelsio driver sets it for bad auth tag lengths, not just bad key lengths. So even if someone actually wanted to start checking this flag (which seems unlikely, since it's been unused for a long time), there would be a lot of work needed to get it working correctly. But it would probably be much better to go back to the drawing board and just define different return values, like -EINVAL if the key is invalid for the algorithm vs. -EKEYREJECTED if the key was rejected by a policy like "no weak keys". That would be much simpler, less error-prone, and easier to test. So just remove this flag. Signed-off-by: Eric Biggers Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- include/linux/crypto.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 719a301af3f2..61fccc7d0efb 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -113,7 +113,6 @@ #define CRYPTO_TFM_REQ_MAY_SLEEP 0x00000200 #define CRYPTO_TFM_REQ_MAY_BACKLOG 0x00000400 #define CRYPTO_TFM_RES_WEAK_KEY 0x00100000 -#define CRYPTO_TFM_RES_BAD_KEY_LEN 0x00200000 /* * Miscellaneous stuff. -- cgit v1.2.3 From c4c4db0d59774f6ab726edd012711490437345c2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Dec 2019 21:19:37 -0600 Subject: crypto: remove CRYPTO_TFM_RES_WEAK_KEY The CRYPTO_TFM_RES_WEAK_KEY flag was apparently meant as a way to make the ->setkey() functions provide more information about errors. However, no one actually checks for this flag, which makes it pointless. There are also no tests that verify that all algorithms actually set (or don't set) it correctly. 
This is also the last remaining CRYPTO_TFM_RES_* flag, which means that it's the only thing still needing all the boilerplate code which propagates these flags around from child => parent tfms. And if someone ever needs to distinguish this error in the future (which is somewhat unlikely, as it's been unneeded for a long time), it would be much better to just define a new return value like -EKEYREJECTED. That would be much simpler, less error-prone, and easier to test. So just remove this flag. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 61fccc7d0efb..accd0c8038fd 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -112,7 +112,6 @@ #define CRYPTO_TFM_REQ_FORBID_WEAK_KEYS 0x00000100 #define CRYPTO_TFM_REQ_MAY_SLEEP 0x00000200 #define CRYPTO_TFM_REQ_MAY_BACKLOG 0x00000400 -#define CRYPTO_TFM_RES_WEAK_KEY 0x00100000 /* * Miscellaneous stuff. -- cgit v1.2.3 From af5034e8e4a5838fc77e476c1a91822e449d5869 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Dec 2019 21:19:38 -0600 Subject: crypto: remove propagation of CRYPTO_TFM_RES_* flags The CRYPTO_TFM_RES_* flags were apparently meant as a way to make the ->setkey() functions provide more information about errors. But these flags weren't actually being used or tested, and in many cases they weren't being set correctly anyway. So they've now been removed. Also, if someone ever actually needs to start better distinguishing ->setkey() errors (which is somewhat unlikely, as this has been unneeded for a long time), we'd be much better off just defining different return values, like -EINVAL if the key is invalid for the algorithm vs. -EKEYREJECTED if the key was rejected by a policy like "no weak keys". That would be much simpler, less error-prone, and easier to test. 
So just remove CRYPTO_TFM_RES_MASK and all the unneeded logic that propagates these flags around. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index accd0c8038fd..763863dbc079 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -107,8 +107,6 @@ #define CRYPTO_TFM_NEED_KEY 0x00000001 #define CRYPTO_TFM_REQ_MASK 0x000fff00 -#define CRYPTO_TFM_RES_MASK 0xfff00000 - #define CRYPTO_TFM_REQ_FORBID_WEAK_KEYS 0x00000100 #define CRYPTO_TFM_REQ_MAY_SLEEP 0x00000200 #define CRYPTO_TFM_REQ_MAY_BACKLOG 0x00000400 -- cgit v1.2.3 From 27ae7997a66174cb8afd6a75b3989f5e0c1b9e5a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 8 Jan 2020 16:35:03 -0800 Subject: bpf: Introduce BPF_PROG_TYPE_STRUCT_OPS This patch allows the kernel's struct ops (i.e. func ptr) to be implemented in BPF. The first use case in this series is the "struct tcp_congestion_ops" which will be introduced in a later patch. This patch introduces a new prog type BPF_PROG_TYPE_STRUCT_OPS. The BPF_PROG_TYPE_STRUCT_OPS prog is verified against a particular func ptr of a kernel struct. The attr->attach_btf_id is the btf id of a kernel struct. The attr->expected_attach_type is the member "index" of that kernel struct. The first member of a struct starts with member index 0. That will avoid ambiguity when a kernel struct has multiple func ptrs with the same func signature. For example, a BPF_PROG_TYPE_STRUCT_OPS prog is written to implement the "init" func ptr of the "struct tcp_congestion_ops". The attr->attach_btf_id is the btf id of the "struct tcp_congestion_ops" of the _running_ kernel. The attr->expected_attach_type is 3. The ctx of BPF_PROG_TYPE_STRUCT_OPS is an array of u64 args saved by arch_prepare_bpf_trampoline that will be done in the next patch when introducing BPF_MAP_TYPE_STRUCT_OPS.
"struct bpf_struct_ops" is introduced as a common interface for the kernel struct that supports BPF_PROG_TYPE_STRUCT_OPS prog. The supporting kernel struct will need to implement an instance of the "struct bpf_struct_ops". The supporting kernel struct also needs to implement a bpf_verifier_ops. During BPF_PROG_LOAD, bpf_struct_ops_find() will find the right bpf_verifier_ops by searching the attr->attach_btf_id. A new "btf_struct_access" is also added to the bpf_verifier_ops such that the supporting kernel struct can optionally provide its own specific check on accessing the func arg (e.g. provide limited write access). After btf_vmlinux is parsed, the new bpf_struct_ops_init() is called to initialize some values (e.g. the btf id of the supporting kernel struct) and it can only be done once the btf_vmlinux is available. The R0 checks at BPF_EXIT is excluded for the BPF_PROG_TYPE_STRUCT_OPS prog if the return type of the prog->aux->attach_func_proto is "void". Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200109003503.3855825-1-kafai@fb.com --- include/linux/bpf.h | 30 ++++++++++++++++++++++++++++++ include/linux/bpf_types.h | 4 ++++ include/linux/btf.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b14e51d56a82..50f3b20ae284 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -349,6 +349,10 @@ struct bpf_verifier_ops { const struct bpf_insn *src, struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); + int (*btf_struct_access)(struct bpf_verifier_log *log, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, + u32 *next_btf_id); }; struct bpf_prog_offload_ops { @@ -668,6 +672,32 @@ struct bpf_array_aux { struct work_struct work; }; +struct btf_type; +struct btf_member; + +#define 
BPF_STRUCT_OPS_MAX_NR_MEMBERS 64 +struct bpf_struct_ops { + const struct bpf_verifier_ops *verifier_ops; + int (*init)(struct btf *btf); + int (*check_member)(const struct btf_type *t, + const struct btf_member *member); + const struct btf_type *type; + const char *name; + struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; + u32 type_id; +}; + +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id); +void bpf_struct_ops_init(struct btf *btf); +#else +static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) +{ + return NULL; +} +static inline void bpf_struct_ops_init(struct btf *btf) { } +#endif + struct bpf_array { struct bpf_map map; u32 elem_size; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 93740b3614d7..fadd243ffa2d 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -65,6 +65,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2, BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport, struct sk_reuseport_md, struct sk_reuseport_kern) #endif +#if defined(CONFIG_BPF_JIT) +BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops, + void *, void *) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/btf.h b/include/linux/btf.h index 79d4abc2556a..f74a09a7120b 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -53,6 +53,18 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, u32 expected_offset, u32 expected_size); int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); bool btf_type_is_void(const struct btf_type *t); +s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); +const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, + u32 id, u32 *res_id); +const struct btf_type *btf_type_resolve_ptr(const struct btf *btf, + u32 id, u32 
*res_id); +const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf, + u32 id, u32 *res_id); + +#define for_each_member(i, struct_type, member) \ + for (i = 0, member = btf_type_member(struct_type); \ + i < btf_type_vlen(struct_type); \ + i++, member++) static inline bool btf_type_is_ptr(const struct btf_type *t) { @@ -84,6 +96,28 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO; } +static inline u16 btf_type_vlen(const struct btf_type *t) +{ + return BTF_INFO_VLEN(t->info); +} + +static inline bool btf_type_kflag(const struct btf_type *t) +{ + return BTF_INFO_KFLAG(t->info); +} + +static inline u32 btf_member_bitfield_size(const struct btf_type *struct_type, + const struct btf_member *member) +{ + return btf_type_kflag(struct_type) ? BTF_MEMBER_BITFIELD_SIZE(member->offset) + : 0; +} + +static inline const struct btf_member *btf_type_member(const struct btf_type *t) +{ + return (const struct btf_member *)(t + 1); +} + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); -- cgit v1.2.3 From 85d33df357b634649ddbe0a20fd2d0fc5732c3cb Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 8 Jan 2020 16:35:05 -0800 Subject: bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS The patch introduces BPF_MAP_TYPE_STRUCT_OPS. The map value is a kernel struct with its func ptr implemented in bpf prog. This new map is the interface to register/unregister/introspect a bpf implemented kernel struct. The kernel struct is actually embedded inside another new struct (or called the "value" struct in the code). For example, "struct tcp_congestion_ops" is embbeded in: struct bpf_struct_ops_tcp_congestion_ops { refcount_t refcnt; enum bpf_struct_ops_state state; struct tcp_congestion_ops data; /* <-- kernel subsystem struct here */ } The map value is "struct bpf_struct_ops_tcp_congestion_ops". 
The "bpftool map dump" will then be able to show the state ("inuse"/"tobefree") and the number of subsystem's refcnt (e.g. number of tcp_sock in the tcp_congestion_ops case). This "value" struct is created automatically by a macro. Having a separate "value" struct will also make extending "struct bpf_struct_ops_XYZ" easier (e.g. adding "void (*init)(void)" to "struct bpf_struct_ops_XYZ" to do some initialization work before registering the struct_ops to the kernel subsystem). The libbpf will take care of finding and populating the "struct bpf_struct_ops_XYZ" from "struct XYZ". Register a struct_ops to a kernel subsystem: 1. Load all needed BPF_PROG_TYPE_STRUCT_OPS prog(s) 2. Create a BPF_MAP_TYPE_STRUCT_OPS with attr->btf_vmlinux_value_type_id set to the btf id "struct bpf_struct_ops_tcp_congestion_ops" of the running kernel. Instead of reusing the attr->btf_value_type_id, btf_vmlinux_value_type_id is added such that attr->btf_fd can still be used as the "user" btf which could store other useful sysadmin/debug info that may be introduced in the future, e.g. creation-date/compiler-details/map-creator...etc. 3. Create a "struct bpf_struct_ops_tcp_congestion_ops" object as described in the running kernel btf. Populate the value of this object. The function ptr should be populated with the prog fds. 4. Call BPF_MAP_UPDATE with the object created in (3) as the map value. The key is always "0". During BPF_MAP_UPDATE, the code that saves the kernel-func-ptr's args as an array of u64 is generated. BPF_MAP_UPDATE also allows the specific struct_ops to do some final checks in "st_ops->init_member()" (e.g. ensure all mandatory func ptrs are implemented). If everything looks good, it will register this kernel struct to the kernel subsystem. The map will not allow further update from this point. Unregister a struct_ops from the kernel subsystem: BPF_MAP_DELETE with key "0". Introspect a struct_ops: BPF_MAP_LOOKUP_ELEM with key "0". 
The map value returned will have the prog _id_ populated as the func ptr. The map value state (enum bpf_struct_ops_state) will transition from: INIT (map created) => INUSE (map updated, i.e. reg) => TOBEFREE (map value deleted, i.e. unreg) The kernel subsystem needs to call bpf_struct_ops_get() and bpf_struct_ops_put() to manage the "refcnt" in the "struct bpf_struct_ops_XYZ". This patch uses a separate refcnt for the purpose of tracking the subsystem usage. Another approach is to reuse the map->refcnt and then "show" (i.e. during map_lookup) the subsystem's usage by doing map->refcnt - map->usercnt to filter out the map-fd/pinned-map usage. However, that will also tie down the future semantics of map->refcnt and map->usercnt. The very first subsystem's refcnt (during reg()) holds one count to map->refcnt. When the very last subsystem's refcnt is gone, it will also release the map->refcnt. All bpf_prog will be freed when the map->refcnt reaches 0 (i.e. during map_free()). Here is what the bpftool map command will look like: [root@arch-fb-vm1 bpf]# bpftool map show 6: struct_ops name dctcp flags 0x0 key 4B value 256B max_entries 1 memlock 4096B btf_id 6 [root@arch-fb-vm1 bpf]# bpftool map dump id 6 [{ "value": { "refcnt": { "refs": { "counter": 1 } }, "state": 1, "data": { "list": { "next": 0, "prev": 0 }, "key": 0, "flags": 2, "init": 24, "release": 0, "ssthresh": 25, "cong_avoid": 30, "set_state": 27, "cwnd_event": 28, "in_ack_event": 26, "undo_cwnd": 29, "pkts_acked": 0, "min_tso_segs": 0, "sndbuf_expand": 0, "cong_control": 0, "get_info": 0, "name": [98,112,102,95,100,99,116,99,112,0,0,0,0,0,0,0 ], "owner": 0 } } } ] Misc Notes: * bpf_struct_ops_map_sys_lookup_elem() is added for syscall lookup. It does an inplace update on "*value" instead of returning a pointer to syscall.c. Otherwise, it needs a separate copy of "zero" value for the BPF_STRUCT_OPS_STATE_INIT to avoid races. 
* The bpf_struct_ops_map_delete_elem() is also called without preempt_disable() from map_delete_elem(). It is because the "->unreg()" may requires sleepable context, e.g. the "tcp_unregister_congestion_control()". * "const" is added to some of the existing "struct btf_func_model *" function arg to avoid a compiler warning caused by this patch. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200109003505.3855919-1-kafai@fb.com --- include/linux/bpf.h | 49 +++++++++++++++++++++++++++++++++++++++++++++-- include/linux/bpf_types.h | 3 +++ include/linux/btf.h | 13 +++++++++++++ 3 files changed, 63 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 50f3b20ae284..a7bfe8a388c6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -17,6 +17,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -106,6 +107,7 @@ struct bpf_map { struct btf *btf; struct bpf_map_memory memory; char name[BPF_OBJ_NAME_LEN]; + u32 btf_vmlinux_value_type_id; bool unpriv_array; bool frozen; /* write-once; write-protected by freeze_mutex */ /* 22 bytes hole */ @@ -183,7 +185,8 @@ static inline bool bpf_map_offload_neutral(const struct bpf_map *map) static inline bool bpf_map_support_seq_show(const struct bpf_map *map) { - return map->btf && map->ops->map_seq_show_elem; + return (map->btf_value_type_id || map->btf_vmlinux_value_type_id) && + map->ops->map_seq_show_elem; } int map_check_no_btf(const struct bpf_map *map, @@ -441,7 +444,8 @@ struct btf_func_model { * fentry = a set of program to run before calling original function * fexit = a set of program to run after original function */ -int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, +int arch_prepare_bpf_trampoline(void *image, void *image_end, + const struct btf_func_model *m, u32 flags, struct 
bpf_prog **fentry_progs, int fentry_cnt, struct bpf_prog **fexit_progs, int fexit_cnt, void *orig_call); @@ -672,6 +676,7 @@ struct bpf_array_aux { struct work_struct work; }; +struct bpf_struct_ops_value; struct btf_type; struct btf_member; @@ -681,21 +686,61 @@ struct bpf_struct_ops { int (*init)(struct btf *btf); int (*check_member)(const struct btf_type *t, const struct btf_member *member); + int (*init_member)(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata); + int (*reg)(void *kdata); + void (*unreg)(void *kdata); const struct btf_type *type; + const struct btf_type *value_type; const char *name; struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; u32 type_id; + u32 value_id; }; #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +#define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id); void bpf_struct_ops_init(struct btf *btf); +bool bpf_struct_ops_get(const void *kdata); +void bpf_struct_ops_put(const void *kdata); +int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, + void *value); +static inline bool bpf_try_module_get(const void *data, struct module *owner) +{ + if (owner == BPF_MODULE_OWNER) + return bpf_struct_ops_get(data); + else + return try_module_get(owner); +} +static inline void bpf_module_put(const void *data, struct module *owner) +{ + if (owner == BPF_MODULE_OWNER) + bpf_struct_ops_put(data); + else + module_put(owner); +} #else static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) { return NULL; } static inline void bpf_struct_ops_init(struct btf *btf) { } +static inline bool bpf_try_module_get(const void *data, struct module *owner) +{ + return try_module_get(owner); +} +static inline void bpf_module_put(const void *data, struct module *owner) +{ + module_put(owner); +} +static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, + void 
*key, + void *value) +{ + return -EINVAL; +} #endif struct bpf_array { diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fadd243ffa2d..9f326e6ef885 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -109,3 +109,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) +#if defined(CONFIG_BPF_JIT) +BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) +#endif diff --git a/include/linux/btf.h b/include/linux/btf.h index f74a09a7120b..881e9b76ef49 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -7,6 +7,8 @@ #include #include +#define BTF_TYPE_EMIT(type) ((void)(type *)0) + struct btf; struct btf_member; struct btf_type; @@ -60,6 +62,10 @@ const struct btf_type *btf_type_resolve_ptr(const struct btf *btf, u32 id, u32 *res_id); const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf, u32 id, u32 *res_id); +const struct btf_type * +btf_resolve_size(const struct btf *btf, const struct btf_type *type, + u32 *type_size, const struct btf_type **elem_type, + u32 *total_nelems); #define for_each_member(i, struct_type, member) \ for (i = 0, member = btf_type_member(struct_type); \ @@ -106,6 +112,13 @@ static inline bool btf_type_kflag(const struct btf_type *t) return BTF_INFO_KFLAG(t->info); } +static inline u32 btf_member_bit_offset(const struct btf_type *struct_type, + const struct btf_member *member) +{ + return btf_type_kflag(struct_type) ? 
BTF_MEMBER_BIT_OFFSET(member->offset) + : member->offset; +} + static inline u32 btf_member_bitfield_size(const struct btf_type *struct_type, const struct btf_member *member) { -- cgit v1.2.3 From 0baf26b0fcd74bbfcef53c5d5e8bad2b99c8d0d2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 8 Jan 2020 16:35:08 -0800 Subject: bpf: tcp: Support tcp_congestion_ops in bpf This patch makes "struct tcp_congestion_ops" to be the first user of BPF STRUCT_OPS. It allows implementing a tcp_congestion_ops in bpf. The BPF implemented tcp_congestion_ops can be used like regular kernel tcp-cc through sysctl and setsockopt. e.g. [root@arch-fb-vm1 bpf]# sysctl -a | egrep congestion net.ipv4.tcp_allowed_congestion_control = reno cubic bpf_cubic net.ipv4.tcp_available_congestion_control = reno bic cubic bpf_cubic net.ipv4.tcp_congestion_control = bpf_cubic There has been attempt to move the TCP CC to the user space (e.g. CCP in TCP). The common arguments are faster turn around, get away from long-tail kernel versions in production...etc, which are legit points. BPF has been the continuous effort to join both kernel and userspace upsides together (e.g. XDP to gain the performance advantage without bypassing the kernel). The recent BPF advancements (in particular BTF-aware verifier, BPF trampoline, BPF CO-RE...) made implementing kernel struct ops (e.g. tcp cc) possible in BPF. It allows a faster turnaround for testing algorithm in the production while leveraging the existing (and continue growing) BPF feature/framework instead of building one specifically for userspace TCP CC. This patch allows write access to a few fields in tcp-sock (in bpf_tcp_ca_btf_struct_access()). The optional "get_info" is unsupported now. It can be added later. One possible way is to output the info with a btf-id to describe the content. 
Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200109003508.3856115-1-kafai@fb.com --- include/linux/filter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 70e6dd960bca..a366a0b64a57 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -843,6 +843,8 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, bpf_aux_classic_check_t trans, bool save_orig); void bpf_prog_destroy(struct bpf_prog *fp); +const struct bpf_func_proto * +bpf_base_func_proto(enum bpf_func_id func_id); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); -- cgit v1.2.3 From ea78979d302f7de9bbd59f9dafdb070ecb05ec39 Mon Sep 17 00:00:00 2001 From: Lakshmi Ramasubramanian Date: Wed, 8 Jan 2020 08:05:08 -0800 Subject: IMA: fix measuring asymmetric keys Kconfig As a result of the asymmetric public keys subtype Kconfig option being defined as tristate, with the existing IMA Makefile, ima_asymmetric_keys.c could be built as a kernel module. To prevent this from happening, this patch defines and uses an intermediate Kconfig boolean option named IMA_MEASURE_ASYMMETRIC_KEYS. Signed-off-by: Lakshmi Ramasubramanian Suggested-by: James.Bottomley Cc: David Howells Cc: Jarkko Sakkinen Reported-by: kbuild test robot # ima_asymmetric_keys.c is built as a kernel module. 
Fixes: 88e70da170e8 ("IMA: Define an IMA hook to measure keys") Fixes: cb1aa3823c92 ("KEYS: Call the IMA hook to measure keys") [zohar@linux.ibm.com: updated patch description] Signed-off-by: Mimi Zohar --- include/linux/ima.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 3b89136bc218..f4644c54f648 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -101,7 +101,7 @@ static inline void ima_add_kexec_buffer(struct kimage *image) {} #endif -#if defined(CONFIG_IMA) && defined(CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE) +#ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS extern void ima_post_key_create_or_update(struct key *keyring, struct key *key, const void *payload, size_t plen, @@ -113,7 +113,7 @@ static inline void ima_post_key_create_or_update(struct key *keyring, size_t plen, unsigned long flags, bool create) {} -#endif /* CONFIG_IMA && CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE */ +#endif /* CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS */ #ifdef CONFIG_IMA_APPRAISE extern bool is_ima_appraise_enabled(void); -- cgit v1.2.3 From 3ee17bc78e0f3fdeff9890993e8f3a9f5145163b Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 9 Jan 2020 07:59:19 -0800 Subject: mptcp: Add MPTCP to skb extensions Add enum value for MPTCP and update config dependencies v5 -> v6: - fixed '__unused' field size Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 64e5b1be9ff5..f5c27600b410 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4096,6 +4096,9 @@ enum skb_ext_id { #endif #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) TC_SKB_EXT, +#endif +#if IS_ENABLED(CONFIG_MPTCP) + SKB_EXT_MPTCP, #endif SKB_EXT_NUM, /* must be last */ }; -- cgit v1.2.3 From 8b69a803814bb8b14155ea60df83f6d57527e69e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Jan 2020 07:59:24 -0800 Subject: skb: add helpers to allocate ext independently from sk_buff Currently we can allocate the extension only after the skb, this change allows the user to do the opposite, will simplify allocation failure handling from MPTCP. Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f5c27600b410..016b3c4ab99a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4120,6 +4120,9 @@ struct skb_ext { char data[0] __aligned(8); }; +struct skb_ext *__skb_ext_alloc(void); +void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, + struct skb_ext *ext); void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id); void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id); void __skb_ext_put(struct skb_ext *ext); -- cgit v1.2.3 From 51c39bb1d5d105a02e29aa7960f0a395086e6342 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 9 Jan 2020 22:41:20 -0800 Subject: bpf: Introduce function-by-function verification New llvm and old llvm with libbpf help produce BTF that distinguish global and static functions. Unlike arguments of static function the arguments of global functions cannot be removed or optimized away by llvm. 
The compiler has to use exactly the arguments specified in a function prototype. The argument type information allows the verifier to validate each global function independently. For now the only supported argument types are pointer to context and scalars. In the future pointers to structures, sizes, pointer to packet data can be supported as well. Consider the following example: static int f1(int ...) { ... } int f3(int b); int f2(int a) { f1(a) + f3(a); } int f3(int b) { ... } int main(...) { f1(...) + f2(...) + f3(...); } The verifier will start its safety checks from the first global function f2(). It will recursively descend into f1() because it's static. Then it will check that arguments match for the f3() invocation inside f2(). It will not descend into f3(). It will finish f2() that has to be successfully verified for all possible values of 'a'. Then it will proceed with f3(). That function also has to be safe for all possible values of 'b'. Then it will start subprog 0 (which is the main() function). It will recursively descend into f1() and will skip full check of f2() and f3(), since they are global. The order of processing global functions doesn't affect safety, since all global functions must be proven safe based on their arguments only. Such function by function verification can drastically improve speed of the verification and reduce complexity. Note that the stack limit of 512 still applies to the call chain regardless of whether functions were static or global. The nested level of 8 also still applies. The same recursion prevention checks are in place as well. The type information and static/global kind is preserved after the verification hence in the above example global function f2() and f3() can be replaced later by equivalent functions with the same types that are loaded and verified later without affecting safety of this main() program. Such replacement (re-linking) of global functions is a subject of future patches. 
Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200110064124.1760511-3-ast@kernel.org --- include/linux/bpf.h | 7 ++++++- include/linux/bpf_verifier.h | 10 ++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a7bfe8a388c6..aed2bc39d72b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -566,6 +566,7 @@ static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, #endif struct bpf_func_info_aux { + u16 linkage; bool unreliable; }; @@ -1081,7 +1082,11 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, const char *func_name, struct btf_func_model *m); -int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog); +struct bpf_reg_state; +int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, + struct bpf_reg_state *regs); +int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, + struct bpf_reg_state *reg); struct bpf_prog *bpf_prog_by_id(u32 id); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 26e40de9ef55..5406e6e96585 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -304,11 +304,13 @@ struct bpf_insn_aux_data { u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ int sanitize_stack_off; /* stack slot to be cleared */ - bool seen; /* this insn was processed by the verifier */ + u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ bool zext_dst; /* this insn zero extends dst reg */ u8 alu_state; /* used in combination with alu_limit */ - bool prune_point; + + /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ + bool prune_point; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -379,6 +381,7 
@@ struct bpf_verifier_env { int *insn_stack; int cur_stack; } cfg; + u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ u32 prev_insn_processed, insn_processed; @@ -428,4 +431,7 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, void bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); +int check_ctx_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno); + #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3 From 89ed486532c4d155565cc4b7984a918ee3c58f80 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:38 +0100 Subject: efi/x86: Avoid redundant cast of EFI firmware service pointer All EFI firmware call prototypes have been annotated as __efiapi, permitting us to attach attributes regarding the calling convention by overriding __efiapi to an architecture specific value. On 32-bit x86, EFI firmware calls use the plain calling convention where all arguments are passed via the stack, and cleaned up by the caller. Let's add this to the __efiapi definition so we no longer need to cast the function pointers before invoking them. 
Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-6-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 726673e98990..952c1659dfd9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -48,8 +48,10 @@ typedef u16 efi_char16_t; /* UNICODE character */ typedef u64 efi_physical_addr_t; typedef void *efi_handle_t; -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_64) #define __efiapi __attribute__((ms_abi)) +#elif defined(CONFIG_X86_32) +#define __efiapi __attribute__((regparm(0))) #else #define __efiapi #endif -- cgit v1.2.3 From 33b85447fa61946b94fea93dd4bc24772af14d54 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:44 +0100 Subject: efi/x86: Drop two near identical versions of efi_runtime_init() The routines efi_runtime_init32() and efi_runtime_init64() are almost indistinguishable, and the only relevant difference is the offset in the runtime struct from where to obtain the physical address of the SetVirtualAddressMap() routine. However, this address is only used once, when installing the virtual address map that the OS will use to invoke EFI runtime services, and at the time of the call, we will necessarily be running with a 1:1 mapping, and so there is no need to do the map/unmap dance here to retrieve the address. In fact, in the preceding changes to these users, we stopped using the address recorded here entirely. So let's just get rid of all this code since it no longer serves a purpose. While at it, tweak the logic so that we handle unsupported and disable EFI runtime services in the same way, and unmap the EFI memory map in both cases. 
Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-12-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 952c1659dfd9..ee68ea6f85ff 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -539,24 +539,6 @@ typedef struct { u32 query_variable_info; } efi_runtime_services_32_t; -typedef struct { - efi_table_hdr_t hdr; - u64 get_time; - u64 set_time; - u64 get_wakeup_time; - u64 set_wakeup_time; - u64 set_virtual_address_map; - u64 convert_pointer; - u64 get_variable; - u64 get_next_variable; - u64 set_variable; - u64 get_next_high_mono_count; - u64 reset_system; - u64 update_capsule; - u64 query_capsule_caps; - u64 query_variable_info; -} efi_runtime_services_64_t; - typedef efi_status_t efi_get_time_t (efi_time_t *tm, efi_time_cap_t *tc); typedef efi_status_t efi_set_time_t (efi_time_t *tm); typedef efi_status_t efi_get_wakeup_time_t (efi_bool_t *enabled, efi_bool_t *pending, @@ -946,7 +928,6 @@ extern struct efi { efi_query_capsule_caps_t *query_capsule_caps; efi_get_next_high_mono_count_t *get_next_high_mono_count; efi_reset_system_t *reset_system; - efi_set_virtual_address_map_t *set_virtual_address_map; struct efi_memory_map memmap; unsigned long flags; } efi; -- cgit v1.2.3 From 4444f8541dad16fefd9b8807ad1451e806ef1d94 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 3 Jan 2020 12:39:50 +0100 Subject: efi: Allow disabling PCI busmastering on bridges during boot Add an option to disable the busmaster bit in the control register on all PCI bridges before calling ExitBootServices() and passing control to the runtime kernel. System firmware may configure the IOMMU to prevent malicious PCI devices from being able to attack the OS via DMA. 
However, since firmware can't guarantee that the OS is IOMMU-aware, it will tear down IOMMU configuration when ExitBootServices() is called. This leaves a window during which a hostile device could still cause damage before Linux configures the IOMMU again. If CONFIG_EFI_DISABLE_PCI_DMA is enabled or "efi=disable_early_pci_dma" is passed on the command line, the EFI stub will clear the busmaster bit on all PCI bridges before ExitBootServices() is called. This will prevent any malicious PCI devices from being able to perform DMA until the kernel reenables busmastering after configuring the IOMMU. This option may cause failures with some poorly behaved hardware and should not be enabled without testing. The kernel commandline options "efi=disable_early_pci_dma" or "efi=no_disable_early_pci_dma" may be used to override the default. Note that PCI devices downstream from PCI bridges are disconnected from their drivers first, using the UEFI driver model API, so that DMA can be disabled safely at the bridge level. 
[ardb: disconnect PCI I/O handles first, as suggested by Arvind] Co-developed-by: Matthew Garrett Signed-off-by: Matthew Garrett Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-18-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index ee68ea6f85ff..7e8e25b1d11c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -319,7 +319,9 @@ typedef union { void *stall; void *set_watchdog_timer; void *connect_controller; - void *disconnect_controller; + efi_status_t (__efiapi *disconnect_controller)(efi_handle_t, + efi_handle_t, + efi_handle_t); void *open_protocol; void *close_protocol; void *open_protocol_information; @@ -1692,4 +1694,6 @@ struct linux_efi_memreserve { #define EFI_MEMRESERVE_COUNT(size) (((size) - sizeof(struct linux_efi_memreserve)) \ / sizeof(((struct linux_efi_memreserve *)0)->entry[0])) +void efi_pci_disable_bridge_busmaster(void); + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From 90fbca5952436e7817910b33eb4464ddd77a8964 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 12 Dec 2019 13:09:24 +0200 Subject: net/mlx5: Add Virtio Emulation related device capabilities Add Virtio Emulation related fields to the device capabilities. It includes a general bit to indicate whether Virtio Emulation is supported and the capabilities structure itself. 
Signed-off-by: Yishai Hadas Reviewed-by: Shahaf Shuler Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5d54fccf87fc..c6abaf4f1c55 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -87,6 +87,7 @@ enum { enum { MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM), MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11), + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q = (1ULL << 13), }; enum { @@ -953,6 +954,19 @@ struct mlx5_ifc_device_event_cap_bits { u8 user_unaffiliated_events[4][0x40]; }; +struct mlx5_ifc_device_virtio_emulation_cap_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x13]; + u8 log_doorbell_stride[0x5]; + u8 reserved_at_38[0x3]; + u8 log_doorbell_bar_size[0x5]; + + u8 doorbell_bar_offset[0x40]; + + u8 reserved_at_80[0x780]; +}; + enum { MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x0, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2, @@ -2751,6 +2765,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_fpga_cap_bits fpga_cap; struct mlx5_ifc_tls_cap_bits tls_cap; struct mlx5_ifc_device_mem_cap_bits device_mem_cap; + struct mlx5_ifc_device_virtio_emulation_cap_bits virtio_emulation_cap; u8 reserved_at_0[0x8000]; }; -- cgit v1.2.3 From ca1992c62cadb6c8e1e1b47e197b550f3cd89b76 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 12 Dec 2019 13:09:25 +0200 Subject: net/mlx5: Expose vDPA emulation device capabilities Expose vDPA emulation device capabilities from the core layer. It includes reading the capabilities from the firmware and exposing helper functions to access the data. 
Signed-off-by: Yishai Hadas Reviewed-by: Shahaf Shuler Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index cc1c230f10ee..1a1c53f0262d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1105,6 +1105,7 @@ enum mlx5_cap_type { MLX5_CAP_DEV_MEM, MLX5_CAP_RESERVED_16, MLX5_CAP_TLS, + MLX5_CAP_VDPA_EMULATION = 0x13, MLX5_CAP_DEV_EVENT = 0x14, /* NUM OF CAP Types */ MLX5_CAP_NUM @@ -1297,6 +1298,14 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_DEV_EVENT(mdev, cap)\ MLX5_ADDR_OF(device_event_cap, (mdev)->caps.hca_cur[MLX5_CAP_DEV_EVENT], cap) +#define MLX5_CAP_DEV_VDPA_EMULATION(mdev, cap)\ + MLX5_GET(device_virtio_emulation_cap, \ + (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap) + +#define MLX5_CAP64_DEV_VDPA_EMULATION(mdev, cap)\ + MLX5_GET64(device_virtio_emulation_cap, \ + (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, -- cgit v1.2.3 From 860c8802ace14c646864795e057349c9fb2d60ad Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Sat, 9 Nov 2019 09:42:13 -0800 Subject: rcu: Use WRITE_ONCE() for assignments to ->pprev for hlist_nulls Eric Dumazet supplied a KCSAN report of a bug that forces use of hlist_unhashed_lockless() from sk_unhashed(): ------------------------------------------------------------------------ BUG: KCSAN: data-race in inet_unhash / inet_unhash write to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 1: __hlist_nulls_del include/linux/list_nulls.h:88 [inline] hlist_nulls_del_init_rcu include/linux/rculist_nulls.h:36 [inline] __sk_nulls_del_node_init_rcu include/net/sock.h:676 [inline] inet_unhash+0x38f/0x4a0 net/ipv4/inet_hashtables.c:612 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 start_secondary+0x208/0x260 arch/x86/kernel/smpboot.c:264 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 read to 0xffff8880a69a0170 of 8 bytes by interrupt 
on cpu 0: sk_unhashed include/net/sock.h:607 [inline] inet_unhash+0x3d/0x4a0 net/ipv4/inet_hashtables.c:592 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 rest_init+0xec/0xf6 init/main.c:452 arch_call_rest_init+0x17/0x37 start_kernel+0x838/0x85e init/main.c:786 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 x86_64_start_kernel+0x72/0x76 arch/x86/kernel/head64.c:471 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc6+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ------------------------------------------------------------------------ This commit therefore replaces C-language assignments with WRITE_ONCE() in include/linux/list_nulls.h and include/linux/rculist_nulls.h. 
Reported-by: Eric Dumazet # For KCSAN Signed-off-by: Paul E. McKenney --- include/linux/list_nulls.h | 8 ++++---- include/linux/rculist_nulls.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 3ef96743db8d..1ecd35664e0d 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -72,10 +72,10 @@ static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); h->first = n; if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } static inline void __hlist_nulls_del(struct hlist_nulls_node *n) @@ -85,13 +85,13 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n) WRITE_ONCE(*pprev, next); if (!is_a_nulls(next)) - next->pprev = pprev; + WRITE_ONCE(next->pprev, pprev); } static inline void hlist_nulls_del(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index bc8206a8f30e..517a06f36c7a 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -34,7 +34,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) { if (!hlist_nulls_unhashed(n)) { __hlist_nulls_del(n); - n->pprev = NULL; + WRITE_ONCE(n->pprev, NULL); } } @@ -66,7 +66,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** @@ -94,10 +94,10 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); 
rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } /** -- cgit v1.2.3 From 46deb7449d99f37bebf5cbd7f95c136c6fafeaa5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 9 Nov 2019 10:35:13 -0800 Subject: rcu: Add and update docbook header comments in list.h [ paulmck: Fix typo found by kbuild test robot. ] Signed-off-by: Paul E. McKenney --- include/linux/list.h | 112 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 95 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 61f5aaf96192..4f3b7f71bdfd 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -23,6 +23,13 @@ #define LIST_HEAD(name) \ struct list_head name = LIST_HEAD_INIT(name) +/** + * INIT_LIST_HEAD - Initialize a list_head structure + * @list: list_head structure to be initialized. + * + * Initializes the list_head to point to itself. If it is a list header, + * the result is an empty list. + */ static inline void INIT_LIST_HEAD(struct list_head *list) { WRITE_ONCE(list->next, list); @@ -120,12 +127,6 @@ static inline void __list_del_clearprev(struct list_head *entry) entry->prev = NULL; } -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty() on entry does not return true after this, the entry is - * in an undefined state. - */ static inline void __list_del_entry(struct list_head *entry) { if (!__list_del_entry_valid(entry)) @@ -134,6 +135,12 @@ static inline void __list_del_entry(struct list_head *entry) __list_del(entry->prev, entry->next); } +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty() on entry does not return true after this, the entry is + * in an undefined state. 
+ */ static inline void list_del(struct list_head *entry) { __list_del_entry(entry); @@ -157,8 +164,15 @@ static inline void list_replace(struct list_head *old, new->prev->next = new; } +/** + * list_replace_init - replace old entry by new one and initialize the old one + * @old : the element to be replaced + * @new : the new element to insert + * + * If @old was empty, it will be overwritten. + */ static inline void list_replace_init(struct list_head *old, - struct list_head *new) + struct list_head *new) { list_replace(old, new); INIT_LIST_HEAD(old); @@ -744,21 +758,36 @@ static inline void INIT_HLIST_NODE(struct hlist_node *h) h->pprev = NULL; } +/** + * hlist_unhashed - Has node been removed from list and reinitialized? + * @h: Node to be checked + * + * Note that not all removal functions will leave a node in unhashed + * state. For example, hlist_nulls_del_init_rcu() does leave the + * node in unhashed state, but hlist_nulls_del() does not. + */ static inline int hlist_unhashed(const struct hlist_node *h) { return !h->pprev; } -/* This variant of hlist_unhashed() must be used in lockless contexts - * to avoid potential load-tearing. - * The READ_ONCE() is paired with the various WRITE_ONCE() in hlist - * helpers that are defined below. +/** + * hlist_unhashed_lockless - Version of hlist_unhashed for lockless use + * @h: Node to be checked + * + * This variant of hlist_unhashed() must be used in lockless contexts + * to avoid potential load-tearing. The READ_ONCE() is paired with the + * various WRITE_ONCE() in hlist helpers that are defined below. */ static inline int hlist_unhashed_lockless(const struct hlist_node *h) { return !READ_ONCE(h->pprev); } +/** + * hlist_empty - Is the specified hlist_head structure an empty hlist? + * @h: Structure to check. 
+ */ static inline int hlist_empty(const struct hlist_head *h) { return !READ_ONCE(h->first); @@ -774,6 +803,13 @@ static inline void __hlist_del(struct hlist_node *n) WRITE_ONCE(next->pprev, pprev); } +/** + * hlist_del - Delete the specified hlist_node from its list + * @n: Node to delete. + * + * Note that this function leaves the node in hashed state. Use + * hlist_del_init() or similar instead to unhash @n. + */ static inline void hlist_del(struct hlist_node *n) { __hlist_del(n); @@ -781,6 +817,12 @@ static inline void hlist_del(struct hlist_node *n) n->pprev = LIST_POISON2; } +/** + * hlist_del_init - Delete the specified hlist_node from its list and initialize + * @n: Node to delete. + * + * Note that this function leaves the node in unhashed state. + */ static inline void hlist_del_init(struct hlist_node *n) { if (!hlist_unhashed(n)) { @@ -789,6 +831,14 @@ static inline void hlist_del_init(struct hlist_node *n) } } +/** + * hlist_add_head - add a new entry at the beginning of the hlist + * @n: new entry to be added + * @h: hlist head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. 
+ */ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; @@ -799,9 +849,13 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) WRITE_ONCE(n->pprev, &h->first); } -/* next must be != NULL */ +/** + * hlist_add_before - add a new entry before the one specified + * @n: new entry to be added + * @next: hlist node to add it before, which must be non-NULL + */ static inline void hlist_add_before(struct hlist_node *n, - struct hlist_node *next) + struct hlist_node *next) { WRITE_ONCE(n->pprev, next->pprev); WRITE_ONCE(n->next, next); @@ -809,6 +863,11 @@ static inline void hlist_add_before(struct hlist_node *n, WRITE_ONCE(*(n->pprev), n); } +/** + * hlist_add_behind - add a new entry after the one specified + * @n: new entry to be added + * @prev: hlist node to add it after, which must be non-NULL + */ static inline void hlist_add_behind(struct hlist_node *n, struct hlist_node *prev) { @@ -820,20 +879,35 @@ static inline void hlist_add_behind(struct hlist_node *n, WRITE_ONCE(n->next->pprev, &n->next); } -/* after that we'll appear to be on some hlist and hlist_del will work */ +/** + * hlist_add_fake - create a fake hlist consisting of a single headless node + * @n: Node to make a fake list out of + * + * This makes @n appear to be its own predecessor on a headless hlist. + * The point of this is to allow things like hlist_del() to work correctly + * in cases where there is no list. + */ static inline void hlist_add_fake(struct hlist_node *n) { n->pprev = &n->next; } +/** + * hlist_fake: Is this node a fake hlist? + * @h: Node to check for being a self-referential fake hlist. + */ static inline bool hlist_fake(struct hlist_node *h) { return h->pprev == &h->next; } -/* +/** + * hlist_is_singular_node - is node the only element of the specified hlist? + * @n: Node to check for singularity. + * @h: Header for potentially singular list. 
+ * * Check whether the node is the only node of the head without - * accessing head: + * accessing head, thus avoiding unnecessary cache misses. */ static inline bool hlist_is_singular_node(struct hlist_node *n, struct hlist_head *h) @@ -841,7 +915,11 @@ hlist_is_singular_node(struct hlist_node *n, struct hlist_head *h) return !n->next && n->pprev == &h->first; } -/* +/** + * hlist_move_list - Move an hlist + * @old: hlist_head for old list. + * @new: hlist_head for new list. + * * Move a list from one list head to another. Fixup the pprev * reference of the first entry if it exists. */ -- cgit v1.2.3 From 02b99b38f3d96c77cf0a368d99952aa372dfe58a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 9 Nov 2019 10:45:47 -0800 Subject: rcu: Add a hlist_nulls_unhashed_lockless() function This commit adds an hlist_nulls_unhashed_lockless() to allow lockless checking for whether or not an hlist_nulls_node is hashed. While in the area, this commit also adds a docbook comment to the existing hlist_nulls_unhashed() function. Signed-off-by: Paul E. McKenney --- include/linux/list_nulls.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 1ecd35664e0d..fa6e8471bd22 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -56,11 +56,33 @@ static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr) return ((unsigned long)ptr) >> 1; } +/** + * hlist_nulls_unhashed - Has node been removed and reinitialized? + * @h: Node to be checked + * + * Note that not all removal functions will leave a node in unhashed state. + * For example, hlist_del_init_rcu() leaves the node in unhashed state, + * but hlist_nulls_del() does not. + */ static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h) { return !h->pprev; } +/** + * hlist_nulls_unhashed_lockless - Has node been removed and reinitialized? 
+ * @h: Node to be checked + * + * Note that not all removal functions will leave a node in unhashed state. + * For example, hlist_del_init_rcu() leaves the node in unhashed state, + * but hlist_nulls_del() does not. Unlike hlist_nulls_unhashed(), this + * function may be used locklessly. + */ +static inline int hlist_nulls_unhashed_lockless(const struct hlist_nulls_node *h) +{ + return !READ_ONCE(h->pprev); +} + static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) { return is_a_nulls(READ_ONCE(h->first)); -- cgit v1.2.3 From 7f5d51e26a471f771b8dae1b9ef417f5fd5e9c85 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Thu, 5 Dec 2019 11:46:49 +0530 Subject: rculist_nulls: Add docbook comments This patch adds docbook comment headers for hlist_nulls_first_rcu() and hlist_nulls_next_rcu() in rculist_nulls.h. Signed-off-by: Madhuparna Bhowmik Signed-off-by: Paul E. McKenney --- include/linux/rculist_nulls.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 517a06f36c7a..25952c4f83b0 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -38,9 +38,17 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) } } +/** + * hlist_nulls_first_rcu - returns the first element of the hash list. + * @head: the head of the list. + */ #define hlist_nulls_first_rcu(head) \ (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) +/** + * hlist_nulls_next_rcu - returns the element of the list after @node. + * @node: element of the list. 
+ */ #define hlist_nulls_next_rcu(node) \ (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) -- cgit v1.2.3 From 459b5287066f53c4b91569c070780a540de90b85 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Fri, 6 Dec 2019 00:23:52 +0530 Subject: rculist_nulls: Change docbook comment headers This patch changes the docbook comment "head for your list" to "head of the list". Signed-off-by: Madhuparna Bhowmik Signed-off-by: Paul E. McKenney --- include/linux/rculist_nulls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 25952c4f83b0..409a86bb5f25 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -112,7 +112,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_nulls_node to use as a loop cursor. - * @head: the head for your list. + * @head: the head of the list. * @member: the name of the hlist_nulls_node within the struct. * * The barrier() is needed to make sure compiler doesn't cache first element [1], @@ -132,7 +132,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, * iterate over list of given type safe against removal of list entry * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_nulls_node to use as a loop cursor. - * @head: the head for your list. + * @head: the head of the list. * @member: the name of the hlist_nulls_node within the struct. */ #define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \ -- cgit v1.2.3 From afa47fdfa29ffd3324e7b89551d1a6e54ccc042b Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Mon, 9 Dec 2019 13:20:43 +0530 Subject: rculist.h: Add list_tail_rcu() This patch adds the macro list_tail_rcu() and documents it. 
Signed-off-by: Madhuparna Bhowmik [ paulmck: Reword a bit. ] Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4b7ae1bf50b3..9f313e4999fe 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -40,6 +40,16 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) */ #define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) +/** + * list_tail_rcu - returns the prev pointer of the head of the list + * @head: the head of the list + * + * Note: This should only be used with the list header, and even then + * only if list_del() and similar primitives are not also used on the + * list header. + */ +#define list_tail_rcu(head) (*((struct list_head __rcu **)(&(head)->prev))) + /* * Check during list traversal that we are within an RCU reader */ -- cgit v1.2.3 From a442c2c3850dc308ab972f3d10d1077e2c8fd035 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Thu, 9 Jan 2020 11:23:17 -0800 Subject: mlx4: Bump up MAX_MSIX from 64 to 128 On modern hardware with a large number of cpus and using XDP, the current MSIX limit is insufficient. Bump the limit in order to allow more queues. 
Signed-off-by: Jonathan Lemon Reviewed-by: Jack Wang Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- include/linux/mlx4/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 36e412c3d657..20372de0b587 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -47,7 +47,7 @@ #define DEFAULT_UAR_PAGE_SHIFT 12 #define MAX_MSIX_P_PORT 17 -#define MAX_MSIX 64 +#define MAX_MSIX 128 #define MIN_MSIX_P_PORT 5 #define MLX4_IS_LEGACY_EQ_MODE(dev_cap) ((dev_cap).num_comp_vectors < \ (dev_cap).num_ports * MIN_MSIX_P_PORT) -- cgit v1.2.3 From c74f16b6034401b17bb1cf549871186a8ece5f92 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 12 Jan 2020 13:04:43 +0100 Subject: wan: ixp4xx_hss: prepare compile testing The ixp4xx_hss driver needs the platform data definition and the system clock rate to be compiled. Move both into a new platform_data header file. This is a prerequisite for compile testing, but turning on compile testing requires further patches to isolate the SoC headers. 
Signed-off-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Jakub Kicinski --- include/linux/platform_data/wan_ixp4xx_hss.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/linux/platform_data/wan_ixp4xx_hss.h (limited to 'include/linux') diff --git a/include/linux/platform_data/wan_ixp4xx_hss.h b/include/linux/platform_data/wan_ixp4xx_hss.h new file mode 100644 index 000000000000..d525a0feb9e1 --- /dev/null +++ b/include/linux/platform_data/wan_ixp4xx_hss.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PLATFORM_DATA_WAN_IXP4XX_HSS_H +#define __PLATFORM_DATA_WAN_IXP4XX_HSS_H + +#include + +/* Information about built-in HSS (synchronous serial) interfaces */ +struct hss_plat_info { + int (*set_clock)(int port, unsigned int clock_type); + int (*open)(int port, void *pdev, + void (*set_carrier_cb)(void *pdev, int carrier)); + void (*close)(int port, void *pdev); + u8 txreadyq; + u32 timer_freq; +}; + +#endif -- cgit v1.2.3 From a41a5b26d29fb0123cd3290dca453857cd8c0c66 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 12 Jan 2020 13:04:45 +0100 Subject: ixp4xx_eth: move platform_data definition The platform data is needed to compile the driver as standalone, so move it to a global location along with similar files. 
Signed-off-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Jakub Kicinski --- include/linux/platform_data/eth_ixp4xx.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/platform_data/eth_ixp4xx.h (limited to 'include/linux') diff --git a/include/linux/platform_data/eth_ixp4xx.h b/include/linux/platform_data/eth_ixp4xx.h new file mode 100644 index 000000000000..6f652ea0c6ae --- /dev/null +++ b/include/linux/platform_data/eth_ixp4xx.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PLATFORM_DATA_ETH_IXP4XX +#define __PLATFORM_DATA_ETH_IXP4XX + +#include + +#define IXP4XX_ETH_NPEA 0x00 +#define IXP4XX_ETH_NPEB 0x10 +#define IXP4XX_ETH_NPEC 0x20 + +/* Information about built-in Ethernet MAC interfaces */ +struct eth_plat_info { + u8 phy; /* MII PHY ID, 0 - 31 */ + u8 rxq; /* configurable, currently 0 - 31 only */ + u8 txreadyq; + u8 hwaddr[6]; +}; + +#endif -- cgit v1.2.3 From 0eac8ce95bb386838121189b2aa2216cd070f143 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 13 Jan 2020 11:22:16 +0100 Subject: ptr_ring: add include of linux/mm.h Commit 0bf7800f1799 ("ptr_ring: try vmalloc() when kmalloc() fails") started to use kvmalloc_array and kvfree, which are defined in mm.h, instead of the previous functions kcalloc and kfree, which are defined in slab.h. Add the missing include of linux/mm.h. This went unnoticed as other include files happened to include mm.h. Fixes: 0bf7800f1799 ("ptr_ring: try vmalloc() when kmalloc() fails") Signed-off-by: Jesper Dangaard Brouer Acked-by: Michael S. 
Tsirkin Signed-off-by: Jakub Kicinski --- include/linux/ptr_ring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 0abe9a4fc842..417db0a79a62 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #endif -- cgit v1.2.3 From 579a25a854d482bc9d0f9ab0e07ba32fb66bd9e3 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Mon, 13 Jan 2020 17:24:09 +0100 Subject: net: stmmac: Initial support for TBS Adds the initial hooks for TBS support. This needs a 32 byte descriptor in order for it to work with current HW. Adds all the logic for Enhanced Descriptors in main core but no HW related logic for now. Changes from v2: - Use bitfield for TBS status / support (Jakub) - Remove unneeded cache alignment (Jakub) - Fix checkpatch issues Signed-off-by: Jose Abreu Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 0531afa9b21e..19190c609282 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -139,6 +139,7 @@ struct stmmac_txq_cfg { u32 low_credit; bool use_prio; u32 prio; + int tbs_en; }; struct plat_stmmacenet_data { -- cgit v1.2.3 From e27f178793de16ca1b421f2c3f4bc3497b2ce723 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 12 Jan 2020 09:35:38 -0800 Subject: net: phy: Added IRQ print to phylink_bringup_phy() The information about the PHY attached to the PHYLINK instance is useful but is missing the IRQ prints that phy_attached_info() adds. phy_attached_info() is a bit long and it would not be possible to use phylink_info() anyway. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 5932bb8e9c35..3a70b756ac1a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1131,6 +1131,8 @@ static inline void phy_unlock_mdio_bus(struct phy_device *phydev) void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) __printf(2, 3); +char *phy_attached_info_irq(struct phy_device *phydev) + __malloc; void phy_attached_info(struct phy_device *phydev); /* Clause 22 PHY */ -- cgit v1.2.3 From 76564261a7db80c5f5c624e0122a28787f266bdf Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 13 Jan 2020 23:31:40 +0100 Subject: net: macsec: introduce the macsec_context structure This patch introduces the macsec_context structure. It will be used in the kernel to exchange information between the common MACsec implementation (macsec.c) and the MACsec hardware offloading implementations. This structure contains pointers to MACsec specific structures which contain the actual MACsec configuration, and to the underlying device (phydev for now). Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 3a70b756ac1a..be079a7bb40a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -332,6 +332,8 @@ struct phy_c45_device_ids { u32 device_ids[8]; }; +struct macsec_context; + /* phy_device: An instance of a PHY * * drv: Pointer to the driver for this PHY instance -- cgit v1.2.3 From 2e18135845b359f26c37df38ba56565496517c10 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 13 Jan 2020 23:31:42 +0100 Subject: net: phy: add MACsec ops in phy_device This patch adds a reference to MACsec ops in the phy_device, to allow PHYs to support offloading MACsec operations. The phydev lock will be held while calling those helpers. 
Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/linux/phy.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index be079a7bb40a..2929d0bc307f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -333,6 +333,7 @@ struct phy_c45_device_ids { }; struct macsec_context; +struct macsec_ops; /* phy_device: An instance of a PHY * @@ -356,6 +357,7 @@ struct macsec_context; * attached_dev: The attached enet driver's device instance ptr * adjust_link: Callback for the enet controller to respond to * changes in the link state. + * macsec_ops: MACsec offloading ops. * * speed, duplex, pause, supported, advertising, lp_advertising, * and autoneg are used like in mii_if_info @@ -455,6 +457,11 @@ struct phy_device { void (*phy_link_change)(struct phy_device *, bool up, bool do_carrier); void (*adjust_link)(struct net_device *dev); + +#if IS_ENABLED(CONFIG_MACSEC) + /* MACsec management functions */ + const struct macsec_ops *macsec_ops; +#endif }; #define to_phy_device(d) container_of(to_mdio_device(d), \ struct phy_device, mdio) -- cgit v1.2.3 From 5eee7bd7e245914e4e050c413dfe864e31805207 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 13 Jan 2020 18:42:26 -0500 Subject: net: skbuff: disambiguate argument and member for skb_list_walk_safe helper This worked before, because we made all callers name their next pointer "next". But in trying to be more "drop-in" ready, the silliness here is revealed. This commit fixes the problem by making the macro argument and the member use different names. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 016b3c4ab99a..aaf73b34f72f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1479,9 +1479,9 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb) } /* Iterate through singly-linked GSO fragments of an skb. */ -#define skb_list_walk_safe(first, skb, next) \ - for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \ - (skb) = (next), (next) = (skb) ? (skb)->next : NULL) +#define skb_list_walk_safe(first, skb, next_skb) \ + for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \ + (skb) = (next_skb), (next_skb) = (skb) ? (skb)->next : NULL) static inline void skb_list_del_init(struct sk_buff *skb) { -- cgit v1.2.3 From fd39073dba8632575b920edefba2577e1b84262a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 6 Jan 2020 12:55:33 -0800 Subject: fs-verity: implement readahead of Merkle tree pages When fs-verity verifies data pages, currently it reads each Merkle tree page synchronously using read_mapping_page(). Therefore, when the Merkle tree pages aren't already cached, fs-verity causes an extra 4 KiB I/O request for every 512 KiB of data (assuming that the Merkle tree uses SHA-256 and 4 KiB blocks). This results in more I/O requests and performance loss than is strictly necessary. Therefore, implement readahead of the Merkle tree pages. For simplicity, we take advantage of the fact that the kernel already does readahead of the file's *data*, just like it does for any other file. Due to this, we don't really need a separate readahead state (struct file_ra_state) just for the Merkle tree, but rather we just need to piggy-back on the existing data readahead requests. 
We also only really need to bother with the first level of the Merkle tree, since the usual fan-out factor is 128, so normally over 99% of Merkle tree I/O requests are for the first level. Therefore, make fsverity_verify_bio() enable readahead of the first Merkle tree level, for up to 1/4 the number of pages in the bio, when it sees that the REQ_RAHEAD flag is set on the bio. The readahead size is then passed down to ->read_merkle_tree_page() for the filesystem to (optionally) implement if it sees that the requested page is uncached. While we're at it, also make build_merkle_tree_level() set the Merkle tree readahead size, since it's easy to do there. However, for now don't set the readahead size in fsverity_verify_page(), since currently it's only used to verify holes on ext4 and f2fs, and it would need parameters added to know how much to read ahead. This patch significantly improves fs-verity sequential read performance. Some quick benchmarks with 'cat'-ing a 250MB file after dropping caches: On an ARM64 phone (using sha256-ce): Before: 217 MB/s After: 263 MB/s (compare to sha256sum of non-verity file: 357 MB/s) In an x86_64 VM (using sha256-avx2): Before: 173 MB/s After: 215 MB/s (compare to sha256sum of non-verity file: 223 MB/s) Link: https://lore.kernel.org/r/20200106205533.137005-1-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- include/linux/fsverity.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 3b6b8ccebe7d..ecc604e61d61 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -77,6 +77,10 @@ struct fsverity_operations { * * @inode: the inode * @index: 0-based index of the page within the Merkle tree + * @num_ra_pages: The number of Merkle tree pages that should be + * prefetched starting at @index if the page at @index + * isn't already cached. 
Implementations may ignore this + * argument; it's only a performance optimization. * * This can be called at any time on an open verity file, as well as * between ->begin_enable_verity() and ->end_enable_verity(). It may be @@ -87,7 +91,8 @@ struct fsverity_operations { * Return: the page on success, ERR_PTR() on failure */ struct page *(*read_merkle_tree_page)(struct inode *inode, - pgoff_t index); + pgoff_t index, + unsigned long num_ra_pages); /** * Write a Merkle tree block to the given inode. -- cgit v1.2.3 From cb4d03ab499d4c040f4ab6fd4389d2b49f42b5a5 Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Wed, 15 Jan 2020 10:43:01 -0800 Subject: bpf: Add generic support for lookup batch op This commit introduces generic support for the bpf_map_lookup_batch. This implementation can be used by almost all the bpf maps since its core implementation is relying on the existing map_get_next_key and map_lookup_elem. The bpf syscall subcommand introduced is: BPF_MAP_LOOKUP_BATCH The UAPI attribute is: struct { /* struct used by BPF_MAP_*_BATCH commands */ __aligned_u64 in_batch; /* start batch, * NULL to start from beginning */ __aligned_u64 out_batch; /* output: next start batch */ __aligned_u64 keys; __aligned_u64 values; __u32 count; /* input/output: * input: # of key/value * elements * output: # of filled elements */ __u32 map_fd; __u64 elem_flags; __u64 flags; } batch; in_batch/out_batch are opaque values used to communicate between user/kernel space; in_batch/out_batch must be of key_size length. To start iterating from the beginning in_batch must be null; count is the # of key/value elements to retrieve. Note that the 'keys' buffer must be a buffer of key_size * count size and the 'values' buffer must be value_size * count, where value_size must be aligned to 8 bytes by userspace if it's dealing with percpu maps. 'count' will contain the number of keys/values successfully retrieved.
Note that 'count' is an input/output variable and it can contain a lower value after a call. If there's no more entries to retrieve, ENOENT will be returned. If error is ENOENT, count might be > 0 in case it copied some values but there were no more entries to retrieve. Note that if the return code is an error and not -EFAULT, count indicates the number of elements successfully processed. Suggested-by: Stanislav Fomichev Signed-off-by: Brian Vazquez Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200115184308.162644-3-brianvv@google.com --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index aed2bc39d72b..807744ecaa5a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -44,6 +44,8 @@ struct bpf_map_ops { int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); void (*map_release_uref)(struct bpf_map *map); void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key); + int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr, + union bpf_attr __user *uattr); /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); @@ -982,6 +984,9 @@ void *bpf_map_area_alloc(u64 size, int numa_node); void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); +int generic_map_lookup_batch(struct bpf_map *map, + const union bpf_attr *attr, + union bpf_attr __user *uattr); extern int sysctl_unprivileged_bpf_disabled; -- cgit v1.2.3 From aa2e93b8e58e18442edfb2427446732415bc215e Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Wed, 15 Jan 2020 10:43:02 -0800 Subject: bpf: Add generic support for update and delete batch ops This commit adds generic support for update and delete batch ops that can be used for almost all the bpf 
maps. These commands share the same UAPI attr that lookup and lookup_and_delete batch ops use and the syscall commands are: BPF_MAP_UPDATE_BATCH BPF_MAP_DELETE_BATCH The main difference between update/delete and lookup batch ops is that for update/delete keys/values must be specified for userspace and because of that, neither in_batch nor out_batch are used. Suggested-by: Stanislav Fomichev Signed-off-by: Brian Vazquez Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200115184308.162644-4-brianvv@google.com --- include/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 807744ecaa5a..05466ad6cf1c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -46,6 +46,10 @@ struct bpf_map_ops { void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key); int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); + int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr, + union bpf_attr __user *uattr); + int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, + union bpf_attr __user *uattr); /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); @@ -987,6 +991,12 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); +int generic_map_update_batch(struct bpf_map *map, + const union bpf_attr *attr, + union bpf_attr __user *uattr); +int generic_map_delete_batch(struct bpf_map *map, + const union bpf_attr *attr, + union bpf_attr __user *uattr); extern int sysctl_unprivileged_bpf_disabled; -- cgit v1.2.3 From 057996380a42bb64ccc04383cfa9c0ace4ea11f0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 15 Jan 2020 10:43:04 -0800 Subject: bpf: Add batch 
ops to all htab bpf map htab can't use generic batch support due to some problematic behaviours inherent to the data structure, i.e. while iterating the bpf map a concurrent program might delete the next entry that batch was about to use, in that case there's no easy solution to retrieve the next entry, the issue has been discussed multiple times (see [1] and [2]). The only way hmap can be traversed without the problem previously exposed is by making sure that the map is traversing entire buckets. This commit implements those strict requirements for hmap; the implementation follows the same interaction as the generic support, with some exceptions: - If keys/values buffer are not big enough to traverse a bucket, ENOSPC will be returned. - out_batch contains the value of the next bucket in the iteration, not the next key, but this is transparent for the user since the user should never use out_batch for other than bpf batch syscalls. This commit implements BPF_MAP_LOOKUP_BATCH and adds support for new command BPF_MAP_LOOKUP_AND_DELETE_BATCH. Note that for update/delete batch ops it is possible to use the generic implementations.
[1] https://lore.kernel.org/bpf/20190724165803.87470-1-brianvv@google.com/ [2] https://lore.kernel.org/bpf/20190906225434.3635421-1-yhs@fb.com/ Signed-off-by: Yonghong Song Signed-off-by: Brian Vazquez Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200115184308.162644-6-brianvv@google.com --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 05466ad6cf1c..3517e32149a4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -46,6 +46,9 @@ struct bpf_map_ops { void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key); int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); + int (*map_lookup_and_delete_batch)(struct bpf_map *map, + const union bpf_attr *attr, + union bpf_attr __user *uattr); int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, -- cgit v1.2.3 From f397464eb7c25bda903ec8b9cf5701e72a1f7b16 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 7 Oct 2019 10:29:46 +0300 Subject: net/mlx5: Add structures layout for new MCAM access reg groups MCAM has 3 access_reg_groups (0-2). Defines data structures in order to read and parse access_reg_groups #1 and #2. 
Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c6abaf4f1c55..43cdf9211747 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8832,6 +8832,28 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_31_to_0[0x20]; }; +struct mlx5_ifc_mcam_access_reg_bits1 { + u8 regs_127_to_96[0x20]; + + u8 regs_95_to_64[0x20]; + + u8 regs_63_to_32[0x20]; + + u8 regs_31_to_0[0x20]; +}; + +struct mlx5_ifc_mcam_access_reg_bits2 { + u8 regs_127_to_99[0x1d]; + u8 mirc[0x1]; + u8 regs_97_to_96[0x2]; + + u8 regs_95_to_64[0x20]; + + u8 regs_63_to_32[0x20]; + + u8 regs_31_to_0[0x20]; +}; + struct mlx5_ifc_mcam_reg_bits { u8 reserved_at_0[0x8]; u8 feature_group[0x8]; @@ -8842,6 +8864,8 @@ struct mlx5_ifc_mcam_reg_bits { union { struct mlx5_ifc_mcam_access_reg_bits access_regs; + struct mlx5_ifc_mcam_access_reg_bits1 access_regs1; + struct mlx5_ifc_mcam_access_reg_bits2 access_regs2; u8 reserved_at_0[0x80]; } mng_access_reg_cap_mask; -- cgit v1.2.3 From 932ef155117cc5caf1108bd27664dab974ba6e89 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 7 Oct 2019 10:31:42 +0300 Subject: net/mlx5: Read MCAM register groups 1 and 2 On load, Driver caches MCAM (Management Capabilities Mask Register) registers. in addition to the only MCAM register group (0) the driver already reads, here we add support for reading groups 1 and 2. 
Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 14 +++++++++++++- include/linux/mlx5/driver.h | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1a1c53f0262d..0e62c3db45e5 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1121,6 +1121,9 @@ enum mlx5_pcam_feature_groups { enum mlx5_mcam_reg_groups { MLX5_MCAM_REGS_FIRST_128 = 0x0, + MLX5_MCAM_REGS_0x9080_0x90FF = 0x1, + MLX5_MCAM_REGS_0x9100_0x917F = 0x2, + MLX5_MCAM_REGS_NUM = 0x3, }; enum mlx5_mcam_feature_groups { @@ -1269,7 +1272,16 @@ enum mlx5_qcam_feature_groups { MLX5_GET(pcam_reg, (mdev)->caps.pcam, port_access_reg_cap_mask.regs_5000_to_507f.reg) #define MLX5_CAP_MCAM_REG(mdev, reg) \ - MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_access_reg_cap_mask.access_regs.reg) + MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_FIRST_128], \ + mng_access_reg_cap_mask.access_regs.reg) + +#define MLX5_CAP_MCAM_REG1(mdev, reg) \ + MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9080_0x90FF], \ + mng_access_reg_cap_mask.access_regs1.reg) + +#define MLX5_CAP_MCAM_REG2(mdev, reg) \ + MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9100_0x917F], \ + mng_access_reg_cap_mask.access_regs2.reg) #define MLX5_CAP_MCAM_FEATURE(mdev, fld) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 27200dea0297..54431256af42 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -684,7 +684,7 @@ struct mlx5_core_dev { u32 hca_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; u32 pcam[MLX5_ST_SZ_DW(pcam_reg)]; - u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; + u32 mcam[MLX5_MCAM_REGS_NUM][MLX5_ST_SZ_DW(mcam_reg)]; u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; u32 
qcam[MLX5_ST_SZ_DW(qcam_reg)]; u8 embedded_cpu; -- cgit v1.2.3 From bab58ba10ecfa39c46d280d2acbca6054e1e863d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 7 Oct 2019 10:30:32 +0300 Subject: net/mlx5: Add structures and defines for MIRC register Add needed structures, layouts and defines for MIRC (Management Image Re-activation Control) register. This structure will be used for the FSM reactivation flow in the downstream patches. Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 54431256af42..7848b9858587 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -145,6 +145,7 @@ enum { MLX5_REG_MCC = 0x9062, MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, + MLX5_REG_MIRC = 0x9162, }; enum mlx5_qpts_trust_state { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 43cdf9211747..a133583c3e4f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9471,6 +9471,13 @@ struct mlx5_ifc_mcda_reg_bits { u8 data[0][0x20]; }; +struct mlx5_ifc_mirc_reg_bits { + u8 reserved_at_0[0x18]; + u8 status_code[0x8]; + + u8 reserved_at_20[0x20]; +}; + union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_bufferx_reg_bits bufferx_reg; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; @@ -9526,6 +9533,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mcqi_reg_bits mcqi_reg; struct mlx5_ifc_mcc_reg_bits mcc_reg; struct mlx5_ifc_mcda_reg_bits mcda_reg; + struct mlx5_ifc_mirc_reg_bits mirc_reg; u8 reserved_at_0[0x60e0]; }; -- cgit v1.2.3 From 609b82727f719b41b50440c4028d48d0b2e04913 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 4 Nov 2019 14:51:55 +0200 Subject: net/mlx5: Expose resource dump 
register mapping Add new register enumeration for resource dump. Add layout mapping for resource dump: access command and response. Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 130 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 130 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7848b9858587..c821fa4d7475 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -146,6 +146,7 @@ enum { MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, MLX5_REG_MIRC = 0x9162, + MLX5_REG_RESOURCE_DUMP = 0xC000, }; enum mlx5_qpts_trust_state { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a133583c3e4f..6fe0431e11ec 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -823,7 +823,9 @@ struct mlx5_ifc_qos_cap_bits { struct mlx5_ifc_debug_cap_bits { u8 core_dump_general[0x1]; u8 core_dump_qp[0x1]; - u8 reserved_at_2[0x1e]; + u8 reserved_at_2[0x7]; + u8 resource_dump[0x1]; + u8 reserved_at_a[0x16]; u8 reserved_at_20[0x2]; u8 stall_detect[0x1]; @@ -1767,6 +1769,132 @@ struct mlx5_ifc_resize_field_select_bits { u8 resize_field_select[0x20]; }; +struct mlx5_ifc_resource_dump_bits { + u8 more_dump[0x1]; + u8 inline_dump[0x1]; + u8 reserved_at_2[0xa]; + u8 seq_num[0x4]; + u8 segment_type[0x10]; + + u8 reserved_at_20[0x10]; + u8 vhca_id[0x10]; + + u8 index1[0x20]; + + u8 index2[0x20]; + + u8 num_of_obj1[0x10]; + u8 num_of_obj2[0x10]; + + u8 reserved_at_a0[0x20]; + + u8 device_opaque[0x40]; + + u8 mkey[0x20]; + + u8 size[0x20]; + + u8 address[0x40]; + + u8 inline_data[52][0x20]; +}; + +struct mlx5_ifc_resource_dump_menu_record_bits { + u8 reserved_at_0[0x4]; + u8 num_of_obj2_supports_active[0x1]; + u8 num_of_obj2_supports_all[0x1]; + u8 must_have_num_of_obj2[0x1]; + u8 support_num_of_obj2[0x1]; + u8 
num_of_obj1_supports_active[0x1]; + u8 num_of_obj1_supports_all[0x1]; + u8 must_have_num_of_obj1[0x1]; + u8 support_num_of_obj1[0x1]; + u8 must_have_index2[0x1]; + u8 support_index2[0x1]; + u8 must_have_index1[0x1]; + u8 support_index1[0x1]; + u8 segment_type[0x10]; + + u8 segment_name[4][0x20]; + + u8 index1_name[4][0x20]; + + u8 index2_name[4][0x20]; +}; + +struct mlx5_ifc_resource_dump_segment_header_bits { + u8 length_dw[0x10]; + u8 segment_type[0x10]; +}; + +struct mlx5_ifc_resource_dump_command_segment_bits { + struct mlx5_ifc_resource_dump_segment_header_bits segment_header; + + u8 segment_called[0x10]; + u8 vhca_id[0x10]; + + u8 index1[0x20]; + + u8 index2[0x20]; + + u8 num_of_obj1[0x10]; + u8 num_of_obj2[0x10]; +}; + +struct mlx5_ifc_resource_dump_error_segment_bits { + struct mlx5_ifc_resource_dump_segment_header_bits segment_header; + + u8 reserved_at_20[0x10]; + u8 syndrome_id[0x10]; + + u8 reserved_at_40[0x40]; + + u8 error[8][0x20]; +}; + +struct mlx5_ifc_resource_dump_info_segment_bits { + struct mlx5_ifc_resource_dump_segment_header_bits segment_header; + + u8 reserved_at_20[0x18]; + u8 dump_version[0x8]; + + u8 hw_version[0x20]; + + u8 fw_version[0x20]; +}; + +struct mlx5_ifc_resource_dump_menu_segment_bits { + struct mlx5_ifc_resource_dump_segment_header_bits segment_header; + + u8 reserved_at_20[0x10]; + u8 num_of_records[0x10]; + + struct mlx5_ifc_resource_dump_menu_record_bits record[0]; +}; + +struct mlx5_ifc_resource_dump_resource_segment_bits { + struct mlx5_ifc_resource_dump_segment_header_bits segment_header; + + u8 reserved_at_20[0x20]; + + u8 index1[0x20]; + + u8 index2[0x20]; + + u8 payload[0][0x20]; +}; + +struct mlx5_ifc_resource_dump_terminate_segment_bits { + struct mlx5_ifc_resource_dump_segment_header_bits segment_header; +}; + +struct mlx5_ifc_menu_resource_dump_response_bits { + struct mlx5_ifc_resource_dump_info_segment_bits info; + struct mlx5_ifc_resource_dump_command_segment_bits cmd; + struct 
mlx5_ifc_resource_dump_menu_segment_bits menu; + struct mlx5_ifc_resource_dump_terminate_segment_bits terminate; +}; + enum { MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_PERIOD = 0x1, MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_MAX_COUNT = 0x2, -- cgit v1.2.3 From 31d8bde1c8812c9b44065dcd98e554488c6a98d2 Mon Sep 17 00:00:00 2001 From: Hamdan Igbaria Date: Thu, 9 Jan 2020 13:26:53 +0200 Subject: net/mlx5: Add copy header action struct layout Add definition for copy header action, copy action is used to copy header fields from source to destination. Signed-off-by: Hamdan Igbaria Signed-off-by: Alex Vesker Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6fe0431e11ec..23613a6ea51c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5609,6 +5609,21 @@ struct mlx5_ifc_add_action_in_bits { u8 data[0x20]; }; +struct mlx5_ifc_copy_action_in_bits { + u8 action_type[0x4]; + u8 src_field[0xc]; + u8 reserved_at_10[0x3]; + u8 src_offset[0x5]; + u8 reserved_at_18[0x3]; + u8 length[0x5]; + + u8 reserved_at_20[0x4]; + u8 dst_field[0xc]; + u8 reserved_at_30[0x3]; + u8 dst_offset[0x5]; + u8 reserved_at_38[0x8]; +}; + union mlx5_ifc_set_action_in_add_action_in_auto_bits { struct mlx5_ifc_set_action_in_bits set_action_in; struct mlx5_ifc_add_action_in_bits add_action_in; @@ -5618,6 +5633,7 @@ union mlx5_ifc_set_action_in_add_action_in_auto_bits { enum { MLX5_ACTION_TYPE_SET = 0x1, MLX5_ACTION_TYPE_ADD = 0x2, + MLX5_ACTION_TYPE_COPY = 0x3, }; enum { -- cgit v1.2.3 From 822e114b50641d3b57d2eb30939e60d8b4758288 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Mon, 1 Apr 2019 13:31:32 +0300 Subject: net/mlx5: Add mlx5_ifc definitions for connection tracking support Add the required hardware definitions to mlx5_ifc: ignore_flow_level, registers, 
copy_header, and fwd_and_modify cap. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Oz Sholomo Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 23613a6ea51c..e9c165ffe3f9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -375,8 +375,17 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_esp_spi[0x1]; u8 reserved_at_58[0x2]; u8 bth_dst_qp[0x1]; + u8 reserved_at_5b[0x5]; - u8 reserved_at_5b[0x25]; + u8 reserved_at_60[0x18]; + u8 metadata_reg_c_7[0x1]; + u8 metadata_reg_c_6[0x1]; + u8 metadata_reg_c_5[0x1]; + u8 metadata_reg_c_4[0x1]; + u8 metadata_reg_c_3[0x1]; + u8 metadata_reg_c_2[0x1]; + u8 metadata_reg_c_1[0x1]; + u8 metadata_reg_c_0[0x1]; }; struct mlx5_ifc_flow_table_prop_layout_bits { @@ -401,7 +410,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reformat_l3_tunnel_to_l2[0x1]; u8 reformat_l2_to_l3_tunnel[0x1]; u8 reformat_and_modify_action[0x1]; - u8 reserved_at_15[0x2]; + u8 ignore_flow_level[0x1]; + u8 reserved_at_16[0x1]; u8 table_miss_action_domain[0x1]; u8 termination_table[0x1]; u8 reserved_at_19[0x7]; @@ -722,7 +732,9 @@ enum { struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 fdb_to_vport_reg_c_id[0x8]; - u8 reserved_at_8[0xf]; + u8 reserved_at_8[0xd]; + u8 fdb_modify_header_fwd_to_table[0x1]; + u8 reserved_at_16[0x1]; u8 flow_source[0x1]; u8 reserved_at_18[0x2]; u8 multi_fdb_encap[0x1]; @@ -4141,7 +4153,8 @@ struct mlx5_ifc_set_fte_in_bits { u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_at_c0[0x18]; + u8 ignore_flow_level[0x1]; + u8 reserved_at_c1[0x17]; u8 modify_enable_mask[0x8]; u8 reserved_at_e0[0x20]; @@ -5627,6 +5640,7 @@ struct mlx5_ifc_copy_action_in_bits { union mlx5_ifc_set_action_in_add_action_in_auto_bits { struct mlx5_ifc_set_action_in_bits 
set_action_in; struct mlx5_ifc_add_action_in_bits add_action_in; + struct mlx5_ifc_copy_action_in_bits copy_action_in; u8 reserved_at_0[0x40]; }; @@ -5669,6 +5683,8 @@ enum { MLX5_ACTION_IN_FIELD_METADATA_REG_C_3 = 0x54, MLX5_ACTION_IN_FIELD_METADATA_REG_C_4 = 0x55, MLX5_ACTION_IN_FIELD_METADATA_REG_C_5 = 0x56, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_6 = 0x57, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_7 = 0x58, MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM = 0x59, MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM = 0x5B, }; -- cgit v1.2.3 From a58837f52d432f32995b1c00e803cc4db18762d3 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 30 Dec 2019 14:22:57 +0200 Subject: net/mlx5e: Expose FEC feilds and related capability bit Introduce 50G per lane FEC modes capability bit and newly supported fields in PPLM register which allow this configuration. Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index e9c165ffe3f9..2ab4562b4851 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8581,6 +8581,18 @@ struct mlx5_ifc_pplm_reg_bits { u8 fec_override_admin_50g[0x4]; u8 fec_override_admin_25g[0x4]; u8 fec_override_admin_10g_40g[0x4]; + + u8 fec_override_cap_400g_8x[0x10]; + u8 fec_override_cap_200g_4x[0x10]; + + u8 fec_override_cap_100g_2x[0x10]; + u8 fec_override_cap_50g_1x[0x10]; + + u8 fec_override_admin_400g_8x[0x10]; + u8 fec_override_admin_200g_4x[0x10]; + + u8 fec_override_admin_100g_2x[0x10]; + u8 fec_override_admin_50g_1x[0x10]; }; struct mlx5_ifc_ppcnt_reg_bits { @@ -8907,7 +8919,9 @@ struct mlx5_ifc_mpegc_reg_bits { }; struct mlx5_ifc_pcam_enhanced_features_bits { - u8 reserved_at_0[0x6d]; + u8 reserved_at_0[0x68]; + u8 fec_50G_per_lane_in_pplm[0x1]; + u8 reserved_at_69[0x4]; u8 rx_icrc_encapsulated_counter[0x1]; u8 
reserved_at_6e[0x4]; u8 ptys_extended_ethernet[0x1]; -- cgit v1.2.3 From 827a8cb2dd2b72848652b2a425bba3262808ff44 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Mon, 16 Dec 2019 12:50:13 +0200 Subject: net/mlx5e: Add discard counters per priority Add counters that count (per priority) the number of received packets that were dropped due to lack of buffers on a physical port. If this counter is increasing, it implies that the adapter is congested and cannot absorb the traffic coming from the network. Signed-off-by: Aharon Landau Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2ab4562b4851..ee0a34d66c7c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2180,7 +2180,9 @@ struct mlx5_ifc_eth_per_prio_grp_data_layout_bits { u8 rx_pause_transition_low[0x20]; - u8 reserved_at_3c0[0x40]; + u8 rx_discards_high[0x20]; + + u8 rx_discards_low[0x20]; u8 device_stall_minor_watermark_cnt_high[0x20]; -- cgit v1.2.3 From 61dc7b0141c51f5fa4aed97e49f9cf102ec51479 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 14 Nov 2019 16:59:58 +0200 Subject: net/mlx5: Refactor mlx5_create_auto_grouped_flow_table Refactor mlx5_create_auto_grouped_flow_table() to use ft_attr param which already carries the max_fte, prio and flags members, and is used the same in similar mlx5_create_flow_table() function.
Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 4e5b84e66822..a3f8b63839de 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -145,25 +145,25 @@ mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type, int vport); -struct mlx5_flow_table * -mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int num_flow_table_entries, - int max_num_groups, - u32 level, - u32 flags); - struct mlx5_flow_table_attr { int prio; int max_fte; u32 level; u32 flags; + + struct { + int max_num_groups; + } autogroup; }; struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, struct mlx5_flow_table_attr *ft_attr); +struct mlx5_flow_table * +mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr); + struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, -- cgit v1.2.3 From 5281a0c909194c477656e89401ac11dd7b29ad2d Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 23 Jul 2019 11:43:57 +0300 Subject: net/mlx5: fs_core: Introduce unmanaged flow tables Currently, most of the steering tree is statically declared ahead of time, with steering prios instances allocated for each fdb chain to assign max number of levels for each of them. This allows fs_core to manage the connections and levels of the flow table hierarchy to prevent loops, but restricts us with the number of supported chains and priorities. Introduce unmanaged flow tables, allowing the user to manage the flow table connections.
An unmanaged table is detached from the fs_core flow table hierarchy, and is only connected back to the hierarchy by explicit FTEs forward actions. This will be used together with firmware that supports ignoring the flow table levels to increase the number of supported chains and prios. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index a3f8b63839de..de2c838bae90 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -48,6 +48,7 @@ enum { MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0), MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1), MLX5_FLOW_TABLE_TERMINATION = BIT(2), + MLX5_FLOW_TABLE_UNMANAGED = BIT(3), }; #define LEFTOVERS_RULE_NUM 2 @@ -150,6 +151,7 @@ struct mlx5_flow_table_attr { int max_fte; u32 level; u32 flags; + struct mlx5_flow_table *next_ft; struct { int max_num_groups; -- cgit v1.2.3 From ff189b43568216c6211e9e7ddd9026cb8295e744 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 5 Jan 2020 15:15:54 +0200 Subject: net/mlx5: Add ignore level support fwd to table rules If the user sets the ignore flow level flag on a rule, that rule can point to a flow table of any level, including those with levels equal to or less than the level of the flow table it is added on. This, together with unmanaged tables, will be used to create an FDB chain/prio hierarchy much larger than the currently supported level range.
Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index de2c838bae90..81f393fb7d96 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -196,6 +196,7 @@ struct mlx5_fs_vlan { enum { FLOW_ACT_NO_APPEND = BIT(0), + FLOW_ACT_IGNORE_FLOW_LEVEL = BIT(1), }; struct mlx5_flow_act { -- cgit v1.2.3 From 79cdb0aaea8b5478db34afa1d4d5ecc808689a67 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 14 Nov 2019 17:02:59 +0200 Subject: net/mlx5: Allow creating autogroups with reserved entries Exclude the last n entries for an autogrouped flow table. Reserving entries at the end of the FT will ensure that this FG will be the last to be evaluated. This will be used in the next patch to create a miss group enabling custom actions on FT miss. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 81f393fb7d96..4cae16016b2b 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -155,6 +155,7 @@ struct mlx5_flow_table_attr { struct { int max_num_groups; + int num_reserved_entries; } autogroup; }; -- cgit v1.2.3 From 75ccae62cb8d42a619323a85c577107b8b37d797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 16 Jan 2020 16:14:44 +0100 Subject: xdp: Move devmap bulk queue into struct net_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 96360004b862 ("xdp: Make devmap flush_list common for all map instances"), changed devmap flushing to be a global operation instead of a per-map operation. 
However, the queue structure used for bulking was still allocated as part of the containing map. This patch moves the devmap bulk queue into struct net_device. The motivation for this is reusing it for the non-map variant of XDP_REDIRECT, which will be changed in a subsequent commit. To avoid other fields of struct net_device moving to different cache lines, we also move a couple of other members around. We defer the actual allocation of the bulk queue structure until the NETDEV_REGISTER notification devmap.c. This makes it possible to check for ndo_xdp_xmit support before allocating the structure, which is not possible at the time struct net_device is allocated. However, we keep the freeing in free_netdev() to avoid adding another RCU callback on NETDEV_UNREGISTER. Because of this change, we lose the reference back to the map that originated the redirect, so change the tracepoint to always return 0 as the map ID and index. Otherwise no functional change is intended with this patch. After this patch, the relevant part of struct net_device looks like this, according to pahole: /* --- cacheline 14 boundary (896 bytes) --- */ struct netdev_queue * _tx __attribute__((__aligned__(64))); /* 896 8 */ unsigned int num_tx_queues; /* 904 4 */ unsigned int real_num_tx_queues; /* 908 4 */ struct Qdisc * qdisc; /* 912 8 */ unsigned int tx_queue_len; /* 920 4 */ spinlock_t tx_global_lock; /* 924 4 */ struct xdp_dev_bulk_queue * xdp_bulkq; /* 928 8 */ struct xps_dev_maps * xps_cpus_map; /* 936 8 */ struct xps_dev_maps * xps_rxqs_map; /* 944 8 */ struct mini_Qdisc * miniq_egress; /* 952 8 */ /* --- cacheline 15 boundary (960 bytes) --- */ struct hlist_head qdisc_hash[16]; /* 960 128 */ /* --- cacheline 17 boundary (1088 bytes) --- */ struct timer_list watchdog_timer; /* 1088 40 */ /* XXX last struct has 4 bytes of padding */ int watchdog_timeo; /* 1128 4 */ /* XXX 4 bytes hole, try to pack */ struct list_head todo_list; /* 1136 16 */ /* --- cacheline 18 boundary (1152 bytes) --- 
*/ Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Björn Töpel Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/157918768397.1458396.12673224324627072349.stgit@toke.dk --- include/linux/netdevice.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2741aa35bec6..5ec3537fbdb1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -876,6 +876,7 @@ enum bpf_netdev_command { struct bpf_prog_offload_ops; struct netlink_ext_ack; struct xdp_umem; +struct xdp_dev_bulk_queue; struct netdev_bpf { enum bpf_netdev_command command; @@ -1986,12 +1987,10 @@ struct net_device { unsigned int num_tx_queues; unsigned int real_num_tx_queues; struct Qdisc *qdisc; -#ifdef CONFIG_NET_SCHED - DECLARE_HASHTABLE (qdisc_hash, 4); -#endif unsigned int tx_queue_len; spinlock_t tx_global_lock; - int watchdog_timeo; + + struct xdp_dev_bulk_queue __percpu *xdp_bulkq; #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_cpus_map; @@ -2001,11 +2000,15 @@ struct net_device { struct mini_Qdisc __rcu *miniq_egress; #endif +#ifdef CONFIG_NET_SCHED + DECLARE_HASHTABLE (qdisc_hash, 4); +#endif /* These may be needed for future network-power-down code. */ struct timer_list watchdog_timer; + int watchdog_timeo; - int __percpu *pcpu_refcnt; struct list_head todo_list; + int __percpu *pcpu_refcnt; struct list_head link_watch_list; -- cgit v1.2.3 From 1d233886dd904edbf239eeffe435c3308ae97625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 16 Jan 2020 16:14:45 +0100 Subject: xdp: Use bulking for non-map XDP_REDIRECT and consolidate code paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the bulk queue used by XDP_REDIRECT now lives in struct net_device, we can re-use the bulking for the non-map version of the bpf_redirect() helper. 
This is a simple matter of having xdp_do_redirect_slow() queue the frame on the bulk queue instead of sending it out with __bpf_tx_xdp(). Unfortunately we can't make the bpf_redirect() helper return an error if the ifindex doesn't exist (as bpf_redirect_map() does), because we don't have a reference to the network namespace of the ingress device at the time the helper is called. So we have to leave it as-is and keep the device lookup in xdp_do_redirect_slow(). Since this leaves less reason to have the non-map redirect code in a separate function, we get rid of the xdp_do_redirect_slow() function entirely. This does lose us the tracepoint disambiguation, but fortunately the xdp_redirect and xdp_redirect_map tracepoints use the same tracepoint entry structures. This means both can contain a map index, so we can just amend the tracepoint definitions so we always emit the xdp_redirect(_err) tracepoints, but with the map ID only populated if a map is present. This means we retire the xdp_redirect_map(_err) tracepoints entirely, but keep the definitions around in case someone is still listening for them. With this change, the performance of the xdp_redirect sample program goes from 5Mpps to 8.4Mpps (a 68% increase). Since the flush functions are no longer map-specific, rename the flush() functions to drop _map from their names. One of the renamed functions is the xdp_do_flush_map() callback used in all the xdp-enabled drivers. To keep from having to update all drivers, use a #define to keep the old name working, and only update the virtual drivers in this patch. 
Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/157918768505.1458396.17518057312953572912.stgit@toke.dk --- include/linux/bpf.h | 13 +++++++++++-- include/linux/filter.h | 10 ++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3517e32149a4..8e3b8f4ad183 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1056,7 +1056,9 @@ struct sk_buff; struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); -void __dev_map_flush(void); +void __dev_flush(void); +int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, + struct net_device *dev_rx); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, @@ -1169,13 +1171,20 @@ static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map return NULL; } -static inline void __dev_map_flush(void) +static inline void __dev_flush(void) { } struct xdp_buff; struct bpf_dtab_netdev; +static inline +int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, + struct net_device *dev_rx) +{ + return 0; +} + static inline int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx) diff --git a/include/linux/filter.h b/include/linux/filter.h index a366a0b64a57..f349e2c0884c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -918,7 +918,7 @@ static inline int xdp_ok_fwd_dev(const struct net_device *fwd, return 0; } -/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the +/* The pair of xdp_do_redirect and xdp_do_flush MUST be called in the * same cpu context. 
Further for best results no more than a single map * for the do_redirect/do_flush pair should be used. This limitation is * because we only track one map and force a flush when the map changes. @@ -929,7 +929,13 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog); -void xdp_do_flush_map(void); +void xdp_do_flush(void); + +/* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as + * it is no longer only flushing maps. Keep this define for compatibility + * until all drivers are updated - do not use xdp_do_flush_map() in new code! + */ +#define xdp_do_flush_map xdp_do_flush void bpf_warn_invalid_xdp_action(u32 act); -- cgit v1.2.3 From 35f4cd96f5551dc1b2641159e7bb7bf91de6600f Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 28 Dec 2019 16:19:12 +0000 Subject: stop_machine: Make stop_cpus() static The function stop_cpus() is only used internally by the stop_machine for stop multiple cpus. Make it static. 
Signed-off-by: Yangtao Li Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191228161912.24082-1-tiny.windzz@gmail.com --- include/linux/stop_machine.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 648298f877da..76d8b09384a7 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -32,7 +32,6 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg); int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg); bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf); -int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); void stop_machine_park(int cpu); void stop_machine_unpark(int cpu); void stop_machine_yield(const struct cpumask *cpumask); @@ -81,14 +80,6 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu, return false; } -static inline int stop_cpus(const struct cpumask *cpumask, - cpu_stop_fn_t fn, void *arg) -{ - if (cpumask_test_cpu(raw_smp_processor_id(), cpumask)) - return stop_one_cpu(raw_smp_processor_id(), fn, arg); - return -ENOENT; -} - #endif /* CONFIG_SMP */ /* -- cgit v1.2.3 From a4f9a0e51bbf89cb461b1985a1a570e6b87da3b5 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 15 Jan 2020 11:20:20 +0100 Subject: sched/fair: Remove redundant call to cpufreq_update_util() With commit bef69dd87828 ("sched/cpufreq: Move the cfs_rq_util_change() call to cpufreq_update_util()") update_load_avg() has become the central point for calling cpufreq (not including the update of blocked load). This change helps to simplify further the number of calls to cpufreq_update_util() and to remove last redundant ones. 
With update_load_avg(), we are now sure that cpufreq_update_util() will be called after every task attachment to a cfs_rq and especially after propagating this event down to the util_avg of the root cfs_rq, which is the level that is used by cpufreq governors like schedutil to set the frequency of a CPU. The SCHED_CPUFREQ_MIGRATION flag forces an early call to cpufreq when the migration happens in a cgroup whereas util_avg of root cfs_rq is not yet updated and this call is duplicated with the one that happens immediately after when the migration event reaches the root cfs_rq. The dedicated flag SCHED_CPUFREQ_MIGRATION is now useless and can be removed. The interface of attach_entity_load_avg() can also be simplified accordingly. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Rafael J. Wysocki Link: https://lkml.kernel.org/r/1579083620-24943-1-git-send-email-vincent.guittot@linaro.org --- include/linux/sched/cpufreq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index cc6bcc1e96bc..3ed5aa18593f 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -9,7 +9,6 @@ */ #define SCHED_CPUFREQ_IOWAIT (1U << 0) -#define SCHED_CPUFREQ_MIGRATION (1U << 1) #ifdef CONFIG_CPU_FREQ struct cpufreq_policy; -- cgit v1.2.3 From 080bb352fad00d04995102f681b134e3754bfb6e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 15 Jan 2020 20:48:50 -0800 Subject: net: phy: Maintain MDIO device and bus statistics We maintain global statistics for an entire MDIO bus, as well as broken down, per MDIO bus address statistics. Given that it is possible for MDIO devices such as switches to access MDIO bus addresses for which there is not a mdio_device instance created (therefore not a corresponding device directory in sysfs either), we also maintain per-address statistics under the statistics folder. 
The layout looks like this: /sys/class/mdio_bus/../statistics/ transfers errors writes reads transfers_<addr> errors_<addr> writes_<addr> reads_<addr> When a mdio_device instance is registered, a statistics/ folder is created with the transfers, errors, writes and reads attributes which point to the appropriate MDIO bus statistics structure. Statistics are 64-bit unsigned quantities and maintained through the u64_stats_sync.h helper functions. Signed-off-by: Florian Fainelli Tested-by: Andrew Lunn Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 2929d0bc307f..99a87f02667f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -212,6 +213,15 @@ struct sfp_bus; struct sfp_upstream_ops; struct sk_buff; +struct mdio_bus_stats { + u64_stats_t transfers; + u64_stats_t errors; + u64_stats_t writes; + u64_stats_t reads; + /* Must be last, add new statistics above */ + struct u64_stats_sync syncp; +}; + /* * The Bus class for PHYs. Devices which provide access to * PHYs should register using this structure @@ -224,6 +234,7 @@ struct mii_bus { int (*read)(struct mii_bus *bus, int addr, int regnum); int (*write)(struct mii_bus *bus, int addr, int regnum, u16 val); int (*reset)(struct mii_bus *bus); + struct mdio_bus_stats stats[PHY_MAX_ADDR]; /* * A lock to ensure that only one thing can read/write -- cgit v1.2.3 From 26c0e44a213b272abec0e8fba4a5a2801f95208e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Jan 2020 18:22:42 +0100 Subject: efi: Add a flags parameter to efi_memory_map In preparation for garbage collecting dynamically allocated EFI memory maps, where the allocation method of memblock vs slab needs to be recalled, convert the existing 'late' flag into a 'flags' bitmask. Arrange for the flag to be passed via 'struct efi_memory_map_data'. 
This structure grows additional flags in follow-on changes. Signed-off-by: Dan Williams Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-11-ardb@kernel.org --- include/linux/efi.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 7e8e25b1d11c..f117d68c314e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -767,6 +767,7 @@ struct efi_memory_map_data { unsigned long size; unsigned long desc_version; unsigned long desc_size; + unsigned long flags; }; struct efi_memory_map { @@ -776,7 +777,8 @@ struct efi_memory_map { int nr_map; unsigned long desc_version; unsigned long desc_size; - bool late; +#define EFI_MEMMAP_LATE (1UL << 0) + unsigned long flags; }; struct efi_mem_range { -- cgit v1.2.3 From 1db91035d01aa8bfa2350c00ccb63d629b4041ad Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Jan 2020 18:22:43 +0100 Subject: efi: Add tracking for dynamically allocated memmaps In preparation for fixing efi_memmap_alloc() leaks, add support for recording whether the memmap was dynamically allocated from slab, memblock, or is the original physical memmap provided by the platform. Given this tracking is established in efi_memmap_alloc() and needs to be carried to efi_memmap_install(), use 'struct efi_memory_map_data' to convey the flags. Some small cleanups result from this reorganization, specifically the removal of local variables for 'phys' and 'size' that are already tracked in @data. 
Signed-off-by: Dan Williams Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-12-ardb@kernel.org --- include/linux/efi.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index f117d68c314e..adbe421835c1 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -759,8 +759,8 @@ typedef union { /* * Architecture independent structure for describing a memory map for the - * benefit of efi_memmap_init_early(), saving us the need to pass four - * parameters. + * benefit of efi_memmap_init_early(), and for passing context between + * efi_memmap_alloc() and efi_memmap_install(). */ struct efi_memory_map_data { phys_addr_t phys_map; @@ -778,6 +778,8 @@ struct efi_memory_map { unsigned long desc_version; unsigned long desc_size; #define EFI_MEMMAP_LATE (1UL << 0) +#define EFI_MEMMAP_MEMBLOCK (1UL << 1) +#define EFI_MEMMAP_SLAB (1UL << 2) unsigned long flags; }; @@ -972,11 +974,12 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); -extern phys_addr_t __init efi_memmap_alloc(unsigned int num_entries); +extern int __init efi_memmap_alloc(unsigned int num_entries, + struct efi_memory_map_data *data); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); -extern int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map); +extern int __init efi_memmap_install(struct efi_memory_map_data *data); extern int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range); extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap, -- cgit v1.2.3 From 484a418d075488c6999528247cc711d12c373447 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Jan 2020 18:22:45 
+0100 Subject: efi: Fix handling of multiple efi_fake_mem= entries Dave noticed that when specifying multiple efi_fake_mem= entries only the last entry was successfully being reflected in the efi memory map. This is due to the fact that the efi_memmap_insert() is being called multiple times, but on successive invocations the insertion should be applied to the last new memmap rather than the original map at efi_fake_memmap() entry. Rework efi_fake_memmap() to install the new memory map after each efi_fake_mem= entry is parsed. This also fixes an issue in efi_fake_memmap() that caused it to litter empty entries into the end of the efi memory map. An empty entry causes efi_memmap_insert() to attempt more memmap splits / copies than efi_memmap_split_count() accounted for when sizing the new map. When that happens efi_memmap_insert() may overrun its allocation, and if you are lucky will spill over to an unmapped page leading to a crash signature like the following rather than silent corruption: BUG: unable to handle page fault for address: ffffffffff281000 [..] RIP: 0010:efi_memmap_insert+0x11d/0x191 [..] Call Trace: ? bgrt_init+0xbe/0xbe ? efi_arch_mem_reserve+0x1cb/0x228 ? acpi_parse_bgrt+0xa/0xd ? acpi_table_parse+0x86/0xb8 ? acpi_boot_init+0x494/0x4e3 ? acpi_parse_x2apic+0x87/0x87 ? setup_acpi_sci+0xa2/0xa2 ? setup_arch+0x8db/0x9e1 ? start_kernel+0x6a/0x547 ? secondary_startup_64+0xb6/0xc0 Commit af1648984828 "x86/efi: Update e820 with reserved EFI boot services data to fix kexec breakage" introduced more occurrences where efi_memmap_insert() is invoked after an efi_fake_mem= configuration has been parsed. Previously the side effects of vestigial empty entries were benign, but with commit af1648984828 that follow-on efi_memmap_insert() invocation triggers efi_memmap_insert() overruns. 
Reported-by: Dave Young Signed-off-by: Dan Williams Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20191231014630.GA24942@dhcp-128-65.nay.redhat.com Link: https://lore.kernel.org/r/20200113172245.27925-14-ardb@kernel.org --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index adbe421835c1..7efd7072cca5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -976,6 +976,8 @@ extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int __init efi_memmap_alloc(unsigned int num_entries, struct efi_memory_map_data *data); +extern void __efi_memmap_free(u64 phys, unsigned long size, + unsigned long flags); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); -- cgit v1.2.3 From 2ab1d925aa4c0c179dd1eb492e8c03536972707b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 19 Jan 2020 14:31:55 +0100 Subject: net: phy: add generic ndo_do_ioctl handler phy_do_ioctl A number of network drivers has the same glue code to use phy_mii_ioctl as ndo_do_ioctl handler. So let's add such a generic ndo_do_ioctl handler to phylib. Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 99a87f02667f..be6b3a1b03da 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1242,6 +1242,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, int phy_ethtool_ksettings_set(struct phy_device *phydev, const struct ethtool_link_ksettings *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); +int phy_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); void phy_request_interrupt(struct phy_device *phydev); void phy_free_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); -- cgit v1.2.3 From e79f15a4598c1f3f3f7f3319ca308c63c91fdaf2 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 15 Jan 2020 17:28:51 +0800 Subject: x86/resctrl: Add task resctrl information display Monitoring tools that want to find out which resctrl control and monitor groups a task belongs to must currently read the "tasks" file in every group until they locate the process ID. Add an additional file /proc/{pid}/cpu_resctrl_groups to provide this information: 1) res: mon: resctrl is not available. 2) res:/ mon: Task is part of the root resctrl control group, and it is not associated to any monitor group. 3) res:/ mon:mon0 Task is part of the root resctrl control group and monitor group mon0. 4) res:group0 mon: Task is part of resctrl control group group0, and it is not associated to any monitor group. 5) res:group0 mon:mon1 Task is part of resctrl control group group0 and monitor group mon1. 
Signed-off-by: Chen Yu Signed-off-by: Borislav Petkov Tested-by: Jinshi Chen Link: https://lkml.kernel.org/r/20200115092851.14761-1-yu.c.chen@intel.com --- include/linux/resctrl.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/linux/resctrl.h (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h new file mode 100644 index 000000000000..daf5cf64c6a6 --- /dev/null +++ b/include/linux/resctrl.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _RESCTRL_H +#define _RESCTRL_H + +#ifdef CONFIG_PROC_CPU_RESCTRL + +int proc_resctrl_show(struct seq_file *m, + struct pid_namespace *ns, + struct pid *pid, + struct task_struct *tsk); + +#endif + +#endif /* _RESCTRL_H */ -- cgit v1.2.3 From e837dfde15a49c97dcbb059757d96c71e9e7bd54 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Fri, 13 Dec 2019 16:22:10 -0800 Subject: bitmap: genericize percpu bitmap region iterators Bitmaps are fairly popular for their space efficiency, but we don't have generic iterators available. Make percpu's bitmap region iterators available to everyone. 
Reviewed-by: Josef Bacik Signed-off-by: Dennis Zhou Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/linux/bitmap.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index ff335b22f23c..cb63feb3cfbe 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -456,6 +456,41 @@ static inline int bitmap_parse(const char *buf, unsigned int buflen, return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); } +static inline void bitmap_next_clear_region(unsigned long *bitmap, + unsigned int *rs, unsigned int *re, + unsigned int end) +{ + *rs = find_next_zero_bit(bitmap, end, *rs); + *re = find_next_bit(bitmap, end, *rs + 1); +} + +static inline void bitmap_next_set_region(unsigned long *bitmap, + unsigned int *rs, unsigned int *re, + unsigned int end) +{ + *rs = find_next_bit(bitmap, end, *rs); + *re = find_next_zero_bit(bitmap, end, *rs + 1); +} + +/* + * Bitmap region iterators. Iterates over the bitmap between [@start, @end). + * @rs and @re should be integer variables and will be set to start and end + * index of the current clear or set region. + */ +#define bitmap_for_each_clear_region(bitmap, rs, re, start, end) \ + for ((rs) = (start), \ + bitmap_next_clear_region((bitmap), &(rs), &(re), (end)); \ + (rs) < (re); \ + (rs) = (re) + 1, \ + bitmap_next_clear_region((bitmap), &(rs), &(re), (end))) + +#define bitmap_for_each_set_region(bitmap, rs, re, start, end) \ + for ((rs) = (start), \ + bitmap_next_set_region((bitmap), &(rs), &(re), (end)); \ + (rs) < (re); \ + (rs) = (re) + 1, \ + bitmap_next_set_region((bitmap), &(rs), &(re), (end))) + /** * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. 
* @n: u64 value -- cgit v1.2.3 From 3231e5d2228a2078ce5982d63ea9a617e4972c00 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 20 Jan 2020 22:16:07 +0100 Subject: net: phy: rename phy_do_ioctl to phy_do_ioctl_running We just added phy_do_ioctl, but it turned out that we need another version of this function that doesn't check whether net_device is running. So rename phy_do_ioctl to phy_do_ioctl_running. Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index be6b3a1b03da..f6e714da37d8 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1242,7 +1242,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, int phy_ethtool_ksettings_set(struct phy_device *phydev, const struct ethtool_link_ksettings *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); -int phy_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +int phy_do_ioctl_running(struct net_device *dev, struct ifreq *ifr, int cmd); void phy_request_interrupt(struct phy_device *phydev); void phy_free_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); -- cgit v1.2.3 From bbbf8430afe6906abbf879352fe10d24d380e588 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 20 Jan 2020 22:17:11 +0100 Subject: net: phy: add new version of phy_do_ioctl Add a new version of phy_do_ioctl that doesn't check whether net_device is running. It will typically be used if suitable drivers attach the PHY in probe already. Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index f6e714da37d8..c570e162e05e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1242,6 +1242,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, int phy_ethtool_ksettings_set(struct phy_device *phydev, const struct ethtool_link_ksettings *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); +int phy_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); int phy_do_ioctl_running(struct net_device *dev, struct ifreq *ifr, int cmd); void phy_request_interrupt(struct phy_device *phydev); void phy_free_interrupt(struct phy_device *phydev); -- cgit v1.2.3 From b46f36c05ab40d8339ec08809f89e45b1817cf86 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 15 Jan 2020 12:53:53 +0000 Subject: crypto: atmel-{aes,sha,tdes} - Retire crypto_platform_data These drivers no longer need it as they are only probed via DT. crypto_platform_data was allocated but unused, so remove it. 
This is a follow up for: commit 45a536e3a7e0 ("crypto: atmel-tdes - Retire dma_request_slave_channel_compat()") commit db28512f48e2 ("crypto: atmel-sha - Retire dma_request_slave_channel_compat()") commit 62f72cbdcf02 ("crypto: atmel-aes - Retire dma_request_slave_channel_compat()") Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- include/linux/platform_data/crypto-atmel.h | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 include/linux/platform_data/crypto-atmel.h (limited to 'include/linux') diff --git a/include/linux/platform_data/crypto-atmel.h b/include/linux/platform_data/crypto-atmel.h deleted file mode 100644 index 0471aaf6999b..000000000000 --- a/include/linux/platform_data/crypto-atmel.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_CRYPTO_ATMEL_H -#define __LINUX_CRYPTO_ATMEL_H - -#include - -/** - * struct crypto_dma_data - DMA data for AES/TDES/SHA - */ -struct crypto_dma_data { - struct at_dma_slave txdata; - struct at_dma_slave rxdata; -}; - -/** - * struct crypto_platform_data - board-specific AES/TDES/SHA configuration - * @dma_slave: DMA slave interface to use in data transfers. - */ -struct crypto_platform_data { - struct crypto_dma_data *dma_slave; -}; - -#endif /* __LINUX_CRYPTO_ATMEL_H */ -- cgit v1.2.3 From 6beea7afcc72b86986080ea1d228a42f2000f2a9 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 13 Jan 2020 10:42:44 +0100 Subject: ima: add the ability to query the cached hash of a given file This allows other parts of the kernel (perhaps a stacked LSM allowing system monitoring, eg. the proposed KRSI LSM [1]) to retrieve the hash of a given file from IMA if it's present in the iint cache. It's true that the existence of the hash means that it's also in the audit logs or in /sys/kernel/security/ima/ascii_runtime_measurements, but it can be difficult to pull that information out for every subsequent exec. 
This is especially true if a given host has been up for a long time and the file was first measured a long time ago. It should be kept in mind that this function gives access to cached entries which can be removed, for instance on security_inode_free(). This is based on Peter Moody's patch: https://sourceforge.net/p/linux-ima/mailman/message/33036180/ [1] https://lkml.org/lkml/2019/9/10/393 Signed-off-by: Florent Revest Reviewed-by: KP Singh Signed-off-by: Mimi Zohar --- include/linux/ima.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index f4644c54f648..1659217e9b60 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -23,6 +23,7 @@ extern int ima_read_file(struct file *file, enum kernel_read_file_id id); extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); extern void ima_post_path_mknod(struct dentry *dentry); +extern int ima_file_hash(struct file *file, char *buf, size_t buf_size); extern void ima_kexec_cmdline(const void *buf, int size); #ifdef CONFIG_IMA_KEXEC @@ -91,6 +92,11 @@ static inline void ima_post_path_mknod(struct dentry *dentry) return; } +static inline int ima_file_hash(struct file *file, char *buf, size_t buf_size) +{ + return -EOPNOTSUPP; +} + static inline void ima_kexec_cmdline(const void *buf, int size) {} #endif /* CONFIG_IMA */ -- cgit v1.2.3 From be8704ff07d2374bcc5c675526f95e70c6459683 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 20 Jan 2020 16:53:46 -0800 Subject: bpf: Introduce dynamic program extensions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce dynamic program extensions. The users can load additional BPF functions and replace global functions in previously loaded BPF programs while these programs are executing. Global functions are verified individually by the verifier based on their types only. 
Hence the global function in the new program whose types match the older function can safely replace that corresponding function. This new function/program is called 'an extension' of old program. At load time the verifier uses (attach_prog_fd, attach_btf_id) pair to identify the function to be replaced. The BPF program type is derived from the target program into extension program. Technically bpf_verifier_ops is copied from target program. The BPF_PROG_TYPE_EXT program type is a placeholder. It has empty verifier_ops. The extension program can call the same bpf helper functions as target program. Single BPF_PROG_TYPE_EXT type is used to extend XDP, SKB and all other program types. The verifier allows only one level of replacement. Meaning that the extension program cannot recursively extend an extension. That also means that the maximum stack size is increasing from 512 to 1024 bytes and maximum function nesting level from 8 to 16. The programs don't always consume that much. The stack usage is determined by the number of on-stack variables used by the program. The verifier could have enforced 512 limit for combined original plus extension program, but it makes for difficult user experience. The main use case for extensions is to provide generic mechanism to plug external programs into policy program or function call chaining. BPF trampoline is used to track both fentry/fexit and program extensions because both are using the same nop slot at the beginning of every BPF function. Attaching fentry/fexit to a function that was replaced is not allowed. The opposite is true as well. Replacing a function that is currently being analyzed with fentry/fexit is not allowed. The executable page allocated by BPF trampoline is not used by program extensions. This inefficiency will be optimized in future patches. Function by function verification of global function supports scalars and pointer to context only.
Hence program extensions are supported for such class of global functions only. In the future the verifier will be extended with support to pointers to structures, arrays with sizes, etc. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200121005348.2769920-2-ast@kernel.org --- include/linux/bpf.h | 10 +++++++++- include/linux/bpf_types.h | 2 ++ include/linux/btf.h | 5 +++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8e3b8f4ad183..05d16615054c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -465,7 +465,8 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); enum bpf_tramp_prog_type { BPF_TRAMP_FENTRY, BPF_TRAMP_FEXIT, - BPF_TRAMP_MAX + BPF_TRAMP_MAX, + BPF_TRAMP_REPLACE, /* more than MAX */ }; struct bpf_trampoline { @@ -480,6 +481,11 @@ struct bpf_trampoline { void *addr; bool ftrace_managed; } func; + /* if !NULL this is BPF_PROG_TYPE_EXT program that extends another BPF + * program by replacing one of its functions. func.addr is the address + * of the function it replaced. + */ + struct bpf_prog *extension_prog; /* list of BPF programs using this trampoline */ struct hlist_head progs_hlist[BPF_TRAMP_MAX]; /* Number of attached programs. A counter per kind. 
*/ @@ -1107,6 +1113,8 @@ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); +int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog, + struct btf *btf, const struct btf_type *t); struct bpf_prog *bpf_prog_by_id(u32 id); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 9f326e6ef885..c81d4ece79a4 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -68,6 +68,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport, #if defined(CONFIG_BPF_JIT) BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops, void *, void *) +BPF_PROG_TYPE(BPF_PROG_TYPE_EXT, bpf_extension, + void *, void *) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) diff --git a/include/linux/btf.h b/include/linux/btf.h index 881e9b76ef49..5c1ea99b480f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -107,6 +107,11 @@ static inline u16 btf_type_vlen(const struct btf_type *t) return BTF_INFO_VLEN(t->info); } +static inline u16 btf_func_linkage(const struct btf_type *t) +{ + return BTF_INFO_VLEN(t->info); +} + static inline bool btf_type_kflag(const struct btf_type *t) { return BTF_INFO_KFLAG(t->info); -- cgit v1.2.3 From 6e1918cfb263acacd3fc9239127732b69de64695 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 20 Jan 2020 14:31:56 -0800 Subject: fscrypt: don't allow v1 policies with casefolding Casefolded encrypted directories will use a new dirhash method that requires a secret key. If the directory uses a v2 encryption policy, it's easy to derive this key from the master key using HKDF. However, v1 encryption policies don't provide a way to derive additional keys. Therefore, don't allow casefolding on directories that use a v1 policy. 
Specifically, make it so that trying to enable casefolding on a directory that has a v1 policy fails, trying to set a v1 policy on a casefolded directory fails, and trying to open a casefolded directory that has a v1 policy (if one somehow exists on-disk) fails. Signed-off-by: Daniel Rosenberg [EB: improved commit message, updated fscrypt.rst, and other cleanups] Link: https://lore.kernel.org/r/20200120223201.241390-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 6fe8d0f96a4a..3984eadd7023 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -263,6 +263,8 @@ extern int __fscrypt_prepare_rename(struct inode *old_dir, unsigned int flags); extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); +extern int fscrypt_prepare_setflags(struct inode *inode, + unsigned int oldflags, unsigned int flags); extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link); @@ -519,6 +521,13 @@ static inline int __fscrypt_prepare_lookup(struct inode *dir, return -EOPNOTSUPP; } +static inline int fscrypt_prepare_setflags(struct inode *inode, + unsigned int oldflags, + unsigned int flags) +{ + return 0; +} + static inline int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, -- cgit v1.2.3 From aa408f835d025a839033988d3f5a2866314414ef Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 20 Jan 2020 14:31:57 -0800 Subject: fscrypt: derive dirhash key for casefolded directories When we allow indexed directories to use both encryption and casefolding, for the dirhash we can't just hash the ciphertext filenames that are stored on-disk (as is done currently) because the dirhash must be case insensitive, but the stored names are case-preserving. 
Nor can we hash the plaintext names with an unkeyed hash (or a hash keyed with a value stored on-disk like ext4's s_hash_seed), since that would leak information about the names that encryption is meant to protect. Instead, if we can accept a dirhash that's only computable when the fscrypt key is available, we can hash the plaintext names with a keyed hash using a secret key derived from the directory's fscrypt master key. We'll use SipHash-2-4 for this purpose. Prepare for this by deriving a SipHash key for each casefolded encrypted directory. Make sure to handle deriving the key not only when setting up the directory's fscrypt_info, but also in the case where the casefold flag is enabled after the fscrypt_info was already set up. (We could just always derive the key regardless of casefolding, but that would introduce unnecessary overhead for people not using casefolding.) Signed-off-by: Daniel Rosenberg [EB: improved commit message, updated fscrypt.rst, squashed with change that avoids unnecessarily deriving the key, and many other cleanups] Link: https://lore.kernel.org/r/20200120223201.241390-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 3984eadd7023..34bc5f73200c 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -247,6 +247,9 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); } +extern u64 fscrypt_fname_siphash(const struct inode *dir, + const struct qstr *name); + /* bio.c */ extern void fscrypt_decrypt_bio(struct bio *); extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, @@ -479,6 +482,13 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); } +static inline 
u64 fscrypt_fname_siphash(const struct inode *dir, + const struct qstr *name) +{ + WARN_ON_ONCE(1); + return 0; +} + /* bio.c */ static inline void fscrypt_decrypt_bio(struct bio *bio) { -- cgit v1.2.3 From edc440e3d27fb31e6f9663cf413fad97d714c060 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 20 Jan 2020 14:32:01 -0800 Subject: fscrypt: improve format of no-key names When an encrypted directory is listed without the key, the filesystem must show "no-key names" that uniquely identify directory entries, are at most 255 (NAME_MAX) bytes long, and don't contain '/' or '\0'. Currently, for short names the no-key name is the base64 encoding of the ciphertext filename, while for long names it's the base64 encoding of the ciphertext filename's dirhash and second-to-last 16-byte block. This format has the following problems: - Since it doesn't always include the dirhash, it's incompatible with directories that will use a secret-keyed dirhash over the plaintext filenames. In this case, the dirhash won't be computable from the ciphertext name without the key, so it instead must be retrieved from the directory entry and always included in the no-key name. Casefolded encrypted directories will use this type of dirhash. - It's ambiguous: it's possible to craft two filenames that map to the same no-key name, since the method used to abbreviate long filenames doesn't use a proper cryptographic hash function. Solve both these problems by switching to a new no-key name format that is the base64 encoding of a variable-length structure that contains the dirhash, up to 149 bytes of the ciphertext filename, and (if any bytes remain) the SHA-256 of the remaining bytes of the ciphertext filename. This ensures that each no-key name contains everything needed to find the directory entry again, contains only legal characters, doesn't exceed NAME_MAX, is unambiguous unless there's a SHA-256 collision, and that we only take the performance hit of SHA-256 on very long filenames. 
Note: this change does *not* address the existing issue where users can modify the 'dirhash' part of a no-key name and the filesystem may still accept the name. Signed-off-by: Daniel Rosenberg [EB: improved comments and commit message, fixed checking return value of base64_decode(), check for SHA-256 error, continue to set disk_name for short names to keep matching simpler, and many other cleanups] Link: https://lore.kernel.org/r/20200120223201.241390-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 77 ++----------------------------------------------- 1 file changed, 2 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 34bc5f73200c..556f4adf5dc5 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -172,81 +172,8 @@ extern int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname); - -#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 - -/* Extracts the second-to-last ciphertext block; see explanation below */ -#define FSCRYPT_FNAME_DIGEST(name, len) \ - ((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \ - FS_CRYPTO_BLOCK_SIZE)) - -#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE - -/** - * fscrypt_digested_name - alternate identifier for an on-disk filename - * - * When userspace lists an encrypted directory without access to the key, - * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE - * bytes are shown in this abbreviated form (base64-encoded) rather than as the - * full ciphertext (base64-encoded). This is necessary to allow supporting - * filenames up to NAME_MAX bytes, since base64 encoding expands the length. 
- * - * To make it possible for filesystems to still find the correct directory entry - * despite not knowing the full on-disk name, we encode any filesystem-specific - * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups, - * followed by the second-to-last ciphertext block of the filename. Due to the - * use of the CBC-CTS encryption mode, the second-to-last ciphertext block - * depends on the full plaintext. (Note that ciphertext stealing causes the - * last two blocks to appear "flipped".) This makes accidental collisions very - * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they - * share the same filesystem-specific hashes. - * - * However, this scheme isn't immune to intentional collisions, which can be - * created by anyone able to create arbitrary plaintext filenames and view them - * without the key. Making the "digest" be a real cryptographic hash like - * SHA-256 over the full ciphertext would prevent this, although it would be - * less efficient and harder to implement, especially since the filesystem would - * need to calculate it for each directory entry examined during a search. - */ -struct fscrypt_digested_name { - u32 hash; - u32 minor_hash; - u8 digest[FSCRYPT_FNAME_DIGEST_SIZE]; -}; - -/** - * fscrypt_match_name() - test whether the given name matches a directory entry - * @fname: the name being searched for - * @de_name: the name from the directory entry - * @de_name_len: the length of @de_name in bytes - * - * Normally @fname->disk_name will be set, and in that case we simply compare - * that to the name stored in the directory entry. The only exception is that - * if we don't have the key for an encrypted directory and a filename in it is - * very long, then we won't have the full disk_name and we'll instead need to - * match against the fscrypt_digested_name. - * - * Return: %true if the name matches, otherwise %false. 
- */ -static inline bool fscrypt_match_name(const struct fscrypt_name *fname, - const u8 *de_name, u32 de_name_len) -{ - if (unlikely(!fname->disk_name.name)) { - const struct fscrypt_digested_name *n = - (const void *)fname->crypto_buf.name; - if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_')) - return false; - if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) - return false; - return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len), - n->digest, FSCRYPT_FNAME_DIGEST_SIZE); - } - - if (de_name_len != fname->disk_name.len) - return false; - return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); -} - +extern bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len); extern u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); -- cgit v1.2.3 From 5576b991e9c1a11d2cc21c4b94fc75ec27603896 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 22 Jan 2020 15:36:46 -0800 Subject: bpf: Add BPF_FUNC_jiffies64 This patch adds a helper to read the 64bit jiffies. It will be used in a later patch to implement the bpf_cubic.c. The helper is inlined for jit_requested and 64 BITS_PER_LONG as the map_gen_lookup(). Other cases could be considered together with map_gen_lookup() if needed. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200122233646.903260-1-kafai@fb.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 05d16615054c..a9687861fd7e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1414,6 +1414,7 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto; extern const struct bpf_func_proto bpf_strtol_proto; extern const struct bpf_func_proto bpf_strtoul_proto; extern const struct bpf_func_proto bpf_tcp_sock_proto; +extern const struct bpf_func_proto bpf_jiffies64_proto; /* Shared helpers among cBPF and eBPF. 
*/ void bpf_user_rnd_init_once(void); -- cgit v1.2.3 From eda7acddf8080bb2d022a8d4b8b2345eb80c63ec Mon Sep 17 00:00:00 2001 From: Peter Krystad Date: Tue, 21 Jan 2020 16:56:16 -0800 Subject: mptcp: Handle MPTCP TCP options Add hooks to parse and format the MP_CAPABLE option. This option is handled according to MPTCP version 0 (RFC6824). MPTCP version 1 MP_CAPABLE (RFC6824bis/RFC8684) will be added later in coordination with related code changes. Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Co-developed-by: Davide Caratti Signed-off-by: Davide Caratti Signed-off-by: Peter Krystad Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- include/linux/tcp.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ca6f01531e64..52798ab00394 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -78,6 +78,16 @@ struct tcp_sack_block { #define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */ #define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/ +#if IS_ENABLED(CONFIG_MPTCP) +struct mptcp_options_received { + u64 sndr_key; + u64 rcvr_key; + u8 mp_capable : 1, + mp_join : 1, + dss : 1; +}; +#endif + struct tcp_options_received { /* PAWS/RTTM data */ int ts_recent_stamp;/* Time we stored ts_recent (for aging) */ @@ -95,6 +105,9 @@ struct tcp_options_received { u8 num_sacks; /* Number of SACK blocks */ u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ +#if IS_ENABLED(CONFIG_MPTCP) + struct mptcp_options_received mptcp; +#endif }; static inline void tcp_clear_options(struct tcp_options_received *rx_opt) @@ -104,6 +117,11 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) #if IS_ENABLED(CONFIG_SMC) rx_opt->smc_ok = 0; #endif +#if IS_ENABLED(CONFIG_MPTCP) + 
rx_opt->mptcp.mp_capable = 0; + rx_opt->mptcp.mp_join = 0; + rx_opt->mptcp.dss = 0; +#endif } /* This is the max number of SACKS that we'll generate and process. It's safe -- cgit v1.2.3 From 2303f994b3e187091fd08148066688b08f837efc Mon Sep 17 00:00:00 2001 From: Peter Krystad Date: Tue, 21 Jan 2020 16:56:17 -0800 Subject: mptcp: Associate MPTCP context with TCP socket Use ULP to associate a subflow_context structure with each TCP subflow socket. Creating these sockets requires new bind and connect functions to make sure ULP is set up immediately when the subflow sockets are created. Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Co-developed-by: Davide Caratti Signed-off-by: Davide Caratti Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Peter Krystad Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 52798ab00394..877947475814 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -397,6 +397,9 @@ struct tcp_sock { u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG * while socket was owned by user. */ +#if IS_ENABLED(CONFIG_MPTCP) + bool is_mptcp; +#endif #ifdef CONFIG_TCP_MD5SIG /* TCP AF-Specific parts; only used by MD5 Signature support so far */ -- cgit v1.2.3 From cec37a6e41aae7bf3df9a3da783380a4d9325fd8 Mon Sep 17 00:00:00 2001 From: Peter Krystad Date: Tue, 21 Jan 2020 16:56:18 -0800 Subject: mptcp: Handle MP_CAPABLE options for outgoing connections Add hooks to tcp_output.c to add MP_CAPABLE to an outgoing SYN request, to capture the MP_CAPABLE in the received SYN-ACK, to add MP_CAPABLE to the final ACK of the three-way handshake. 
Use the .sk_rx_dst_set() handler in the subflow proto to capture when the responding SYN-ACK is received and notify the MPTCP connection layer. Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Signed-off-by: Peter Krystad Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 877947475814..e9ee06d887fa 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -137,6 +137,9 @@ struct tcp_request_sock { const struct tcp_request_sock_ops *af_specific; u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; +#if IS_ENABLED(CONFIG_MPTCP) + bool is_mptcp; +#endif u32 txhash; u32 rcv_isn; u32 snt_isn; -- cgit v1.2.3 From 648ef4b88673dadb8463bf0d4b10fbf33d55def8 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 21 Jan 2020 16:56:24 -0800 Subject: mptcp: Implement MPTCP receive path Parses incoming DSS options and populates outgoing MPTCP ACK fields. MPTCP fields are parsed from the TCP option header and placed in an skb extension, allowing the upper MPTCP layer to access MPTCP options after the skb has gone through the TCP stack. The subflow implements its own data_ready() ops, which ensures that the pending data is in sequence - according to MPTCP seq number - dropping out-of-seq skbs. The DATA_READY bit flag is set if this is the case. This allows the MPTCP socket layer to determine if more data is available without having to consult the individual subflows. It additionally validates the current mapping and propagates EoF events to the connection socket. 
Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Co-developed-by: Peter Krystad Signed-off-by: Peter Krystad Co-developed-by: Davide Caratti Signed-off-by: Davide Caratti Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- include/linux/tcp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e9ee06d887fa..0d00dad4b85d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -82,9 +82,19 @@ struct tcp_sack_block { struct mptcp_options_received { u64 sndr_key; u64 rcvr_key; + u64 data_ack; + u64 data_seq; + u32 subflow_seq; + u16 data_len; u8 mp_capable : 1, mp_join : 1, dss : 1; + u8 use_map:1, + dsn64:1, + data_fin:1, + use_ack:1, + ack64:1, + __unused:3; }; #endif -- cgit v1.2.3 From cc7972ea1932335e0a0ee00ac8a24b3e8304630d Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Tue, 21 Jan 2020 16:56:31 -0800 Subject: mptcp: parse and emit MP_CAPABLE option according to v1 spec This implements MP_CAPABLE options parsing and writing according to RFC 6824 bis / RFC 8684: MPTCP v1. Local key is sent on syn/ack, and both keys are sent on 3rd ack. MP_CAPABLE messages len are updated accordingly. We need the skbuff to correctly emit the above, so we push the skbuff struct as an argument all the way from tcp code to the relevant mptcp callbacks. When processing incoming MP_CAPABLE + data, build a full blown DSS-like map info, to simplify later processing. On child socket creation, we need to record the remote key, if available. Signed-off-by: Christoph Paasch Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 0d00dad4b85d..4e2124607d32 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -94,7 +94,8 @@ struct mptcp_options_received { data_fin:1, use_ack:1, ack64:1, - __unused:3; + mpc_map:1, + __unused:2; }; #endif -- cgit v1.2.3 From a35d16905efc6ad5523d864a5c6efcb1e657e386 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 5 Aug 2019 18:22:27 -0400 Subject: rcu: Add basic support for kfree_rcu() batching Recently a discussion about stability and performance of a system involving a high rate of kfree_rcu() calls surfaced on the list [1] which led to another discussion how to prepare for this situation. This patch adds basic batching support for kfree_rcu(). It is "basic" because we do none of the slab management, dynamic allocation, code moving or any of the other things, some of which previous attempts did [2]. These fancier improvements can be follow-up patches and there are different ideas being discussed in those regards. This is an effort to start simple, and build up from there. In the future, an extension to use kfree_bulk and possibly per-slab batching could be done to further improve performance due to cache-locality and slab-specific bulk free optimizations. By using an array of pointers, the worker thread processing the work would need to read lesser data since it does not need to deal with large rcu_head(s) any longer. Torture tests follow in the next patch and show improvements of around 5x reduction in number of grace periods on a 16 CPU system. More details and test data are in that patch. There is an implication with rcu_barrier() with this patch. 
Since the kfree_rcu() calls can be batched, and may not be handed yet to the RCU machinery in fact, the monitor may not have even run yet to do the queue_rcu_work(), there seems no easy way of implementing rcu_barrier() to wait for those kfree_rcu()s that are already made. So this means a kfree_rcu() followed by an rcu_barrier() does not imply that memory will be freed once rcu_barrier() returns. Another implication is higher active memory usage (although not run-away..) until the kfree_rcu() flooding ends, in comparison to without batching. More details about this are in the second patch which adds an rcuperf test. Finally, in the near future we will get rid of kfree_rcu() special casing within RCU such as in rcu_do_batch and switch everything to just batching. Currently we don't do that since timer subsystem is not yet up and we cannot schedule the kfree_rcu() monitor as the timer subsystem's lock are not initialized. That would also mean getting rid of kfree_call_rcu_nobatch() entirely. [1] http://lore.kernel.org/lkml/20190723035725-mutt-send-email-mst@kernel.org [2] https://lkml.org/lkml/2017/12/19/824 Cc: kernel-team@android.com Cc: kernel-team@lge.com Co-developed-by: Byungchul Park Signed-off-by: Byungchul Park Signed-off-by: Joel Fernandes (Google) [ paulmck: Applied 0day and Paul Walmsley feedback on ->monitor_todo. ] [ paulmck: Make it work during early boot. ] [ paulmck: Add a crude early boot self-test. ] [ paulmck: Style adjustments and experimental docbook structure header. ] Link: https://lore.kernel.org/lkml/alpine.DEB.2.21.9999.1908161931110.32497@viisi.sifive.com/T/#me9956f66cb611b95d26ae92700e1d901f46e8c59 Signed-off-by: Paul E. 
McKenney --- include/linux/rcutiny.h | 6 ++++++ include/linux/rcutree.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 37b6f0c2b79d..1bd166aab6f3 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -39,6 +39,11 @@ static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func) call_rcu(head, func); } +static inline void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func) +{ + call_rcu(head, func); +} + void rcu_qs(void); static inline void rcu_softirq_qs(void) @@ -85,6 +90,7 @@ static inline void rcu_scheduler_starting(void) { } static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_is_watching(void) { return true; } static inline void rcu_momentary_dyntick_idle(void) { } +static inline void kfree_rcu_scheduler_running(void) { } /* Avoid RCU read-side critical sections leaking across. */ static inline void rcu_all_qs(void) { barrier(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c5147de885ec..6a65d3a16dbd 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -34,10 +34,12 @@ static inline void rcu_virt_note_context_switch(int cpu) void synchronize_rcu_expedited(void); void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); +void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func); void rcu_barrier(void); bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); +void kfree_rcu_scheduler_running(void); unsigned long get_state_synchronize_rcu(void); void cond_synchronize_rcu(unsigned long oldstate); -- cgit v1.2.3 From 77a40f97030b27b3fc1640a3ed203870f0817f57 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Fri, 30 Aug 2019 12:36:32 -0400 Subject: rcu: Remove kfree_rcu() special casing and lazy-callback handling This commit removes kfree_rcu() special-casing and the lazy-callback handling from Tree RCU. 
It moves some of this special casing to Tiny RCU, the removal of which will be the subject of later commits. This results in a nice negative delta. Suggested-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) [ paulmck: Add slab.h #include, thanks to kbuild test robot. ] Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 646759042333..b36afe7b22c9 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -22,7 +22,6 @@ struct rcu_cblist { struct rcu_head *head; struct rcu_head **tail; long len; - long len_lazy; }; #define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head } @@ -73,7 +72,6 @@ struct rcu_segcblist { #else long len; #endif - long len_lazy; u8 enabled; u8 offloaded; }; -- cgit v1.2.3 From 189a6883dcf7fa70e17403ae4225c60ffc9e404b Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Fri, 30 Aug 2019 12:36:33 -0400 Subject: rcu: Remove kfree_call_rcu_nobatch() Now that the kfree_rcu() special-casing has been removed from tree RCU, this commit removes kfree_call_rcu_nobatch() since it is no longer needed. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E.
McKenney --- include/linux/rcutiny.h | 5 ----- include/linux/rcutree.h | 1 - 2 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 1bd166aab6f3..b2b2dc990da9 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -39,11 +39,6 @@ static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func) call_rcu(head, func); } -static inline void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func) -{ - call_rcu(head, func); -} - void rcu_qs(void); static inline void rcu_softirq_qs(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 6a65d3a16dbd..2f787b9029d1 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -34,7 +34,6 @@ static inline void rcu_virt_note_context_switch(int cpu) void synchronize_rcu_expedited(void); void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); -void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func); void rcu_barrier(void); bool rcu_eqs_special_set(int cpu); -- cgit v1.2.3 From e1350e8e0ea5d959c23c5e593ff3026a67dbb049 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 15 Oct 2019 14:48:22 +0100 Subject: rcu: Move rcu_{expedited,normal} definitions into rcupdate.h This commit moves the rcu_{expedited,normal} definitions from kernel/rcu/update.c to include/linux/rcupdate.h to make sure they are in sync, and also to avoid the following warning from sparse: kernel/ksysfs.c:150:5: warning: symbol 'rcu_expedited' was not declared. Should it be static? kernel/ksysfs.c:167:5: warning: symbol 'rcu_normal' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: Paul E. 
McKenney --- include/linux/rcupdate.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index fe470243acdd..bb36379606d0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -896,4 +896,8 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) return false; } +/* kernel/ksysfs.c definitions */ +extern int rcu_expedited; +extern int rcu_normal; + #endif /* __LINUX_RCUPDATE_H */ -- cgit v1.2.3 From ef6aadcc76c97e25f62adc4e9d19684d3e5d0b87 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 24 Jan 2020 15:23:06 +0200 Subject: net: sched: Make TBF Qdisc offloadable Invoke ndo_setup_tc as appropriate to signal init / replacement, destroying and dumping of TBF Qdisc. Signed-off-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5ec3537fbdb1..11bdf6cb30bd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -850,6 +850,7 @@ enum tc_setup_type { TC_SETUP_QDISC_TAPRIO, TC_SETUP_FT, TC_SETUP_QDISC_ETS, + TC_SETUP_QDISC_TBF, }; /* These structures hold the attributes of bpf state that are being passed -- cgit v1.2.3 From e9b4e606c2289d6610113253922bb8c9ac7f68b0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 23 Jan 2020 17:15:07 +0100 Subject: bpf: Allow to resolve bpf trampoline and dispatcher in unwind When unwinding the stack we need to identify each address to successfully continue. Adding latch tree to keep trampolines for quick lookup during the unwind. The patch uses first 48 bytes for latch tree node, leaving 4048 bytes from the rest of the page for trampoline or dispatcher generated code. It's still enough not to affect trampoline and dispatcher progs maximum counts. 
Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200123161508.915203-3-jolsa@kernel.org --- include/linux/bpf.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a9687861fd7e..8e9ad3943cd9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -525,7 +525,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); -void *bpf_jit_alloc_exec_page(void); #define BPF_DISPATCHER_INIT(name) { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ .func = &name##func, \ @@ -557,6 +556,13 @@ void *bpf_jit_alloc_exec_page(void); #define BPF_DISPATCHER_PTR(name) (&name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); +struct bpf_image { + struct latch_tree_node tnode; + unsigned char data[]; +}; +#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image)) +bool is_bpf_image_address(unsigned long address); +void *bpf_image_alloc(void); #else static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { @@ -578,6 +584,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to) {} +static inline bool is_bpf_image_address(unsigned long address) +{ + return false; +} #endif struct bpf_func_info_aux { -- cgit v1.2.3 From 32efcc06d2a15fa87585614d12d6c2308cc2d3f3 Mon Sep 17 00:00:00 2001 From: Abdul Kabbani Date: Fri, 24 Jan 2020 16:34:02 -0500 Subject: tcp: export count for rehash attempts Using IPv6 flow-label to swiftly route around congested or disconnected network paths can greatly improve TCP reliability.
This patch adds SNMP counters and an OPT_STATS counter to track both host-level and connection-level statistics. Network administrators can use these counters to better evaluate the impact of this new ability. Export count for rehash attempts to 1) two SNMP counters: TcpTimeoutRehash (rehash due to timeouts), and TcpDuplicateDataRehash (rehash due to receiving duplicate packets) 2) Timestamping API SOF_TIMESTAMPING_OPT_STATS. Signed-off-by: Abdul Kabbani Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Kevin(Yudong) Yang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4e2124607d32..1cf73e6f85ca 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -386,6 +386,8 @@ struct tcp_sock { #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0 #endif + u16 timeout_rehash; /* Timeout-triggered rehash attempts */ + u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */ /* Receiver side RTT estimation */ -- cgit v1.2.3 From 2092767168f0681aa03727448b801600a364c013 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 22 Jan 2020 00:17:54 +0100 Subject: bitmap: Introduce bitmap_cut(): cut bits and shift remaining The new bitmap function bitmap_cut() copies bits from source to destination by removing the region specified by parameters first and cut, and remapping the bits above the cut region by right shifting them.
Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- include/linux/bitmap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index ff335b22f23c..f0f3a9fffa6a 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -53,6 +53,7 @@ * bitmap_find_next_zero_area_off(buf, len, pos, n, mask) as above * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n + * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask) * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) @@ -133,6 +134,9 @@ extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); +extern void bitmap_cut(unsigned long *dst, const unsigned long *src, + unsigned int first, unsigned int cut, + unsigned int nbits); extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, -- cgit v1.2.3 From 2b586feab44f41db605924db15c5b039535b1f9b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 4 Dec 2019 16:39:27 +0100 Subject: thermal/drivers/Kconfig: Convert the CPU cooling device to a choice The next changes will add a new way to cool down a CPU by injecting idle cycles. With the current configuration, a CPU cooling device is the cpufreq cooling device. As we want to add a new CPU cooling device, let's convert the CPU cooling to a choice giving a list of CPU cooling devices. At this point, there is obviously only one CPU cooling device. 
There are no functional changes. Signed-off-by: Daniel Lezcano Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20191204153930.9128-1-daniel.lezcano@linaro.org --- include/linux/cpu_cooling.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index b74732535e4b..3cdd85f987d7 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -19,7 +19,7 @@ struct cpufreq_policy; -#ifdef CONFIG_CPU_THERMAL +#ifdef CONFIG_CPU_FREQ_THERMAL /** * cpufreq_cooling_register - function to create cpufreq cooling device. * @policy: cpufreq policy. @@ -40,7 +40,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); struct thermal_cooling_device * of_cpufreq_cooling_register(struct cpufreq_policy *policy); -#else /* !CONFIG_CPU_THERMAL */ +#else /* !CONFIG_CPU_FREQ_THERMAL */ static inline struct thermal_cooling_device * cpufreq_cooling_register(struct cpufreq_policy *policy) { @@ -58,6 +58,6 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy) { return NULL; } -#endif /* CONFIG_CPU_THERMAL */ +#endif /* CONFIG_CPU_FREQ_THERMAL */ #endif /* __CPU_COOLING_H__ */ -- cgit v1.2.3 From a4c428e523490bf53e9c4ba2d809130c58c06ac7 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Dec 2019 23:53:16 +0100 Subject: thermal/drivers/cpu_cooling: Introduce the cpu idle cooling driver The cpu idle cooling device offers a new method to cool down a CPU by injecting idle cycles at runtime. It has some similarities with the intel power clamp driver but it is actually designed to be more generic and relies on the idle injection powercap framework. The idle injection duration is fixed while the running duration is variable. That allows control over the device reactivity for the user experience. An idle state powering down the CPU or the cluster allows the static leakage to drop, thus restoring the heat capacity of the SoC.
It can be set with a trip point between the hot and the critical points, giving the opportunity to prevent a hard reset of the system when the cpufreq cooling fails to cool down the CPU. With more sophisticated boards having a per core sensor, the idle cooling device allows to cool down a single core without throttling the compute capacity of several cpus belonging to the same clock line, so it could be used in collaboration with the cpufreq cooling device. Signed-off-by: Daniel Lezcano Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20191219225317.17158-2-daniel.lezcano@linaro.org --- include/linux/cpu_cooling.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index 3cdd85f987d7..65501d8f9778 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -60,4 +60,22 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy) } #endif /* CONFIG_CPU_FREQ_THERMAL */ +struct cpuidle_driver; + +#ifdef CONFIG_CPU_IDLE_THERMAL +int cpuidle_cooling_register(struct cpuidle_driver *drv); +int cpuidle_of_cooling_register(struct device_node *np, + struct cpuidle_driver *drv); +#else /* CONFIG_CPU_IDLE_THERMAL */ +static inline int cpuidle_cooling_register(struct cpuidle_driver *drv) +{ + return 0; +} +static inline int cpuidle_of_cooling_register(struct device_node *np, + struct cpuidle_driver *drv) +{ + return 0; +} +#endif /* CONFIG_CPU_IDLE_THERMAL */ + #endif /* __CPU_COOLING_H__ */ -- cgit v1.2.3 From 23affa2e29c5faa8cb59778f71e3bce2c8b3aa5c Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 19 Dec 2019 23:53:17 +0100 Subject: thermal/drivers/cpu_cooling: Rename to cpufreq_cooling As we introduced the idle injection cooling device called cpuidle_cooling, let's be consistent and rename the cpu_cooling to cpufreq_cooling as this one mitigates with OPPs changes. 
Signed-off-by: Daniel Lezcano Acked-by: Viresh Kumar Reviewed-by: Amit Kucheria Link: https://lore.kernel.org/r/20191219225317.17158-3-daniel.lezcano@linaro.org --- include/linux/clock_cooling.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clock_cooling.h b/include/linux/clock_cooling.h index b5cebf766e02..4b0a69863656 100644 --- a/include/linux/clock_cooling.h +++ b/include/linux/clock_cooling.h @@ -7,7 +7,7 @@ * Copyright (C) 2013 Texas Instruments Inc. * Contact: Eduardo Valentin * - * Highly based on cpu_cooling.c. + * Highly based on cpufreq_cooling.c. * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com) * Copyright (C) 2012 Amit Daniel */ -- cgit v1.2.3 From 3b33583265ed3b0ae76eddbabf9d038b4076d1a9 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sat, 25 Jan 2020 11:26:42 +0100 Subject: net: Add fraglist GRO/GSO feature flags This adds new Fraglist GRO/GSO feature flags. They will be used to configure fraglist GRO/GSO, which will be implemented with some follow-up patches. Signed-off-by: Steffen Klassert Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 6 +++++- include/linux/netdevice.h | 1 + include/linux/skbuff.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 4b19c544c59a..b239507da2a0 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -53,8 +53,9 @@ enum { NETIF_F_GSO_ESP_BIT, /* ... ESP with TSO */ NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */ NETIF_F_GSO_UDP_L4_BIT, /* ... UDP payload GSO (not UFO) */ + NETIF_F_GSO_FRAGLIST_BIT, /* ...
Fraglist GSO */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_UDP_L4_BIT, + NETIF_F_GSO_FRAGLIST_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ @@ -80,6 +81,7 @@ enum { NETIF_F_GRO_HW_BIT, /* Hardware Generic receive offload */ NETIF_F_HW_TLS_RECORD_BIT, /* Offload TLS record */ + NETIF_F_GRO_FRAGLIST_BIT, /* Fraglist GRO */ /* * Add your fresh new feature above and remember to update @@ -150,6 +152,8 @@ enum { #define NETIF_F_GSO_UDP_L4 __NETIF_F(GSO_UDP_L4) #define NETIF_F_HW_TLS_TX __NETIF_F(HW_TLS_TX) #define NETIF_F_HW_TLS_RX __NETIF_F(HW_TLS_RX) +#define NETIF_F_GRO_FRAGLIST __NETIF_F(GRO_FRAGLIST) +#define NETIF_F_GSO_FRAGLIST __NETIF_F(GSO_FRAGLIST) /* Finds the next feature with the highest number of the range of start till 0. */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 78e9c6c1b131..fcc76b890f50 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4570,6 +4570,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_FRAGLIST != (NETIF_F_GSO_FRAGLIST >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 26beae7db264..23aaaf08e1e9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -592,6 +592,8 @@ enum { SKB_GSO_UDP = 1 << 16, SKB_GSO_UDP_L4 = 1 << 17, + + SKB_GSO_FRAGLIST = 1 << 18, }; #if BITS_PER_LONG > 32 -- cgit v1.2.3 From 1a3c998f3a27ab6ecf56bdbb17e27e55fd6d47cd Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sat, 25 Jan 2020 11:26:43 +0100 Subject: net: Add a netdev software feature set that defaults to off. The previous patch added the NETIF_F_GRO_FRAGLIST feature. 
This is a software feature that should default to off. Current software features default to on, so add a new feature set that defaults to off. Signed-off-by: Steffen Klassert Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index b239507da2a0..34d050bb1ae6 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -230,6 +230,9 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) /* changeable features with no special hardware requirements */ #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) +/* Changeable features with no special hardware requirements that defaults to off. */ +#define NETIF_F_SOFT_FEATURES_OFF NETIF_F_GRO_FRAGLIST + #define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \ NETIF_F_HW_VLAN_CTAG_RX | \ NETIF_F_HW_VLAN_CTAG_TX | \ -- cgit v1.2.3 From 3a1296a38d0cf62bffb9a03c585cbd5dbf15d596 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sat, 25 Jan 2020 11:26:44 +0100 Subject: net: Support GRO/GSO fraglist chaining. This patch adds the core functions to chain/unchain GSO skbs at the frag_list pointer. This also adds a new GSO type SKB_GSO_FRAGLIST and a is_flist flag to napi_gro_cb which indicates that this flow will be GROed by fraglist chaining. Signed-off-by: Steffen Klassert Reviewed-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +++- include/linux/skbuff.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fcc76b890f50..20445f94eb1c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2326,7 +2326,8 @@ struct napi_gro_cb { /* Number of gro_receive callbacks this packet already went through */ u8 recursion_counter:4; - /* 1 bit hole */ + /* GRO is done by frag_list pointer chaining. */ + u8 is_flist:1; /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2694,6 +2695,7 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id); int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); +int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); static inline unsigned int skb_gro_offset(const struct sk_buff *skb) { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 23aaaf08e1e9..3d13a4b717e9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3535,6 +3535,8 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet); bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu); bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); +struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, + unsigned int offset); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); -- cgit v1.2.3 From 93642e14bd50e59b11cf6389ce3fc243e932777a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 25 Jan 2020 12:17:08 +0100 Subject: net: introduce dev_net notifier register/unregister variants Introduce dev_net variants of 
netdev notifier register/unregister functions and allow per-net notifier to follow the netdevice into the namespace it is moved to. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 20445f94eb1c..4626188a754b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -939,6 +939,11 @@ struct netdev_name_node { int netdev_name_node_alt_create(struct net_device *dev, const char *name); int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); +struct netdev_net_notifier { + struct list_head list; + struct notifier_block *nb; +}; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1793,6 +1798,10 @@ enum netdev_priv_flags { * * @wol_enabled: Wake-on-LAN is enabled * + * @net_notifier_list: List of per-net netdev notifier block + * that follow this device when it is moved + * to another network namespace. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. 
*/ @@ -2085,6 +2094,8 @@ struct net_device { struct lock_class_key addr_list_lock_key; bool proto_down; unsigned wol_enabled:1; + + struct list_head net_notifier_list; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2529,6 +2540,12 @@ int unregister_netdevice_notifier(struct notifier_block *nb); int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb); +int register_netdevice_notifier_dev_net(struct net_device *dev, + struct notifier_block *nb, + struct netdev_net_notifier *nn); +int unregister_netdevice_notifier_dev_net(struct net_device *dev, + struct notifier_block *nb, + struct netdev_net_notifier *nn); struct netdev_notifier_info { struct net_device *dev; -- cgit v1.2.3 From a85dd3a5170c8812cd835ea968ccadf0ebf1648e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 25 Jan 2020 13:42:14 +0100 Subject: net: remove eth_change_mtu All usage of this function was removed three years ago, and the function was marked as deprecated: a52ad514fdf3 ("net: deprecate eth_change_mtu, remove usage") So I think we can remove it now. Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- include/linux/etherdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index f6564b572d77..8801f1f986e5 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -43,7 +43,6 @@ __be16 eth_header_parse_protocol(const struct sk_buff *skb); int eth_prepare_mac_addr_change(struct net_device *dev, void *p); void eth_commit_mac_addr_change(struct net_device *dev, void *p); int eth_mac_addr(struct net_device *dev, void *p); -int eth_change_mtu(struct net_device *dev, int new_mtu); int eth_validate_addr(struct net_device *dev); struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, -- cgit v1.2.3 From 6a94b8ccf6b77f005ab1b36a878e1d81df0c033e Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Sun, 26 Jan 2020 23:11:04 +0100 Subject: ethtool: provide message mask with DEBUG_GET request Implement DEBUG_GET request to get debugging settings for a device. At the moment, only message mask corresponding to message level as reported by ETHTOOL_GMSGLVL ioctl request is provided. (It is called message level in ioctl interface but almost all drivers interpret it as a bit mask.) As part of the implementation, provide symbolic names for message mask bits as ETH_SS_MSG_CLASSES string set. Signed-off-by: Michal Kubecek Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 56 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4626188a754b..a9c6b5c61d27 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3913,22 +3913,48 @@ void netif_device_attach(struct net_device *dev); */ enum { - NETIF_MSG_DRV = 0x0001, - NETIF_MSG_PROBE = 0x0002, - NETIF_MSG_LINK = 0x0004, - NETIF_MSG_TIMER = 0x0008, - NETIF_MSG_IFDOWN = 0x0010, - NETIF_MSG_IFUP = 0x0020, - NETIF_MSG_RX_ERR = 0x0040, - NETIF_MSG_TX_ERR = 0x0080, - NETIF_MSG_TX_QUEUED = 0x0100, - NETIF_MSG_INTR = 0x0200, - NETIF_MSG_TX_DONE = 0x0400, - NETIF_MSG_RX_STATUS = 0x0800, - NETIF_MSG_PKTDATA = 0x1000, - NETIF_MSG_HW = 0x2000, - NETIF_MSG_WOL = 0x4000, + NETIF_MSG_DRV_BIT, + NETIF_MSG_PROBE_BIT, + NETIF_MSG_LINK_BIT, + NETIF_MSG_TIMER_BIT, + NETIF_MSG_IFDOWN_BIT, + NETIF_MSG_IFUP_BIT, + NETIF_MSG_RX_ERR_BIT, + NETIF_MSG_TX_ERR_BIT, + NETIF_MSG_TX_QUEUED_BIT, + NETIF_MSG_INTR_BIT, + NETIF_MSG_TX_DONE_BIT, + NETIF_MSG_RX_STATUS_BIT, + NETIF_MSG_PKTDATA_BIT, + NETIF_MSG_HW_BIT, + NETIF_MSG_WOL_BIT, + + /* When you add a new bit above, update netif_msg_class_names array + * in net/ethtool/common.c + */ + NETIF_MSG_CLASS_COUNT, }; +/* Both ethtool_ops interface and internal driver implementation use u32 */ +static_assert(NETIF_MSG_CLASS_COUNT <= 32); + +#define __NETIF_MSG_BIT(bit) ((u32)1 << (bit)) +#define __NETIF_MSG(name) __NETIF_MSG_BIT(NETIF_MSG_ ## name ## _BIT) + +#define NETIF_MSG_DRV __NETIF_MSG(DRV) +#define NETIF_MSG_PROBE __NETIF_MSG(PROBE) +#define NETIF_MSG_LINK __NETIF_MSG(LINK) +#define NETIF_MSG_TIMER __NETIF_MSG(TIMER) +#define NETIF_MSG_IFDOWN __NETIF_MSG(IFDOWN) +#define NETIF_MSG_IFUP __NETIF_MSG(IFUP) +#define NETIF_MSG_RX_ERR __NETIF_MSG(RX_ERR) +#define NETIF_MSG_TX_ERR __NETIF_MSG(TX_ERR) +#define NETIF_MSG_TX_QUEUED __NETIF_MSG(TX_QUEUED) +#define 
NETIF_MSG_INTR __NETIF_MSG(INTR) +#define NETIF_MSG_TX_DONE __NETIF_MSG(TX_DONE) +#define NETIF_MSG_RX_STATUS __NETIF_MSG(RX_STATUS) +#define NETIF_MSG_PKTDATA __NETIF_MSG(PKTDATA) +#define NETIF_MSG_HW __NETIF_MSG(HW) +#define NETIF_MSG_WOL __NETIF_MSG(WOL) #define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) #define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) -- cgit v1.2.3 From 2924e0699963b839f88f8c4e855929ea49185870 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 27 Jan 2020 15:26:07 +0200 Subject: qed: FW 8.42.2.0 Internal ram offsets modifications IRO stands for internal RAM offsets. Updating the FW binary produces different iro offsets. This file contains the different values, and a new representation of the values. Update the FW version Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 03f59a28fefd..3f437e826a4c 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -109,8 +109,8 @@ #define MAX_NUM_LL2_TX_STATS_COUNTERS 48 #define FW_MAJOR_VERSION 8 -#define FW_MINOR_VERSION 37 -#define FW_REVISION_VERSION 7 +#define FW_MINOR_VERSION 42 +#define FW_REVISION_VERSION 2 #define FW_ENGINEERING_VERSION 0 /***********************/ -- cgit v1.2.3 From 997af5df230e3288ec1f5b332955f9be643e450b Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 27 Jan 2020 15:26:12 +0200 Subject: qed: FW 8.42.2.0 Additional ll2 type LL2 queues were a limited resource due to FW constraints. This FW introduced a new resource which is a context based ll2 queue (memory on host). The additional ll2 queues are required for RDMA SRIOV. The code refers to the previous ll2 queues as ram-based or legacy, and the new queues as ctx-based. 
This change decreased the "legacy" ram-based queues therefore the first ll2 queue used for iWARP was converted to the ctx-based ll2 queue. This feature also exposed a bug in the DIRECT_REG_WR64 macro implementation which didn't have an effect in other use cases. Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 15 +++++++++++++-- include/linux/qed/qed_if.h | 4 +++- include/linux/qed/qed_ll2_if.h | 7 +++++++ 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 3f437e826a4c..a2b7826b36f0 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -105,8 +105,15 @@ #define CORE_SPQE_PAGE_SIZE_BYTES 4096 -#define MAX_NUM_LL2_RX_QUEUES 48 -#define MAX_NUM_LL2_TX_STATS_COUNTERS 48 +/* Number of LL2 RAM based queues */ +#define MAX_NUM_LL2_RX_RAM_QUEUES 32 + +/* Number of LL2 context based queues */ +#define MAX_NUM_LL2_RX_CTX_QUEUES 208 +#define MAX_NUM_LL2_RX_QUEUES \ + (MAX_NUM_LL2_RX_RAM_QUEUES + MAX_NUM_LL2_RX_CTX_QUEUES) + +#define MAX_NUM_LL2_TX_STATS_COUNTERS 48 #define FW_MAJOR_VERSION 8 #define FW_MINOR_VERSION 42 @@ -340,6 +347,10 @@ #define DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD (DQ_PWM_OFFSET_TCM16_BASE + 1) #define DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD (DQ_PWM_OFFSET_TCM16_BASE + 3) +/* DQ_DEMS_AGG_VAL_BASE */ +#define DQ_PWM_OFFSET_TCM_LL2_PROD_UPDATE \ + (DQ_PWM_OFFSET_TCM32_BASE + DQ_TCM_AGG_VAL_SEL_REG9 - 4) + #define DQ_REGION_SHIFT (12) /* DPM */ diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index b5db1ee96d78..9bcb2f419004 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -463,7 +463,7 @@ enum qed_db_rec_space { #define DIRECT_REG_RD(reg_addr) readl((void __iomem *)(reg_addr)) -#define DIRECT_REG_WR64(reg_addr, val) writeq((u32)val, \ +#define DIRECT_REG_WR64(reg_addr, val) writeq((u64)val, \ (void __iomem 
*)(reg_addr)) #define QED_COALESCE_MAX 0x1FF @@ -1177,6 +1177,8 @@ struct qed_common_ops { #define GET_FIELD(value, name) \ (((value) >> (name ## _SHIFT)) & name ## _MASK) +#define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) + /* Debug print definitions */ #define DP_ERR(cdev, fmt, ...) \ do { \ diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 5eb022953aca..1313c34d9a68 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -52,6 +52,12 @@ enum qed_ll2_conn_type { QED_LL2_TYPE_ROCE, QED_LL2_TYPE_IWARP, QED_LL2_TYPE_RESERVED3, + MAX_QED_LL2_CONN_TYPE +}; + +enum qed_ll2_rx_conn_type { + QED_LL2_RX_TYPE_LEGACY, + QED_LL2_RX_TYPE_CTX, MAX_QED_LL2_RX_CONN_TYPE }; @@ -165,6 +171,7 @@ struct qed_ll2_cbs { }; struct qed_ll2_acquire_data_inputs { + enum qed_ll2_rx_conn_type rx_conn_type; enum qed_ll2_conn_type conn_type; u16 mtu; u16 rx_num_desc; -- cgit v1.2.3 From 1392d19ff1d6ddd370cefa73b552a0262f9c35ea Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 27 Jan 2020 15:26:13 +0200 Subject: qed: Add abstraction for different hsi values per chip The number of BTB blocks was modified to be different between the two chip flavors supported (BB/K2); as a result, this led to a re-write of selecting the default hsi value based on the chip. This patch creates a lookup table for hsi values per chip rather than asking again and again for every value. Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index a2b7826b36f0..718ce72e5965 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -663,8 +663,8 @@ #define PBF_MAX_CMD_LINES 3328 /* Number of BTB blocks. Each block is 256B.
*/ -#define BTB_MAX_BLOCKS 1440 - +#define BTB_MAX_BLOCKS_BB 1440 +#define BTB_MAX_BLOCKS_K2 1840 /*****************/ /* PRS CONSTANTS */ /*****************/ -- cgit v1.2.3 From 6459d93619b5bc21f775e7eb12bc4d051743d7aa Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 27 Jan 2020 15:26:14 +0200 Subject: qed: FW 8.42.2.0 iscsi/fcoe changes - Remove struct iscsi_slow_path_hdr and field fw_cid from several structs - Remove struct iscsi_spe_func_dstry - Remove fields pbe_page_size_log and pbl_page_size_log from struct iscsi_conn_offload_param Signed-off-by: Manish Rangankar Signed-off-by: Saurav Kashyap Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/iscsi_common.h | 64 ++++++++++++++------------------------ include/linux/qed/storage_common.h | 3 +- 2 files changed, 26 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index 66aba505ec56..2f0a771a9176 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -999,7 +999,6 @@ struct iscsi_conn_offload_params { struct regpair r2tq_pbl_addr; struct regpair xhq_pbl_addr; struct regpair uhq_pbl_addr; - __le32 initial_ack; __le16 physical_q0; __le16 physical_q1; u8 flags; @@ -1011,10 +1010,10 @@ struct iscsi_conn_offload_params { #define ISCSI_CONN_OFFLOAD_PARAMS_RESTRICTED_MODE_SHIFT 2 #define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_MASK 0x1F #define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_SHIFT 3 - u8 pbl_page_size_log; - u8 pbe_page_size_log; u8 default_cq; + __le16 reserved0; __le32 stat_sn; + __le32 initial_ack; }; /* iSCSI connection statistics */ @@ -1029,25 +1028,14 @@ struct iscsi_conn_stats_params { __le32 reserved; }; -/* spe message header */ -struct iscsi_slow_path_hdr { - u8 op_code; - u8 flags; -#define ISCSI_SLOW_PATH_HDR_RESERVED0_MASK 0xF -#define ISCSI_SLOW_PATH_HDR_RESERVED0_SHIFT 0 -#define 
ISCSI_SLOW_PATH_HDR_LAYER_CODE_MASK 0x7 -#define ISCSI_SLOW_PATH_HDR_LAYER_CODE_SHIFT 4 -#define ISCSI_SLOW_PATH_HDR_RESERVED1_MASK 0x1 -#define ISCSI_SLOW_PATH_HDR_RESERVED1_SHIFT 7 -}; /* iSCSI connection update params passed by driver to FW in ISCSI update *ramrod. */ struct iscsi_conn_update_ramrod_params { - struct iscsi_slow_path_hdr hdr; + __le16 reserved0; __le16 conn_id; - __le32 fw_cid; + __le32 reserved1; u8 flags; #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_HD_EN_MASK 0x1 #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_HD_EN_SHIFT 0 @@ -1065,7 +1053,7 @@ struct iscsi_conn_update_ramrod_params { #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_ON_IMM_EN_SHIFT 6 #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_LUN_MAPPER_EN_MASK 0x1 #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_LUN_MAPPER_EN_SHIFT 7 - u8 reserved0[3]; + u8 reserved3[3]; __le32 max_seq_size; __le32 max_send_pdu_length; __le32 max_recv_pdu_length; @@ -1251,22 +1239,22 @@ enum iscsi_ramrod_cmd_id { /* iSCSI connection termination request */ struct iscsi_spe_conn_mac_update { - struct iscsi_slow_path_hdr hdr; + __le16 reserved0; __le16 conn_id; - __le32 fw_cid; + __le32 reserved1; __le16 remote_mac_addr_lo; __le16 remote_mac_addr_mid; __le16 remote_mac_addr_hi; - u8 reserved0[2]; + u8 reserved2[2]; }; /* iSCSI and TCP connection (Option 1) offload params passed by driver to FW in * iSCSI offload ramrod. */ struct iscsi_spe_conn_offload { - struct iscsi_slow_path_hdr hdr; + __le16 reserved0; __le16 conn_id; - __le32 fw_cid; + __le32 reserved1; struct iscsi_conn_offload_params iscsi; struct tcp_offload_params tcp; }; @@ -1275,44 +1263,36 @@ struct iscsi_spe_conn_offload { * iSCSI offload ramrod. 
*/ struct iscsi_spe_conn_offload_option2 { - struct iscsi_slow_path_hdr hdr; + __le16 reserved0; __le16 conn_id; - __le32 fw_cid; + __le32 reserved1; struct iscsi_conn_offload_params iscsi; struct tcp_offload_params_opt2 tcp; }; /* iSCSI collect connection statistics request */ struct iscsi_spe_conn_statistics { - struct iscsi_slow_path_hdr hdr; + __le16 reserved0; __le16 conn_id; - __le32 fw_cid; + __le32 reserved1; u8 reset_stats; - u8 reserved0[7]; + u8 reserved2[7]; struct regpair stats_cnts_addr; }; /* iSCSI connection termination request */ struct iscsi_spe_conn_termination { - struct iscsi_slow_path_hdr hdr; + __le16 reserved0; __le16 conn_id; - __le32 fw_cid; + __le32 reserved1; u8 abortive; - u8 reserved0[7]; + u8 reserved2[7]; struct regpair queue_cnts_addr; struct regpair query_params_addr; }; -/* iSCSI firmware function destroy parameters */ -struct iscsi_spe_func_dstry { - struct iscsi_slow_path_hdr hdr; - __le16 reserved0; - __le32 reserved1; -}; - /* iSCSI firmware function init parameters */ struct iscsi_spe_func_init { - struct iscsi_slow_path_hdr hdr; __le16 half_way_close_timeout; u8 num_sq_pages_in_ring; u8 num_r2tq_pages_in_ring; @@ -1324,8 +1304,12 @@ struct iscsi_spe_func_init { #define ISCSI_SPE_FUNC_INIT_RESERVED0_MASK 0x7F #define ISCSI_SPE_FUNC_INIT_RESERVED0_SHIFT 1 struct iscsi_debug_modes debug_mode; - __le16 reserved1; - __le32 reserved2; + u8 params; +#define ISCSI_SPE_FUNC_INIT_MAX_SYN_RT_MASK 0xF +#define ISCSI_SPE_FUNC_INIT_MAX_SYN_RT_SHIFT 0 +#define ISCSI_SPE_FUNC_INIT_RESERVED1_MASK 0xF +#define ISCSI_SPE_FUNC_INIT_RESERVED1_SHIFT 4 + u8 reserved2[7]; struct scsi_init_func_params func_params; struct scsi_init_func_queues q_params; }; diff --git a/include/linux/qed/storage_common.h b/include/linux/qed/storage_common.h index 505c0b48a761..9a973ffbbff5 100644 --- a/include/linux/qed/storage_common.h +++ b/include/linux/qed/storage_common.h @@ -107,8 +107,9 @@ struct scsi_drv_cmdq { struct scsi_init_func_params { __le16 num_tasks; 
u8 log_page_size; + u8 log_page_size_conn; u8 debug_mode; - u8 reserved2[12]; + u8 reserved2[11]; }; /* SCSI RQ/CQ/CMDQ firmware function init parameters */ -- cgit v1.2.3 From 0500a70d6e071040ffdaadebb966986afa83c5e9 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 27 Jan 2020 15:26:15 +0200 Subject: qed: FW 8.42.2.0 HSI changes This patch contains several HSI changes. The changes are part of features like RDMA VF and OVS, the patch also contains a fix to how the init code determines if the dmae is ready to be used. Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 21 +++++++----- include/linux/qed/eth_common.h | 78 ++++++++++++++++++++++++++++++------------ 2 files changed, 68 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 718ce72e5965..2c4737e6694a 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -76,7 +76,6 @@ #define FW_ASSERT_GENERAL_ATTN_IDX 32 -#define MAX_PINNED_CCFC 32 /* Queue Zone sizes in bytes */ #define TSTORM_QZONE_SIZE 8 @@ -139,10 +138,10 @@ #define MAX_NUM_VFS (MAX_NUM_VFS_K2) #define MAX_NUM_FUNCTIONS_BB (MAX_NUM_PFS_BB + MAX_NUM_VFS_BB) -#define MAX_NUM_FUNCTIONS (MAX_NUM_PFS + MAX_NUM_VFS) #define MAX_FUNCTION_NUMBER_BB (MAX_NUM_PFS + MAX_NUM_VFS_BB) -#define MAX_FUNCTION_NUMBER (MAX_NUM_PFS + MAX_NUM_VFS) +#define MAX_FUNCTION_NUMBER_K2 (MAX_NUM_PFS + MAX_NUM_VFS_K2) +#define MAX_NUM_FUNCTIONS (MAX_FUNCTION_NUMBER_K2) #define MAX_NUM_VPORTS_K2 (208) #define MAX_NUM_VPORTS_BB (160) @@ -229,6 +228,7 @@ #define DQ_XCM_TOE_TX_BD_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 #define DQ_XCM_TOE_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3 #define DQ_XCM_TOE_LOCAL_ADV_WND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG4 +#define DQ_XCM_ROCE_ACK_EDPM_DORQ_SEQ_CMD DQ_XCM_AGG_VAL_SEL_WORD5 /* UCM agg val selection (HW) */ #define DQ_UCM_AGG_VAL_SEL_WORD0 0 @@ -406,6 
+406,7 @@ /* Number of Protocol Indices per Status Block */ #define PIS_PER_SB_E4 12 +#define MAX_PIS_PER_SB PIS_PER_SB #define CAU_HC_STOPPED_STATE 3 #define CAU_HC_DISABLE_STATE 4 @@ -436,8 +437,6 @@ #define IGU_MEM_PBA_MSIX_RESERVED_UPPER 0x03ff #define IGU_CMD_INT_ACK_BASE 0x0400 -#define IGU_CMD_INT_ACK_UPPER (IGU_CMD_INT_ACK_BASE + \ - MAX_TOT_SB_PER_PATH - 1) #define IGU_CMD_INT_ACK_RESERVED_UPPER 0x05ff #define IGU_CMD_ATTN_BIT_UPD_UPPER 0x05f0 @@ -450,8 +449,6 @@ #define IGU_REG_SISR_MDPC_WOMASK_UPPER 0x05f6 #define IGU_CMD_PROD_UPD_BASE 0x0600 -#define IGU_CMD_PROD_UPD_UPPER (IGU_CMD_PROD_UPD_BASE +\ - MAX_TOT_SB_PER_PATH - 1) #define IGU_CMD_PROD_UPD_RESERVED_UPPER 0x07ff /*****************/ @@ -741,6 +738,8 @@ enum protocol_type { PROTOCOLID_PREROCE, PROTOCOLID_COMMON, PROTOCOLID_RESERVED1, + PROTOCOLID_RDMA, + PROTOCOLID_SCSI, MAX_PROTOCOL_TYPE }; @@ -761,6 +760,10 @@ union rdma_eqe_data { struct rdma_eqe_destroy_qp rdma_destroy_qp_data; }; +struct tstorm_queue_zone { + __le32 reserved[2]; +}; + /* Ustorm Queue Zone */ struct ustorm_eth_queue_zone { struct coalescing_timeset int_coalescing_timeset; @@ -883,8 +886,8 @@ struct db_l2_dpm_data { #define DB_L2_DPM_DATA_RESERVED0_SHIFT 27 #define DB_L2_DPM_DATA_SGE_NUM_MASK 0x7 #define DB_L2_DPM_DATA_SGE_NUM_SHIFT 28 -#define DB_L2_DPM_DATA_GFS_SRC_EN_MASK 0x1 -#define DB_L2_DPM_DATA_GFS_SRC_EN_SHIFT 31 +#define DB_L2_DPM_DATA_TGFS_SRC_EN_MASK 0x1 +#define DB_L2_DPM_DATA_TGFS_SRC_EN_SHIFT 31 }; /* Structure for SGE in a DPM doorbell of type DPM_L2_BD */ diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index d9416ad5ef59..95f5fd615852 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -38,9 +38,11 @@ /********************/ #define ETH_HSI_VER_MAJOR 3 -#define ETH_HSI_VER_MINOR 10 +#define ETH_HSI_VER_MINOR 11 -#define ETH_HSI_VER_NO_PKT_LEN_TUNN 5 +#define ETH_HSI_VER_NO_PKT_LEN_TUNN 5 +/* Maximum number of pinned L2 connections (CIDs) */ +#define 
ETH_PINNED_CONN_MAX_NUM 32 #define ETH_CACHE_LINE_SIZE 64 #define ETH_RX_CQE_GAP 32 @@ -61,6 +63,7 @@ #define ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT 3 #define ETH_TX_MIN_BDS_PER_IPV6_WITH_EXT_PKT 2 #define ETH_TX_MIN_BDS_PER_PKT_W_LOOPBACK_MODE 2 +#define ETH_TX_MIN_BDS_PER_PKT_W_VPORT_FORWARDING 4 #define ETH_TX_MAX_NON_LSO_PKT_LEN (9700 - (4 + 4 + 12 + 8)) #define ETH_TX_MAX_LSO_HDR_BYTES 510 #define ETH_TX_LSO_WINDOW_BDS_NUM (18 - 1) @@ -75,9 +78,8 @@ #define ETH_NUM_STATISTIC_COUNTERS_QUAD_VF_ZONE \ (ETH_NUM_STATISTIC_COUNTERS - 3 * MAX_NUM_VFS / 4) -/* Maximum number of buffers, used for RX packet placement */ #define ETH_RX_MAX_BUFF_PER_PKT 5 -#define ETH_RX_BD_THRESHOLD 12 +#define ETH_RX_BD_THRESHOLD 16 /* Num of MAC/VLAN filters */ #define ETH_NUM_MAC_FILTERS 512 @@ -96,24 +98,24 @@ #define ETH_RSS_ENGINE_NUM_BB 127 /* TPA constants */ -#define ETH_TPA_MAX_AGGS_NUM 64 -#define ETH_TPA_CQE_START_LEN_LIST_SIZE ETH_RX_MAX_BUFF_PER_PKT -#define ETH_TPA_CQE_CONT_LEN_LIST_SIZE 6 -#define ETH_TPA_CQE_END_LEN_LIST_SIZE 4 +#define ETH_TPA_MAX_AGGS_NUM 64 +#define ETH_TPA_CQE_START_BW_LEN_LIST_SIZE 2 +#define ETH_TPA_CQE_CONT_LEN_LIST_SIZE 6 +#define ETH_TPA_CQE_END_LEN_LIST_SIZE 4 /* Control frame check constants */ -#define ETH_CTL_FRAME_ETH_TYPE_NUM 4 +#define ETH_CTL_FRAME_ETH_TYPE_NUM 4 /* GFS constants */ #define ETH_GFT_TRASHCAN_VPORT 0x1FF /* GFT drop flow vport number */ /* Destination port mode */ -enum dest_port_mode { - DEST_PORT_PHY, - DEST_PORT_LOOPBACK, - DEST_PORT_PHY_LOOPBACK, - DEST_PORT_DROP, - MAX_DEST_PORT_MODE +enum dst_port_mode { + DST_PORT_PHY, + DST_PORT_LOOPBACK, + DST_PORT_PHY_LOOPBACK, + DST_PORT_DROP, + MAX_DST_PORT_MODE }; /* Ethernet address type */ @@ -167,8 +169,8 @@ struct eth_tx_data_2nd_bd { #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT 0 #define ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_MASK 0x3 #define ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_SHIFT 4 -#define ETH_TX_DATA_2ND_BD_DEST_PORT_MODE_MASK 0x3 -#define 
ETH_TX_DATA_2ND_BD_DEST_PORT_MODE_SHIFT 6 +#define ETH_TX_DATA_2ND_BD_DST_PORT_MODE_MASK 0x3 +#define ETH_TX_DATA_2ND_BD_DST_PORT_MODE_SHIFT 6 #define ETH_TX_DATA_2ND_BD_START_BD_MASK 0x1 #define ETH_TX_DATA_2ND_BD_START_BD_SHIFT 8 #define ETH_TX_DATA_2ND_BD_TUNN_TYPE_MASK 0x3 @@ -244,8 +246,9 @@ struct eth_fast_path_rx_reg_cqe { struct eth_tunnel_parsing_flags tunnel_pars_flags; u8 bd_num; u8 reserved; - __le16 flow_id; - u8 reserved1[11]; + __le16 reserved2; + __le32 flow_id_or_resource_id; + u8 reserved1[7]; struct eth_pmd_flow_flags pmd_flags; }; @@ -296,9 +299,10 @@ struct eth_fast_path_rx_tpa_start_cqe { struct eth_tunnel_parsing_flags tunnel_pars_flags; u8 tpa_agg_index; u8 header_len; - __le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE]; - __le16 flow_id; - u8 reserved; + __le16 bw_ext_bd_len_list[ETH_TPA_CQE_START_BW_LEN_LIST_SIZE]; + __le16 reserved2; + __le32 flow_id_or_resource_id; + u8 reserved[3]; struct eth_pmd_flow_flags pmd_flags; }; @@ -407,6 +411,29 @@ struct eth_tx_3rd_bd { struct eth_tx_data_3rd_bd data; }; +/* The parsing information data for the forth tx bd of a given packet. 
*/ +struct eth_tx_data_4th_bd { + u8 dst_vport_id; + u8 reserved4; + __le16 bitfields; +#define ETH_TX_DATA_4TH_BD_DST_VPORT_ID_VALID_MASK 0x1 +#define ETH_TX_DATA_4TH_BD_DST_VPORT_ID_VALID_SHIFT 0 +#define ETH_TX_DATA_4TH_BD_RESERVED1_MASK 0x7F +#define ETH_TX_DATA_4TH_BD_RESERVED1_SHIFT 1 +#define ETH_TX_DATA_4TH_BD_START_BD_MASK 0x1 +#define ETH_TX_DATA_4TH_BD_START_BD_SHIFT 8 +#define ETH_TX_DATA_4TH_BD_RESERVED2_MASK 0x7F +#define ETH_TX_DATA_4TH_BD_RESERVED2_SHIFT 9 + __le16 reserved3; +}; + +/* The forth tx bd of a given packet */ +struct eth_tx_4th_bd { + struct regpair addr; /* Single continuous buffer */ + __le16 nbytes; /* Number of bytes in this BD */ + struct eth_tx_data_4th_bd data; /* Parsing information data */ +}; + /* Complementary information for the regular tx bd of a given packet */ struct eth_tx_data_bd { __le16 reserved0; @@ -431,6 +458,7 @@ union eth_tx_bd_types { struct eth_tx_1st_bd first_bd; struct eth_tx_2nd_bd second_bd; struct eth_tx_3rd_bd third_bd; + struct eth_tx_4th_bd fourth_bd; struct eth_tx_bd reg_bd; }; @@ -443,6 +471,12 @@ enum eth_tx_tunn_type { MAX_ETH_TX_TUNN_TYPE }; +/* Mstorm Queue Zone */ +struct mstorm_eth_queue_zone { + struct eth_rx_prod_data rx_producers; + __le32 reserved[3]; +}; + /* Ystorm Queue Zone */ struct xstorm_eth_queue_zone { struct coalescing_timeset int_coalescing_timeset; -- cgit v1.2.3 From 8a52bbab39c9791480cbae86c69ad0d47f62972e Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 27 Jan 2020 15:26:17 +0200 Subject: qed: Debug feature: ilt and mdump Part of the FW drop includes new debug capabilities implemented in the qed_debug file. This patch dumps additional information during ethtool -d for better debugging. The data dumped is the ilt (internal logical table) and information gathered by the management firmware incase there was a crash and driver was not able to extract the information (mdump). Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 9bcb2f419004..1b27c22d39af 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -159,6 +159,7 @@ struct qed_dcbx_get { enum qed_nvm_images { QED_NVM_IMAGE_ISCSI_CFG, QED_NVM_IMAGE_FCOE_CFG, + QED_NVM_IMAGE_MDUMP, QED_NVM_IMAGE_NVM_CFG1, QED_NVM_IMAGE_DEFAULT_CFG, QED_NVM_IMAGE_NVM_META, -- cgit v1.2.3 From 2d22bc8354b15abe413dff76cfe0f7aeb88ef9aa Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 27 Jan 2020 15:26:19 +0200 Subject: qed: FW 8.42.2.0 debug features Add to debug dump more information on the platform it was collected from (pci func, path id). Provide human readable reg fifo errors. Removed static debug arrays from HSI Functions, and moved them to the hwfn. Some structures were slightly changed (removing reserved chip id for example) which led to many long initializations being modified with one parameter less during initialization. This leads to some long diffs that don't really change anything. Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 1b27c22d39af..8f29e0d8a7b3 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -1178,6 +1178,15 @@ struct qed_common_ops { #define GET_FIELD(value, name) \ (((value) >> (name ## _SHIFT)) & name ## _MASK) +#define GET_MFW_FIELD(name, field) \ + (((name) & (field ## _MASK)) >> (field ## _OFFSET)) + +#define SET_MFW_FIELD(name, field, value) \ + do { \ + (name) &= ~(field ## _MASK); \ + (name) |= (((value) << (field ## _OFFSET)) & (field ## _MASK));\ + } while (0) + #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) /* Debug print definitions */ -- cgit v1.2.3