From 30bfce109420912f201d4f295f9130ff44f04b41 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Jan 2021 13:06:36 -0800 Subject: net: remove ndo_udp_tunnel_* callbacks All UDP tunnel port management is now routed via udp_tunnel_nic infra directly. Remove the old callbacks. Reviewed-by: Alexander Duyck Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 259be67644e3..1ec3ac5d5bbf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1213,19 +1213,6 @@ struct netdev_net_notifier { * struct netdev_phys_item_id *ppid) * Called to get the parent ID of the physical port of this device. * - * void (*ndo_udp_tunnel_add)(struct net_device *dev, - * struct udp_tunnel_info *ti); - * Called by UDP tunnel to notify a driver about the UDP port and socket - * address family that a UDP tunnel is listnening to. It is called only - * when a new port starts listening. The operation is protected by the - * RTNL. - * - * void (*ndo_udp_tunnel_del)(struct net_device *dev, - * struct udp_tunnel_info *ti); - * Called by UDP tunnel to notify the driver about a UDP port and socket - * address family that the UDP tunnel is not listening to anymore. The - * operation is protected by the RTNL. - * * void* (*ndo_dfwd_add_station)(struct net_device *pdev, * struct net_device *dev) * Called by upper layer devices to accelerate switching or other @@ -1464,10 +1451,6 @@ struct net_device_ops { struct netdev_phys_item_id *ppid); int (*ndo_get_phys_port_name)(struct net_device *dev, char *name, size_t len); - void (*ndo_udp_tunnel_add)(struct net_device *dev, - struct udp_tunnel_info *ti); - void (*ndo_udp_tunnel_del)(struct net_device *dev, - struct udp_tunnel_info *ti); void* (*ndo_dfwd_add_station)(struct net_device *pdev, struct net_device *dev); void (*ndo_dfwd_del_station)(struct net_device *pdev, -- cgit v1.2.3 From 8b86850bf9ef259e194ce49c776aded3b8c281fb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Jan 2021 09:09:44 -0800 Subject: net: phy: bcm7xxx: Add an entry for BCM72116 BCM72116 features a 28nm integrated EPHY, add an entry to match this PHY OUI. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210106170944.1253046-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index d0bd226d6bd9..de9430d55c90 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -31,6 +31,7 @@ #define PHY_ID_BCM89610 0x03625cd0 #define PHY_ID_BCM72113 0x35905310 +#define PHY_ID_BCM72116 0x35905350 #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7255 0xae025120 #define PHY_ID_BCM7260 0xae025190 -- cgit v1.2.3 From f46b9b8ee89b52f6ee1f4da49d392e609746ab10 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 7 Jan 2021 03:24:00 +0200 Subject: net: dsa: move the Broadcom tag information in a separate header file It is a bit strange to see something as specific as Broadcom SYSTEMPORT bits in the main DSA include file. Move these away into a separate header, and have the tagger and the SYSTEMPORT driver include them. Signed-off-by: Vladimir Oltean Acked-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/linux/dsa/brcm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/dsa/brcm.h (limited to 'include/linux') diff --git a/include/linux/dsa/brcm.h b/include/linux/dsa/brcm.h new file mode 100644 index 000000000000..47545a948784 --- /dev/null +++ b/include/linux/dsa/brcm.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2014 Broadcom Corporation + */ + +/* Included by drivers/net/ethernet/broadcom/bcmsysport.c and + * net/dsa/tag_brcm.c + */ +#ifndef _NET_DSA_BRCM_H +#define _NET_DSA_BRCM_H + +/* Broadcom tag specific helpers to insert and extract queue/port number */ +#define BRCM_TAG_SET_PORT_QUEUE(p, q) ((p) << 8 | q) +#define BRCM_TAG_GET_PORT(v) ((v) >> 8) +#define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff) + +#endif -- cgit v1.2.3 From 424f481f06dc7f1528447cc3224f5fd3c5e11f65 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Wed, 6 Jan 2021 14:18:29 -0800 Subject: skbuff: remove unused skb_zcopy_abort function skb_zcopy_abort() has no in-tree consumers, remove it. Signed-off-by: Jonathan Lemon Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 333bcdc39635..3ca8d7c7b30c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1490,17 +1490,6 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) } } -/* Abort a zerocopy operation and revert zckey on error in send syscall */ -static inline void skb_zcopy_abort(struct sk_buff *skb) -{ - struct ubuf_info *uarg = skb_zcopy(skb); - - if (uarg) { - sock_zerocopy_put_abort(uarg, false); - skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; - } -} - static inline void skb_mark_not_on_list(struct sk_buff *skb) { skb->next = NULL; -- cgit v1.2.3 From 75518851a2a0c047def521aaf87db401de098352 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Wed, 6 Jan 2021 14:18:31 -0800 Subject: skbuff: Push status and refcounts into sock_zerocopy_callback Before this change, the caller of sock_zerocopy_callback would need to save the zerocopy status, decrement and check the refcount, and then call the callback function - the callback was only invoked when the refcount reached zero. Now, the caller just passes the status into the callback function, which saves the status and handles its own refcounts. This makes the behavior of the sock_zerocopy_callback identical to the tpacket and vhost callbacks. Signed-off-by: Jonathan Lemon Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3ca8d7c7b30c..52e96c35f5af 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1479,9 +1479,6 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) if (uarg) { if (skb_zcopy_is_nouarg(skb)) { /* no notification callback */ - } else if (uarg->callback == sock_zerocopy_callback) { - uarg->zerocopy = uarg->zerocopy && zerocopy; - sock_zerocopy_put(uarg); } else { uarg->callback(uarg, zerocopy); } -- cgit v1.2.3 From 59776362b14b81dc45c6bbd4e7cb3871f390fc1b Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Wed, 6 Jan 2021 14:18:32 -0800 Subject: skbuff: replace sock_zerocopy_put() with skb_zcopy_put() Replace sock_zerocopy_put with the generic skb_zcopy_put() function. Pass 'true' as the success argument, as this is identical to no change. Signed-off-by: Jonathan Lemon Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 52e96c35f5af..a6c86839035b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -496,7 +496,6 @@ static inline void sock_zerocopy_get(struct ubuf_info *uarg) refcount_inc(&uarg->refcnt); } -void sock_zerocopy_put(struct ubuf_info *uarg); void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); void sock_zerocopy_callback(struct ubuf_info *uarg, bool success); @@ -1471,6 +1470,12 @@ static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL); } +static inline void skb_zcopy_put(struct ubuf_info *uarg) +{ + if (uarg) + uarg->callback(uarg, true); +} + /* Release a reference on a zerocopy structure */ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) { -- cgit v1.2.3 From e76d46cfff8d2335f363f58bd7387de4059d871d Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Wed, 6 Jan 2021 14:18:33 -0800 Subject: skbuff: replace sock_zerocopy_get with skb_zcopy_get Rename the get routines for consistency. Signed-off-by: Jonathan Lemon Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a6c86839035b..5b8a53ab51fd 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -491,11 +491,6 @@ struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size); struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg); -static inline void sock_zerocopy_get(struct ubuf_info *uarg) -{ - refcount_inc(&uarg->refcnt); -} - void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); void sock_zerocopy_callback(struct ubuf_info *uarg, bool success); @@ -1441,6 +1436,11 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) return is_zcopy ? skb_uarg(skb) : NULL; } +static inline void skb_zcopy_get(struct ubuf_info *uarg) +{ + refcount_inc(&uarg->refcnt); +} + static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg, bool *have_ref) { @@ -1448,7 +1448,7 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg, if (unlikely(have_ref && *have_ref)) *have_ref = false; else - sock_zerocopy_get(uarg); + skb_zcopy_get(uarg); skb_shinfo(skb)->destructor_arg = uarg; skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; } -- cgit v1.2.3 From 36177832f42d9c7b222ab039678398a9d4070fff Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Wed, 6 Jan 2021 14:18:34 -0800 Subject: skbuff: Add skb parameter to the ubuf zerocopy callback Add an optional skb parameter to the zerocopy callback parameter, which is passed down from skb_zcopy_clear(). This gives access to the original skb, which is needed for upcoming RX zero-copy error handling. Signed-off-by: Jonathan Lemon Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5b8a53ab51fd..b23c3b4b3209 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -461,7 +461,8 @@ enum { * The desc field is used to track userspace buffer index. */ struct ubuf_info { - void (*callback)(struct ubuf_info *, bool zerocopy_success); + void (*callback)(struct sk_buff *, struct ubuf_info *, + bool zerocopy_success); union { struct { unsigned long desc; @@ -493,7 +494,8 @@ struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); -void sock_zerocopy_callback(struct ubuf_info *uarg, bool success); +void sock_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, + bool success); int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len); int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, @@ -1473,20 +1475,17 @@ static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) static inline void skb_zcopy_put(struct ubuf_info *uarg) { if (uarg) - uarg->callback(uarg, true); + uarg->callback(NULL, uarg, true); } /* Release a reference on a zerocopy structure */ -static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) +static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) { struct ubuf_info *uarg = skb_zcopy(skb); if (uarg) { - if (skb_zcopy_is_nouarg(skb)) { - /* no notification callback */ - } else { - uarg->callback(uarg, zerocopy); - } + if (!skb_zcopy_is_nouarg(skb)) + uarg->callback(skb, uarg, zerocopy_success); skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; } -- cgit v1.2.3 From 236a6b1cd585a408139550201343f3f16f9324b9 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Wed, 6 Jan 2021 14:18:35 -0800 Subject: skbuff: Call sock_zerocopy_put_abort from skb_zcopy_put_abort The sock_zerocopy_put_abort function contains logic which is specific to the current zerocopy implementation. Add a wrapper which checks the callback and dispatches apppropriately. Signed-off-by: Jonathan Lemon Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b23c3b4b3209..9f7393167f0a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1478,6 +1478,16 @@ static inline void skb_zcopy_put(struct ubuf_info *uarg) uarg->callback(NULL, uarg, true); } +static inline void skb_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref) +{ + if (uarg) { + if (uarg->callback == sock_zerocopy_callback) + sock_zerocopy_put_abort(uarg, have_uref); + else if (have_uref) + skb_zcopy_put(uarg); + } +} + /* Release a reference on a zerocopy structure */ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) { -- cgit v1.2.3 From 8c793822c5803e01d03f71c431f59316f0b278b7 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Wed, 6 Jan 2021 14:18:37 -0800 Subject: skbuff: rename sock_zerocopy_* to msg_zerocopy_* At Willem's suggestion, rename the sock_zerocopy_* functions so that they match the MSG_ZEROCOPY flag, which makes it clear they are specific to this zerocopy implementation. Signed-off-by: Jonathan Lemon Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9f7393167f0a..f6a3cee5c967 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -488,14 +488,14 @@ struct ubuf_info { int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); -struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size); -struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, - struct ubuf_info *uarg); +struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size); +struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, + struct ubuf_info *uarg); -void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); +void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); -void sock_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, - bool success); +void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, + bool success); int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len); int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, @@ -1481,8 +1481,8 @@ static inline void skb_zcopy_put(struct ubuf_info *uarg) static inline void skb_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref) { if (uarg) { - if (uarg->callback == sock_zerocopy_callback) - sock_zerocopy_put_abort(uarg, have_uref); + if (uarg->callback == msg_zerocopy_callback) + msg_zerocopy_put_abort(uarg, have_uref); else if (have_uref) skb_zcopy_put(uarg); } @@ -2776,7 +2776,7 @@ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) if (likely(!skb_zcopy(skb))) return 0; if (!skb_zcopy_is_nouarg(skb) && - skb_uarg(skb)->callback == sock_zerocopy_callback) + skb_uarg(skb)->callback == msg_zerocopy_callback) return 0; return skb_copy_ubufs(skb, gfp_mask); } -- cgit v1.2.3 From 06b4feb37e64e543714c971a4162a75e2e4024d4 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Wed, 6 Jan 2021 14:18:38 -0800 Subject: net: group skb_shinfo zerocopy related bits together. In preparation for expanded zerocopy (TX and RX), move the zerocopy related bits out of tx_flags into their own flag word. Signed-off-by: Jonathan Lemon Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f6a3cee5c967..7b5adc511ac6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -430,28 +430,32 @@ enum { /* device driver is going to provide hardware time stamp */ SKBTX_IN_PROGRESS = 1 << 2, - /* device driver supports TX zero-copy buffers */ - SKBTX_DEV_ZEROCOPY = 1 << 3, - /* generate wifi status information (where possible) */ SKBTX_WIFI_STATUS = 1 << 4, - /* This indicates at least one fragment might be overwritten - * (as in vmsplice(), sendfile() ...) - * If we need to compute a TX checksum, we'll need to copy - * all frags to avoid possible bad checksum - */ - SKBTX_SHARED_FRAG = 1 << 5, - /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, }; -#define SKBTX_ZEROCOPY_FRAG (SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG) #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) +/* Definitions for flags in struct skb_shared_info */ +enum { + /* use zcopy routines */ + SKBFL_ZEROCOPY_ENABLE = BIT(0), + + /* This indicates at least one fragment might be overwritten + * (as in vmsplice(), sendfile() ...) + * If we need to compute a TX checksum, we'll need to copy + * all frags to avoid possible bad checksum + */ + SKBFL_SHARED_FRAG = BIT(1), +}; + +#define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG) + /* * The callback notifies userspace to release buffers when skb DMA is done in * lower device, the skb last reference should be 0 when calling this. @@ -506,7 +510,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, * the end of the header data, ie. at skb->end. */ struct skb_shared_info { - __u8 __unused; + __u8 flags; __u8 meta_len; __u8 nr_frags; __u8 tx_flags; @@ -1433,7 +1437,7 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) { - bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY; + bool is_zcopy = skb && skb_shinfo(skb)->flags & SKBFL_ZEROCOPY_ENABLE; return is_zcopy ? skb_uarg(skb) : NULL; } @@ -1452,14 +1456,14 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg, else skb_zcopy_get(uarg); skb_shinfo(skb)->destructor_arg = uarg; - skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; + skb_shinfo(skb)->flags |= SKBFL_ZEROCOPY_FRAG; } } static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val) { skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL); - skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; + skb_shinfo(skb)->flags |= SKBFL_ZEROCOPY_FRAG; } static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb) @@ -1497,7 +1501,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) if (!skb_zcopy_is_nouarg(skb)) uarg->callback(skb, uarg, zerocopy_success); - skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; + skb_shinfo(skb)->flags &= ~SKBFL_ZEROCOPY_FRAG; } } @@ -3323,7 +3327,7 @@ static inline int skb_linearize(struct sk_buff *skb) static inline bool skb_has_shared_frag(const struct sk_buff *skb) { return skb_is_nonlinear(skb) && - skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG; } /** -- cgit v1.2.3 From 04c2d33eabdc76df730e1e356f6b2c656381d7d5 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Wed, 6 Jan 2021 14:18:39 -0800 Subject: skbuff: add flags to ubuf_info for ubuf setup Currently, when an ubuf is attached to a new skb, the shared flags word is initialized to a fixed value. Instead of doing this, set the default flags in the ubuf, and have new skbs inherit from this default. This is needed when setting up different zerocopy types. Signed-off-by: Jonathan Lemon Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7b5adc511ac6..a7bc71d2debb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -480,6 +480,7 @@ struct ubuf_info { }; }; refcount_t refcnt; + u8 flags; struct mmpin { struct user_struct *user; @@ -1456,7 +1457,7 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg, else skb_zcopy_get(uarg); skb_shinfo(skb)->destructor_arg = uarg; - skb_shinfo(skb)->flags |= SKBFL_ZEROCOPY_FRAG; + skb_shinfo(skb)->flags |= uarg->flags; } } -- cgit v1.2.3 From 9ee5e5ade033875191a2d2e470033e9cdde44a6a Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Wed, 6 Jan 2021 14:18:40 -0800 Subject: tap/tun: add skb_zcopy_init() helper for initialization. Replace direct assignments with skb_zcopy_init() for zerocopy cases where a new skb is initialized, without changing the reference counts. Signed-off-by: Jonathan Lemon Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a7bc71d2debb..68ad45a98810 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1448,6 +1448,12 @@ static inline void skb_zcopy_get(struct ubuf_info *uarg) refcount_inc(&uarg->refcnt); } +static inline void skb_zcopy_init(struct sk_buff *skb, struct ubuf_info *uarg) +{ + skb_shinfo(skb)->destructor_arg = uarg; + skb_shinfo(skb)->flags |= uarg->flags; +} + static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg, bool *have_ref) { @@ -1456,8 +1462,7 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg, *have_ref = false; else skb_zcopy_get(uarg); - skb_shinfo(skb)->destructor_arg = uarg; - skb_shinfo(skb)->flags |= uarg->flags; + skb_zcopy_init(skb, uarg); } } -- cgit v1.2.3 From 8e0449172497a915e79da66b222255dc9b4a5f31 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Wed, 6 Jan 2021 14:18:41 -0800 Subject: skbuff: Rename skb_zcopy_{get|put} to net_zcopy_{get|put} Unlike the rest of the skb_zcopy_ functions, these routines operate on a 'struct ubuf', not a skb. Remove the 'skb_' prefix from the naming to make things clearer. Suggested-by: Willem de Bruijn Signed-off-by: Jonathan Lemon Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 68ad45a98810..7a057b1f1eb8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1443,7 +1443,7 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) return is_zcopy ? skb_uarg(skb) : NULL; } -static inline void skb_zcopy_get(struct ubuf_info *uarg) +static inline void net_zcopy_get(struct ubuf_info *uarg) { refcount_inc(&uarg->refcnt); } @@ -1461,7 +1461,7 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg, if (unlikely(have_ref && *have_ref)) *have_ref = false; else - skb_zcopy_get(uarg); + net_zcopy_get(uarg); skb_zcopy_init(skb, uarg); } } @@ -1482,19 +1482,19 @@ static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL); } -static inline void skb_zcopy_put(struct ubuf_info *uarg) +static inline void net_zcopy_put(struct ubuf_info *uarg) { if (uarg) uarg->callback(NULL, uarg, true); } -static inline void skb_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref) +static inline void net_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref) { if (uarg) { if (uarg->callback == msg_zerocopy_callback) msg_zerocopy_put_abort(uarg, have_uref); else if (have_uref) - skb_zcopy_put(uarg); + net_zcopy_put(uarg); } } -- cgit v1.2.3 From 994122211665301080cd232c06ae219b681dcb57 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 7 Jan 2021 17:34:01 -0600 Subject: remoteproc: qcom: expose types for COMPILE_TEST Stub functions are defined for SSR notifier registration in case QCOM_RPROC_COMMON is not configured. As a result, code that uses these functions can link successfully even if the common remoteproc code is not built. Code that registers an SSR notifier function likely needs the types defined in "qcom_rproc.h", but those are only exposed if QCOM_RPROC_COMMON is enabled. Rearrange the conditional definition so the qcom_ssr_notify_data structure and qcom_ssr_notify_type enumerated type are defined whether or not QCOM_RPROC_COMMON is enabled. Reviewed-by: Bjorn Andersson Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- include/linux/remoteproc/qcom_rproc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc/qcom_rproc.h b/include/linux/remoteproc/qcom_rproc.h index 647051662174..82b211518136 100644 --- a/include/linux/remoteproc/qcom_rproc.h +++ b/include/linux/remoteproc/qcom_rproc.h @@ -3,8 +3,6 @@ struct notifier_block; -#if IS_ENABLED(CONFIG_QCOM_RPROC_COMMON) - /** * enum qcom_ssr_notify_type - Startup/Shutdown events related to a remoteproc * processor. @@ -26,6 +24,8 @@ struct qcom_ssr_notify_data { bool crashed; }; +#if IS_ENABLED(CONFIG_QCOM_RPROC_COMMON) + void *qcom_register_ssr_notifier(const char *name, struct notifier_block *nb); int qcom_unregister_ssr_notifier(void *notify, struct notifier_block *nb); -- cgit v1.2.3 From ce2ceb9b1cff777c7d8beb368eddc3b8e45381f6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 7 Jan 2021 17:34:02 -0600 Subject: soc: qcom: mdt_loader: define stubs for COMPILE_TEST Define stub functions for the exposed MDT functions in case QCOM_MDT_LOADER is not configured. This allows users of these functions to link correctly for COMPILE_TEST builds without QCOM_SCM enabled. Reviewed-by: Bjorn Andersson Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- include/linux/soc/qcom/mdt_loader.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h index e600baec6825..afd47217996b 100644 --- a/include/linux/soc/qcom/mdt_loader.h +++ b/include/linux/soc/qcom/mdt_loader.h @@ -11,6 +11,8 @@ struct device; struct firmware; +#if IS_ENABLED(CONFIG_QCOM_MDT_LOADER) + ssize_t qcom_mdt_get_size(const struct firmware *fw); int qcom_mdt_load(struct device *dev, const struct firmware *fw, const char *fw_name, int pas_id, void *mem_region, @@ -23,4 +25,37 @@ int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw, phys_addr_t *reloc_base); void *qcom_mdt_read_metadata(const struct firmware *fw, size_t *data_len); +#else /* !IS_ENABLED(CONFIG_QCOM_MDT_LOADER) */ + +static inline ssize_t qcom_mdt_get_size(const struct firmware *fw) +{ + return -ENODEV; +} + +static inline int qcom_mdt_load(struct device *dev, const struct firmware *fw, + const char *fw_name, int pas_id, + void *mem_region, phys_addr_t mem_phys, + size_t mem_size, phys_addr_t *reloc_base) +{ + return -ENODEV; +} + +static inline int qcom_mdt_load_no_init(struct device *dev, + const struct firmware *fw, + const char *fw_name, int pas_id, + void *mem_region, phys_addr_t mem_phys, + size_t mem_size, + phys_addr_t *reloc_base) +{ + return -ENODEV; +} + +static inline void *qcom_mdt_read_metadata(const struct firmware *fw, + size_t *data_len) +{ + return ERR_PTR(-ENODEV); +} + +#endif /* !IS_ENABLED(CONFIG_QCOM_MDT_LOADER) */ + #endif -- cgit v1.2.3 From 1d11fa696733ffb9ac24771716b1b1b9953e5a48 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Jan 2021 03:39:03 -0800 Subject: net-gro: remove GRO_DROP GRO_DROP can only be returned from napi_gro_frags() if the skb has not been allocated by a prior napi_get_frags() Since drivers must use napi_get_frags() and test its result before populating the skb with metadata, we can safely remove GRO_DROP since it offers no practical use. Signed-off-by: Eric Dumazet Cc: Jesse Brandeburg Acked-by: Edward Cree Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1ec3ac5d5bbf..5b949076ed23 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -376,7 +376,6 @@ enum gro_result { GRO_MERGED_FREE, GRO_HELD, GRO_NORMAL, - GRO_DROP, GRO_CONSUMED, }; typedef enum gro_result gro_result_t; -- cgit v1.2.3 From a643bff752dcf72a07e1b2ab2f8587e4f51118be Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Jan 2021 23:55:14 -0800 Subject: bpf: Add bpf_patch_call_args prototype to include/linux/bpf.h Add bpf_patch_call_args() prototype. This function is called from BPF verifier and only if CONFIG_BPF_JIT_ALWAYS_ON is not defined. This fixes compiler warning about missing prototype in some kernel configurations. Fixes: 1ea47e01ad6e ("bpf: add support for bpf_call to interpreter") Reported-by: kernel test robot Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112075520.4103414-2-andrii@kernel.org --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07cb5d15e743..ef9309604b3e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1403,7 +1403,10 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, union bpf_attr __user *uattr); + +#ifndef CONFIG_BPF_JIT_ALWAYS_ON void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); +#endif struct btf *bpf_get_btf_vmlinux(void); -- cgit v1.2.3 From 6943c2b05bf09fd5c5729f7d7d803bf3f126cb9a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Jan 2021 23:55:15 -0800 Subject: bpf: Avoid warning when re-casting __bpf_call_base into __bpf_call_base_args BPF interpreter uses extra input argument, so re-casts __bpf_call_base into __bpf_call_base_args. Avoid compiler warning about incompatible function prototypes by casting to void * first. Fixes: 1ea47e01ad6e ("bpf: add support for bpf_call to interpreter") Reported-by: kernel test robot Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112075520.4103414-3-andrii@kernel.org --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 29c27656165b..5edf2b660881 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -886,7 +886,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); #define __bpf_call_base_args \ ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \ - __bpf_call_base) + (void *)__bpf_call_base) struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); -- cgit v1.2.3 From 936f8946bdb48239f4292812d4d2e26c6d328c95 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Jan 2021 23:55:16 -0800 Subject: bpf: Declare __bpf_free_used_maps() unconditionally __bpf_free_used_maps() is always defined in kernel/bpf/core.c, while include/linux/bpf.h is guarding it behind CONFIG_BPF_SYSCALL. Move it out of that guard region and fix compiler warning. Fixes: a2ea07465c8d ("bpf: Fix missing prog untrack in release_maps") Reported-by: kernel test robot Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112075520.4103414-4-andrii@kernel.org --- include/linux/bpf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ef9309604b3e..6e585dbc10df 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1206,8 +1206,6 @@ void bpf_prog_sub(struct bpf_prog *prog, int i); void bpf_prog_inc(struct bpf_prog *prog); struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -void __bpf_free_used_maps(struct bpf_prog_aux *aux, - struct bpf_map **used_maps, u32 len); void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); @@ -1676,6 +1674,9 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, return bpf_prog_get_type_dev(ufd, type, false); } +void __bpf_free_used_maps(struct bpf_prog_aux *aux, + struct bpf_map **used_maps, u32 len); + bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); int bpf_prog_offload_compile(struct bpf_prog *prog); -- cgit v1.2.3 From 541c3bad8dc51b253ba8686d0cd7628e6b9b5f4c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Jan 2021 23:55:18 -0800 Subject: bpf: Support BPF ksym variables in kernel modules Add support for directly accessing kernel module variables from BPF programs using special ldimm64 instructions. This functionality builds upon vmlinux ksym support, but extends ldimm64 with src_reg=BPF_PSEUDO_BTF_ID to allow specifying kernel module BTF's FD in insn[1].imm field. During BPF program load time, verifier will resolve FD to BTF object and will take reference on BTF object itself and, for module BTFs, corresponding module as well, to make sure it won't be unloaded from under running BPF program. The mechanism used is similar to how bpf_prog keeps track of used bpf_maps. One interesting change is also in how per-CPU variable is determined. The logic is to find .data..percpu data section in provided BTF, but both vmlinux and module each have their own .data..percpu entries in BTF. So for module's case, the search for DATASEC record needs to look at only module's added BTF types. This is implemented with custom search function. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: Hao Luo Link: https://lore.kernel.org/bpf/20210112075520.4103414-6-andrii@kernel.org --- include/linux/bpf.h | 10 ++++++++++ include/linux/bpf_verifier.h | 3 +++ include/linux/btf.h | 3 +++ 3 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6e585dbc10df..1aac2af12fed 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -761,9 +761,15 @@ struct bpf_ctx_arg_aux { u32 btf_id; }; +struct btf_mod_pair { + struct btf *btf; + struct module *module; +}; + struct bpf_prog_aux { atomic64_t refcnt; u32 used_map_cnt; + u32 used_btf_cnt; u32 max_ctx_offset; u32 max_pkt_offset; u32 max_tp_access; @@ -802,6 +808,7 @@ struct bpf_prog_aux { const struct bpf_prog_ops *ops; struct bpf_map **used_maps; struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */ + struct btf_mod_pair *used_btfs; struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ @@ -1668,6 +1675,9 @@ bpf_base_func_proto(enum bpf_func_id func_id) } #endif /* CONFIG_BPF_SYSCALL */ +void __bpf_free_used_btfs(struct bpf_prog_aux *aux, + struct btf_mod_pair *used_btfs, u32 len); + static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e941fe1484e5..dfe6f85d97dd 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -340,6 +340,7 @@ struct bpf_insn_aux_data { }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +#define MAX_USED_BTFS 64 /* max number of BTFs accessed by one BPF program */ #define BPF_VERIFIER_TMP_LOG_SIZE 1024 @@ -398,7 +399,9 @@ struct bpf_verifier_env { struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_verifier_state_list *free_list; struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ + struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */ u32 used_map_cnt; /* number of used maps */ + u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; bool allow_ptr_to_map_access; diff --git a/include/linux/btf.h b/include/linux/btf.h index 4c200f5d242b..7fabf1428093 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -91,6 +91,9 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj, int btf_get_fd_by_id(u32 id); u32 btf_obj_id(const struct btf *btf); bool btf_is_kernel(const struct btf *btf); +bool btf_is_module(const struct btf *btf); +struct module *btf_try_get_module(const struct btf *btf); +u32 btf_nr_types(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); -- cgit v1.2.3 From 5a9d5ecd69ed65fc2d49cdecfc1c1ab1e4d7af34 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:18 +0100 Subject: can: dev: move bittiming related code into seperate file This patch moves the bittiming related code of the CAN device infrastructure into a separate file. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-4-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 44 +++++++++++++++++++++++++++++++++++++++++++ include/linux/can/dev.h | 16 +--------------- 2 files changed, 45 insertions(+), 15 deletions(-) create mode 100644 include/linux/can/bittiming.h (limited to 'include/linux') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h new file mode 100644 index 000000000000..707575c668f4 --- /dev/null +++ b/include/linux/can/bittiming.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2020 Pengutronix, Marc Kleine-Budde + */ + +#ifndef _CAN_BITTIMING_H +#define _CAN_BITTIMING_H + +#include +#include + +#define CAN_SYNC_SEG 1 + +#ifdef CONFIG_CAN_CALC_BITTIMING +int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, + const struct can_bittiming_const *btc); +#else /* !CONFIG_CAN_CALC_BITTIMING */ +static inline int +can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, + const struct can_bittiming_const *btc) +{ + netdev_err(dev, "bit-timing calculation not available\n"); + return -EINVAL; +} +#endif /* CONFIG_CAN_CALC_BITTIMING */ + +int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt, + const struct can_bittiming_const *btc, + const u32 *bitrate_const, + const unsigned int bitrate_const_cnt); + +/* + * can_bit_time() - Duration of one bit + * + * Please refer to ISO 11898-1:2015, section 11.3.1.1 "Bit time" for + * additional information. + * + * Return: the number of time quanta in one bit. + */ +static inline unsigned int can_bit_time(const struct can_bittiming *bt) +{ + return CAN_SYNC_SEG + bt->prop_seg + bt->phase_seg1 + bt->phase_seg2; +} + +#endif /* !_CAN_BITTIMING_H */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 197a79535cc2..054c3bed190b 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -15,6 +15,7 @@ #define _CAN_DEV_H #include +#include #include #include #include @@ -82,21 +83,6 @@ struct can_priv { #endif }; -#define CAN_SYNC_SEG 1 - -/* - * can_bit_time() - Duration of one bit - * - * Please refer to ISO 11898-1:2015, section 11.3.1.1 "Bit time" for - * additional information. - * - * Return: the number of time quanta in one bit. - */ -static inline unsigned int can_bit_time(const struct can_bittiming *bt) -{ - return CAN_SYNC_SEG + bt->prop_seg + bt->phase_seg1 + bt->phase_seg2; -} - /* * can_cc_dlc2len(value) - convert a given data length code (dlc) of a * Classical CAN frame into a valid data length of max. 8 bytes. -- cgit v1.2.3 From bdd2e413192dd5f2153d166cd907b048cce872e8 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:19 +0100 Subject: can: dev: move length related code into seperate file This patch moves all CAN frame length related code of the CAN device infrastructure into a separate file. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-5-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 41 +-------------------------------------- include/linux/can/length.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 40 deletions(-) create mode 100644 include/linux/can/length.h (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 054c3bed190b..d75fba1d030a 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -83,15 +84,6 @@ struct can_priv { #endif }; -/* - * can_cc_dlc2len(value) - convert a given data length code (dlc) of a - * Classical CAN frame into a valid data length of max. 8 bytes. - * - * To be used in the CAN netdriver receive path to ensure conformance with - * ISO 11898-1 Chapter 8.4.2.3 (DLC field) - */ -#define can_cc_dlc2len(dlc) (min_t(u8, (dlc), CAN_MAX_DLEN)) - /* Check for outgoing skbs that have not been created by the CAN subsystem */ static inline bool can_skb_headroom_valid(struct net_device *dev, struct sk_buff *skb) @@ -156,31 +148,6 @@ static inline bool can_is_canfd_skb(const struct sk_buff *skb) return skb->len == CANFD_MTU; } -/* helper to get the data length code (DLC) for Classical CAN raw DLC access */ -static inline u8 can_get_cc_dlc(const struct can_frame *cf, const u32 ctrlmode) -{ - /* return len8_dlc as dlc value only if all conditions apply */ - if ((ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC) && - (cf->len == CAN_MAX_DLEN) && - (cf->len8_dlc > CAN_MAX_DLEN && cf->len8_dlc <= CAN_MAX_RAW_DLC)) - return cf->len8_dlc; - - /* return the payload length as dlc value */ - return cf->len; -} - -/* helper to set len and len8_dlc value for Classical CAN raw DLC access */ -static inline void can_frame_set_cc_len(struct can_frame *cf, const u8 dlc, - const u32 ctrlmode) -{ - /* the caller already ensured that dlc is a value from 0 .. 15 */ - if (ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC && dlc > CAN_MAX_DLEN) - cf->len8_dlc = dlc; - - /* limit the payload length 'len' to CAN_MAX_DLEN */ - cf->len = can_cc_dlc2len(dlc); -} - /* helper to define static CAN controller features at device creation time */ static inline void can_set_static_ctrlmode(struct net_device *dev, u32 static_mode) @@ -196,12 +163,6 @@ static inline void can_set_static_ctrlmode(struct net_device *dev, dev->mtu = CANFD_MTU; } -/* get data length from raw data length code (DLC) */ -u8 can_fd_dlc2len(u8 dlc); - -/* map the sanitized data length to an appropriate data length code */ -u8 can_fd_len2dlc(u8 len); - struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, unsigned int txqs, unsigned int rxqs); #define alloc_candev(sizeof_priv, echo_skb_max) \ diff --git a/include/linux/can/length.h b/include/linux/can/length.h new file mode 100644 index 000000000000..156b9d17969a --- /dev/null +++ b/include/linux/can/length.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 Oliver Hartkopp + */ + +#ifndef _CAN_LENGTH_H +#define _CAN_LENGTH_H + +/* + * can_cc_dlc2len(value) - convert a given data length code (dlc) of a + * Classical CAN frame into a valid data length of max. 8 bytes. + * + * To be used in the CAN netdriver receive path to ensure conformance with + * ISO 11898-1 Chapter 8.4.2.3 (DLC field) + */ +#define can_cc_dlc2len(dlc) (min_t(u8, (dlc), CAN_MAX_DLEN)) + +/* helper to get the data length code (DLC) for Classical CAN raw DLC access */ +static inline u8 can_get_cc_dlc(const struct can_frame *cf, const u32 ctrlmode) +{ + /* return len8_dlc as dlc value only if all conditions apply */ + if ((ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC) && + (cf->len == CAN_MAX_DLEN) && + (cf->len8_dlc > CAN_MAX_DLEN && cf->len8_dlc <= CAN_MAX_RAW_DLC)) + return cf->len8_dlc; + + /* return the payload length as dlc value */ + return cf->len; +} + +/* helper to set len and len8_dlc value for Classical CAN raw DLC access */ +static inline void can_frame_set_cc_len(struct can_frame *cf, const u8 dlc, + const u32 ctrlmode) +{ + /* the caller already ensured that dlc is a value from 0 .. 15 */ + if (ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC && dlc > CAN_MAX_DLEN) + cf->len8_dlc = dlc; + + /* limit the payload length 'len' to CAN_MAX_DLEN */ + cf->len = can_cc_dlc2len(dlc); +} + +/* get data length from raw data length code (DLC) */ +u8 can_fd_dlc2len(u8 dlc); + +/* map the sanitized data length to an appropriate data length code */ +u8 can_fd_len2dlc(u8 len); + +#endif /* !_CAN_LENGTH_H */ -- cgit v1.2.3 From 18f2dbfd2232212f53af9d249682f13a8335d54f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:20 +0100 Subject: can: dev: move skb related into seperate file This patch moves the skb related code of the CAN device infrastructure into a separate file. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-6-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 76 ------------------------------------------------ include/linux/can/skb.h | 77 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index d75fba1d030a..4a26e128af7f 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -84,69 +84,6 @@ struct can_priv { #endif }; -/* Check for outgoing skbs that have not been created by the CAN subsystem */ -static inline bool can_skb_headroom_valid(struct net_device *dev, - struct sk_buff *skb) -{ - /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */ - if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv))) - return false; - - /* af_packet does not apply CAN skb specific settings */ - if (skb->ip_summed == CHECKSUM_NONE) { - /* init headroom */ - can_skb_prv(skb)->ifindex = dev->ifindex; - can_skb_prv(skb)->skbcnt = 0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - - /* perform proper loopback on capable devices */ - if (dev->flags & IFF_ECHO) - skb->pkt_type = PACKET_LOOPBACK; - else - skb->pkt_type = PACKET_HOST; - - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - } - - return true; -} - -/* Drop a given socketbuffer if it does not contain a valid CAN frame. */ -static inline bool can_dropped_invalid_skb(struct net_device *dev, - struct sk_buff *skb) -{ - const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - - if (skb->protocol == htons(ETH_P_CAN)) { - if (unlikely(skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN)) - goto inval_skb; - } else if (skb->protocol == htons(ETH_P_CANFD)) { - if (unlikely(skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN)) - goto inval_skb; - } else - goto inval_skb; - - if (!can_skb_headroom_valid(dev, skb)) - goto inval_skb; - - return false; - -inval_skb: - kfree_skb(skb); - dev->stats.tx_dropped++; - return true; -} - -static inline bool can_is_canfd_skb(const struct sk_buff *skb) -{ - /* the CAN specific type of skb is identified by its data length */ - return skb->len == CANFD_MTU; -} /* helper to define static CAN controller features at device creation time */ static inline void can_set_static_ctrlmode(struct net_device *dev, @@ -187,23 +124,10 @@ void can_bus_off(struct net_device *dev); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); -int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, - unsigned int idx); -struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, - u8 *len_ptr); -unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); -void can_free_echo_skb(struct net_device *dev, unsigned int idx); - #ifdef CONFIG_OF void of_can_transceiver(struct net_device *dev); #else static inline void of_can_transceiver(struct net_device *dev) { } #endif -struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); -struct sk_buff *alloc_canfd_skb(struct net_device *dev, - struct canfd_frame **cfd); -struct sk_buff *alloc_can_err_skb(struct net_device *dev, - struct can_frame **cf); - #endif /* !_CAN_DEV_H */ diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index fc61cf4eff1c..c90ebbd3008c 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -16,6 +16,19 @@ #include #include +void can_flush_echo_skb(struct net_device *dev); +int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, + unsigned int idx); +struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, + u8 *len_ptr); +unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); +void can_free_echo_skb(struct net_device *dev, unsigned int idx); +struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); +struct sk_buff *alloc_canfd_skb(struct net_device *dev, + struct canfd_frame **cfd); +struct sk_buff *alloc_can_err_skb(struct net_device *dev, + struct can_frame **cf); + /* * The struct can_skb_priv is used to transport additional information along * with the stored struct can(fd)_frame that can not be contained in existing @@ -74,4 +87,68 @@ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) return nskb; } +/* Check for outgoing skbs that have not been created by the CAN subsystem */ +static inline bool can_skb_headroom_valid(struct net_device *dev, + struct sk_buff *skb) +{ + /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */ + if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv))) + return false; + + /* af_packet does not apply CAN skb specific settings */ + if (skb->ip_summed == CHECKSUM_NONE) { + /* init headroom */ + can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* perform proper loopback on capable devices */ + if (dev->flags & IFF_ECHO) + skb->pkt_type = PACKET_LOOPBACK; + else + skb->pkt_type = PACKET_HOST; + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + } + + return true; +} + +/* Drop a given socketbuffer if it does not contain a valid CAN frame. */ +static inline bool can_dropped_invalid_skb(struct net_device *dev, + struct sk_buff *skb) +{ + const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + + if (skb->protocol == htons(ETH_P_CAN)) { + if (unlikely(skb->len != CAN_MTU || + cfd->len > CAN_MAX_DLEN)) + goto inval_skb; + } else if (skb->protocol == htons(ETH_P_CANFD)) { + if (unlikely(skb->len != CANFD_MTU || + cfd->len > CANFD_MAX_DLEN)) + goto inval_skb; + } else + goto inval_skb; + + if (!can_skb_headroom_valid(dev, skb)) + goto inval_skb; + + return false; + +inval_skb: + kfree_skb(skb); + dev->stats.tx_dropped++; + return true; +} + +static inline bool can_is_canfd_skb(const struct sk_buff *skb) +{ + /* the CAN specific type of skb is identified by its data length */ + return skb->len == CANFD_MTU; +} + #endif /* !_CAN_SKB_H */ -- cgit v1.2.3 From 0a042c6ec991e4d33062ee52e9e23f615bc659f9 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:21 +0100 Subject: can: dev: move netlink related code into seperate file This patch moves the netlink related code of the CAN device infrastructure into a separate file. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-7-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 4a26e128af7f..7faf6a37d5b2 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -100,6 +100,8 @@ static inline void can_set_static_ctrlmode(struct net_device *dev, dev->mtu = CANFD_MTU; } +void can_setup(struct net_device *dev); + struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, unsigned int txqs, unsigned int rxqs); #define alloc_candev(sizeof_priv, echo_skb_max) \ @@ -130,4 +132,8 @@ void of_can_transceiver(struct net_device *dev); static inline void of_can_transceiver(struct net_device *dev) { } #endif +extern struct rtnl_link_ops can_link_ops; +int can_netlink_register(void); +void can_netlink_unregister(void); + #endif /* !_CAN_DEV_H */ -- cgit v1.2.3 From 838b00a2260ab6ec104a2a5696e619c19e8cd9be Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Mon, 11 Jan 2021 23:05:24 -0800 Subject: net/mlx5: Add HW definition of reg_c_preserve Add capability bit to test whether reg_c value is preserved on recirculation. Signed-off-by: Paul Blakey Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 442c0160caab..823411e288c0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1278,7 +1278,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_a0[0x3]; u8 ece_support[0x1]; - u8 reserved_at_a4[0x7]; + u8 reserved_at_a4[0x5]; + u8 reg_c_preserve[0x1]; + u8 reserved_at_aa[0x1]; u8 log_max_srq[0x5]; u8 reserved_at_b0[0x1]; u8 uplink_follow[0x1]; -- cgit v1.2.3 From 99b7beb0431a4b9728023e9169ed15af678d2c7f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:24 +0100 Subject: can: length: canfd_sanitize_len(): add function to sanitize CAN-FD data length The data field in CAN-FD frames have specifig frame length (0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 16, 20, 24, 32, 48, 64). This function "rounds" up a given length to the next valid CAN-FD frame length. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-10-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/length.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/length.h b/include/linux/can/length.h index 156b9d17969a..b2313b2a0b02 100644 --- a/include/linux/can/length.h +++ b/include/linux/can/length.h @@ -45,4 +45,10 @@ u8 can_fd_dlc2len(u8 dlc); /* map the sanitized data length to an appropriate data length code */ u8 can_fd_len2dlc(u8 len); +/* map the data length to an appropriate data link layer length */ +static inline u8 canfd_sanitize_len(u8 len) +{ + return can_fd_dlc2len(can_fd_len2dlc(len)); +} + #endif /* !_CAN_LENGTH_H */ -- cgit v1.2.3 From 85d99c3e2a13d542445342a1383a686dadeaac3d Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 11 Jan 2021 15:19:25 +0100 Subject: can: length: can_skb_get_frame_len(): introduce function to get data length of frame in data link layer This patch adds the function can_skb_get_frame_len() which returns the length of a CAN frame on the data link layer, including Start-of-frame, Identifier, various other bits, the actual data, the CRC, the End-of-frame, the Inter frame spacing. Co-developed-by: Arunachalam Santhanam Signed-off-by: Arunachalam Santhanam Co-developed-by: Vincent Mailhol Signed-off-by: Vincent Mailhol Acked-by: Vincent Mailhol Reviewed-by: Vincent Mailhol Co-developed-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20210111141930.693847-11-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/length.h | 120 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/length.h b/include/linux/can/length.h index b2313b2a0b02..6995092b774e 100644 --- a/include/linux/can/length.h +++ b/include/linux/can/length.h @@ -1,10 +1,127 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2020 Oliver Hartkopp + * Copyright (C) 2020 Marc Kleine-Budde */ #ifndef _CAN_LENGTH_H #define _CAN_LENGTH_H +/* + * Size of a Classical CAN Standard Frame + * + * Name of Field Bits + * --------------------------------------------------------- + * Start-of-frame 1 + * Identifier 11 + * Remote transmission request (RTR) 1 + * Identifier extension bit (IDE) 1 + * Reserved bit (r0) 1 + * Data length code (DLC) 4 + * Data field 0...64 + * CRC 15 + * CRC delimiter 1 + * ACK slot 1 + * ACK delimiter 1 + * End-of-frame (EOF) 7 + * Inter frame spacing 3 + * + * rounded up and ignoring bitstuffing + */ +#define CAN_FRAME_OVERHEAD_SFF DIV_ROUND_UP(47, 8) + +/* + * Size of a Classical CAN Extended Frame + * + * Name of Field Bits + * --------------------------------------------------------- + * Start-of-frame 1 + * Identifier A 11 + * Substitute remote request (SRR) 1 + * Identifier extension bit (IDE) 1 + * Identifier B 18 + * Remote transmission request (RTR) 1 + * Reserved bits (r1, r0) 2 + * Data length code (DLC) 4 + * Data field 0...64 + * CRC 15 + * CRC delimiter 1 + * ACK slot 1 + * ACK delimiter 1 + * End-of-frame (EOF) 7 + * Inter frame spacing 3 + * + * rounded up and ignoring bitstuffing + */ +#define CAN_FRAME_OVERHEAD_EFF DIV_ROUND_UP(67, 8) + +/* + * Size of a CAN-FD Standard Frame + * + * Name of Field Bits + * --------------------------------------------------------- + * Start-of-frame 1 + * Identifier 11 + * Reserved bit (r1) 1 + * Identifier extension bit (IDE) 1 + * Flexible data rate format (FDF) 1 + * Reserved bit (r0) 1 + * Bit Rate Switch (BRS) 1 + * Error Status Indicator (ESI) 1 + * Data length code (DLC) 4 + * Data field 0...512 + * Stuff Bit Count (SBC) 0...16: 4 20...64:5 + * CRC 0...16: 17 20...64:21 + * CRC delimiter (CD) 1 + * ACK slot (AS) 1 + * ACK delimiter (AD) 1 + * End-of-frame (EOF) 7 + * Inter frame spacing 3 + * + * assuming CRC21, rounded up and ignoring bitstuffing + */ +#define CANFD_FRAME_OVERHEAD_SFF DIV_ROUND_UP(61, 8) + +/* + * Size of a CAN-FD Extended Frame + * + * Name of Field Bits + * --------------------------------------------------------- + * Start-of-frame 1 + * Identifier A 11 + * Substitute remote request (SRR) 1 + * Identifier extension bit (IDE) 1 + * Identifier B 18 + * Reserved bit (r1) 1 + * Flexible data rate format (FDF) 1 + * Reserved bit (r0) 1 + * Bit Rate Switch (BRS) 1 + * Error Status Indicator (ESI) 1 + * Data length code (DLC) 4 + * Data field 0...512 + * Stuff Bit Count (SBC) 0...16: 4 20...64:5 + * CRC 0...16: 17 20...64:21 + * CRC delimiter (CD) 1 + * ACK slot (AS) 1 + * ACK delimiter (AD) 1 + * End-of-frame (EOF) 7 + * Inter frame spacing 3 + * + * assuming CRC21, rounded up and ignoring bitstuffing + */ +#define CANFD_FRAME_OVERHEAD_EFF DIV_ROUND_UP(80, 8) + +/* + * Maximum size of a Classical CAN frame + * (rounded up and ignoring bitstuffing) + */ +#define CAN_FRAME_LEN_MAX (CAN_FRAME_OVERHEAD_EFF + CAN_MAX_DLEN) + +/* + * Maximum size of a CAN-FD frame + * (rounded up and ignoring bitstuffing) + */ +#define CANFD_FRAME_LEN_MAX (CANFD_FRAME_OVERHEAD_EFF + CANFD_MAX_DLEN) + /* * can_cc_dlc2len(value) - convert a given data length code (dlc) of a * Classical CAN frame into a valid data length of max. 8 bytes. @@ -45,6 +162,9 @@ u8 can_fd_dlc2len(u8 dlc); /* map the sanitized data length to an appropriate data length code */ u8 can_fd_len2dlc(u8 len); +/* calculate the CAN Frame length in bytes of a given skb */ +unsigned int can_skb_get_frame_len(const struct sk_buff *skb); + /* map the data length to an appropriate data link layer length */ static inline u8 canfd_sanitize_len(u8 len) { -- cgit v1.2.3 From f0ef72febc9a6a569d92cdf6c7996015dfa8e8bb Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:26 +0100 Subject: can: dev: extend struct can_skb_priv to hold CAN frame length In order to implement byte queue limits (bql) in CAN drivers, the length of the CAN frame needs to be passed into the networking stack after queueing and after transmission completion. To avoid to calculate this length twice, extend the struct can_skb_priv to hold the length of the CAN frame and extend __can_get_echo_skb() to return that value. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-12-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index c90ebbd3008c..5db9da30843c 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -20,7 +20,7 @@ void can_flush_echo_skb(struct net_device *dev); int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, - u8 *len_ptr); + u8 *len_ptr, unsigned int *frame_len_ptr); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); void can_free_echo_skb(struct net_device *dev, unsigned int idx); struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); @@ -42,11 +42,13 @@ struct sk_buff *alloc_can_err_skb(struct net_device *dev, * struct can_skb_priv - private additional data inside CAN sk_buffs * @ifindex: ifindex of the first interface the CAN frame appeared on * @skbcnt: atomic counter to have an unique id together with skb pointer + * @frame_len: length of CAN frame in data link layer * @cf: align to the following CAN frame at skb->data */ struct can_skb_priv { int ifindex; int skbcnt; + unsigned int frame_len; struct can_frame cf[]; }; -- cgit v1.2.3 From 1dcb6e57db833419483d0df2d956b1cc2a802683 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 11 Jan 2021 15:19:27 +0100 Subject: can: dev: can_put_echo_skb(): extend to handle frame_len Add a frame_len argument to can_put_echo_skb() which is used to save length of the CAN frame into field frame_len of struct can_skb_priv so that it can be later used after transmission completion. Convert all users of this function, too. Drivers which implement BQL call can_put_echo_skb() with the output of can_skb_get_frame_len(skb) and drivers which do not simply pass zero as an input (in the same way that NULL would be given to can_get_echo_skb()). This way, we have a nice symmetry between the two echo functions. Link: https://lore.kernel.org/r/20210111061335.39983-1-mailhol.vincent@wanadoo.fr Signed-off-by: Marc Kleine-Budde Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-13-mkl@pengutronix.de Signed-off-by: Vincent Mailhol --- include/linux/can/skb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index 5db9da30843c..eaac4a637ae0 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -18,7 +18,7 @@ void can_flush_echo_skb(struct net_device *dev); int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, - unsigned int idx); + unsigned int idx, unsigned int frame_len); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr, unsigned int *frame_len_ptr); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); -- cgit v1.2.3 From 9420e1d495e2a3b5f673148b7e3ebc861b1441f7 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:28 +0100 Subject: can: dev: can_get_echo_skb(): extend to return can frame length In order to implement byte queue limits (bql) in CAN drivers, the length of the CAN frame needs to be passed into the networking stack after queueing and after transmission completion. To avoid to calculate this length twice, extend can_get_echo_skb() to return that value. Convert all users of this function, too. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-14-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index eaac4a637ae0..685f34cfba20 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -21,7 +21,8 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx, unsigned int frame_len); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr, unsigned int *frame_len_ptr); -unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); +unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx, + unsigned int *frame_len_ptr); void can_free_echo_skb(struct net_device *dev, unsigned int idx); struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); struct sk_buff *alloc_canfd_skb(struct net_device *dev, -- cgit v1.2.3 From 99842c9685ab00fa8689e8bd12bde62b706b6198 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:29 +0100 Subject: can: dev: can_rx_offload_get_echo_skb(): extend to return can frame length In order to implement byte queue limits (bql) in CAN drivers, the length of the CAN frame needs to be passed into the networking stack after queueing and after transmission completion. To avoid to calculate this length twice, extend can_rx_offload_get_echo_skb() to return that value. Convert all users of this function, too. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-15-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/rx-offload.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index f1b38088b765..40882df7105e 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -44,7 +44,8 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); int can_rx_offload_queue_sorted(struct can_rx_offload *offload, struct sk_buff *skb, u32 timestamp); unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, - unsigned int idx, u32 timestamp); + unsigned int idx, u32 timestamp, + unsigned int *frame_len_ptr); int can_rx_offload_queue_tail(struct can_rx_offload *offload, struct sk_buff *skb); void can_rx_offload_del(struct can_rx_offload *offload); -- cgit v1.2.3 From b1ae3587d16a8c8fc9453e147c8708d6f006ffbb Mon Sep 17 00:00:00 2001 From: Bjarni Jonasson Date: Wed, 13 Jan 2021 12:56:25 +0100 Subject: net: phy: Add 100 base-x mode Sparx-5 supports this mode and it is missing in the PHY core. Signed-off-by: Bjarni Jonasson Reviewed-by: Russell King Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 9effb511acde..24fcc6456a9e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -104,6 +104,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax * @PHY_INTERFACE_MODE_QSGMII: Quad SGMII * @PHY_INTERFACE_MODE_TRGMII: Turbo RGMII + * @PHY_INTERFACE_MODE_100BASEX: 100 BaseX * @PHY_INTERFACE_MODE_1000BASEX: 1000 BaseX * @PHY_INTERFACE_MODE_2500BASEX: 2500 BaseX * @PHY_INTERFACE_MODE_RXAUI: Reduced XAUI @@ -135,6 +136,7 @@ typedef enum { PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_QSGMII, PHY_INTERFACE_MODE_TRGMII, + PHY_INTERFACE_MODE_100BASEX, PHY_INTERFACE_MODE_1000BASEX, PHY_INTERFACE_MODE_2500BASEX, PHY_INTERFACE_MODE_RXAUI, @@ -217,6 +219,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "usxgmii"; case PHY_INTERFACE_MODE_10GKR: return "10gbase-kr"; + case PHY_INTERFACE_MODE_100BASEX: + return "100base-x"; default: return "unknown"; } -- cgit v1.2.3 From 91c960b0056672e74627776655c926388350fa30 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:44 +0000 Subject: bpf: Rename BPF_XADD and prepare to encode other atomics in .imm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A subsequent patch will add additional atomic operations. These new operations will use the same opcode field as the existing XADD, with the immediate discriminating different operations. In preparation, rename the instruction mode BPF_ATOMIC and start calling the zero immediate BPF_ADD. This is possible (doesn't break existing valid BPF progs) because the immediate field is currently reserved MBZ and BPF_ADD is zero. All uses are removed from the tree but the BPF_XADD definition is kept around to avoid breaking builds for people including kernel headers. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20210114181751.768687-5-jackmanb@google.com --- include/linux/filter.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 5edf2b660881..392e94b79668 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -259,15 +259,23 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = OFF, \ .imm = 0 }) -/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ -#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ +/* + * Atomic operations: + * + * BPF_ADD *(uint *) (dst_reg + off16) += src_reg + */ + +#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ - .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ - .imm = 0 }) + .imm = OP }) + +/* Legacy alias */ +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) BPF_ATOMIC_OP(SIZE, BPF_ADD, DST, SRC, OFF) /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ -- cgit v1.2.3 From 5ca419f2864a2c60940dcf4bbaeb69546200e36f Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:46 +0000 Subject: bpf: Add BPF_FETCH field / create atomic_fetch_add instruction The BPF_FETCH field can be set in bpf_insn.imm, for BPF_ATOMIC instructions, in order to have the previous value of the atomically-modified memory location loaded into the src register after an atomic op is carried out. Suggested-by: Yonghong Song Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210114181751.768687-7-jackmanb@google.com --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 392e94b79668..23fca41b8540 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -264,6 +264,7 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) * Atomic operations: * * BPF_ADD *(uint *) (dst_reg + off16) += src_reg + * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); */ #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ -- cgit v1.2.3 From 5ffa25502b5ab3d639829a2d1e316cff7f59a41e Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:47 +0000 Subject: bpf: Add instructions for atomic_[cmp]xchg This adds two atomic opcodes, both of which include the BPF_FETCH flag. XCHG without the BPF_FETCH flag would naturally encode atomic_set. This is not supported because it would be of limited value to userspace (it doesn't imply any barriers). CMPXCHG without BPF_FETCH woulud be an atomic compare-and-write. We don't have such an operation in the kernel so it isn't provided to BPF either. There are two significant design decisions made for the CMPXCHG instruction: - To solve the issue that this operation fundamentally has 3 operands, but we only have two register fields. Therefore the operand we compare against (the kernel's API calls it 'old') is hard-coded to be R0. x86 has similar design (and A64 doesn't have this problem). A potential alternative might be to encode the other operand's register number in the immediate field. - The kernel's atomic_cmpxchg returns the old value, while the C11 userspace APIs return a boolean indicating the comparison result. Which should BPF do? A64 returns the old value. x86 returns the old value in the hard-coded register (and also sets a flag). That means return-old-value is easier to JIT, so that's what we use. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210114181751.768687-8-jackmanb@google.com --- include/linux/filter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 23fca41b8540..d563820f197d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -265,6 +265,8 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) * * BPF_ADD *(uint *) (dst_reg + off16) += src_reg * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); + * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) + * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) */ #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ -- cgit v1.2.3 From 981f94c3e92146705baf97fb417a5ed1ab1a79a5 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:49 +0000 Subject: bpf: Add bitwise atomic instructions This adds instructions for atomic[64]_[fetch_]and atomic[64]_[fetch_]or atomic[64]_[fetch_]xor All these operations are isomorphic enough to implement with the same verifier, interpreter, and x86 JIT code, hence being a single commit. The main interesting thing here is that x86 doesn't directly support the fetch_ version these operations, so we need to generate a CMPXCHG loop in the JIT. This requires the use of two temporary registers, IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210114181751.768687-10-jackmanb@google.com --- include/linux/filter.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index d563820f197d..7fdce5407214 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -264,7 +264,13 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) * Atomic operations: * * BPF_ADD *(uint *) (dst_reg + off16) += src_reg + * BPF_AND *(uint *) (dst_reg + off16) &= src_reg + * BPF_OR *(uint *) (dst_reg + off16) |= src_reg + * BPF_XOR *(uint *) (dst_reg + off16) ^= src_reg * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); + * BPF_AND | BPF_FETCH src_reg = atomic_fetch_and(dst_reg + off16, src_reg); + * BPF_OR | BPF_FETCH src_reg = atomic_fetch_or(dst_reg + off16, src_reg); + * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) */ -- cgit v1.2.3 From bd7525dacd7e204f8cae061941fb9001c89d6988 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 14 Jan 2021 14:40:42 +0100 Subject: bpf: Move stack_map_get_build_id into lib Moving stack_map_get_build_id into lib with declaration in linux/buildid.h header: int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id); This function returns build id for given struct vm_area_struct. There is no functional change to stack_map_get_build_id function. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210114134044.1418404-2-jolsa@kernel.org --- include/linux/buildid.h | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 include/linux/buildid.h (limited to 'include/linux') diff --git a/include/linux/buildid.h b/include/linux/buildid.h new file mode 100644 index 000000000000..08028a212589 --- /dev/null +++ b/include/linux/buildid.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BUILDID_H +#define _LINUX_BUILDID_H + +#include + +#define BUILD_ID_SIZE_MAX 20 + +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id); + +#endif -- cgit v1.2.3 From 921f88fc891922b325b3668cd026a386571ed602 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 14 Jan 2021 14:40:43 +0100 Subject: bpf: Add size arg to build_id_parse function It's possible to have other build id types (other than default SHA1). Currently there's also ld support for MD5 build id. Adding size argument to build_id_parse function, that returns (if defined) size of the parsed build id, so we can recognize the build id type. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210114134044.1418404-3-jolsa@kernel.org --- include/linux/buildid.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 08028a212589..40232f90db6e 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -6,6 +6,7 @@ #define BUILD_ID_SIZE_MAX 20 -int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id); +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, + __u32 *size); #endif -- cgit v1.2.3 From 719a402cf60311b1cdff3f6320abaecdcc5e46b7 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 17 Jan 2021 16:59:42 +0200 Subject: net: netdevice: Add operation ndo_sk_get_lower_dev ndo_sk_get_lower_dev returns the lower netdev that corresponds to a given socket. Additionally, we implement a helper netdev_sk_get_lowest_dev() to get the lowest one in chain. Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5b949076ed23..02dcef4d66e2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1398,6 +1398,8 @@ struct net_device_ops { struct net_device* (*ndo_get_xmit_slave)(struct net_device *dev, struct sk_buff *skb, bool all_slaves); + struct net_device* (*ndo_sk_get_lower_dev)(struct net_device *dev, + struct sock *sk); netdev_features_t (*ndo_fix_features)(struct net_device *dev, netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, @@ -2858,6 +2860,8 @@ int init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct sk_buff *skb, bool all_slaves); +struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev, + struct sock *sk); struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); -- cgit v1.2.3 From ab0da5a57188b80d16d3222236c4e184953a5bb4 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 30 Dec 2020 15:01:20 +0200 Subject: net/mlx5: Expose ifc bits for query modify header Expose ifc bits for query_modify_header_context_in to be used by DEVX. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 442c0160caab..cf692fc17f41 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5904,6 +5904,18 @@ struct mlx5_ifc_dealloc_modify_header_context_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_query_modify_header_context_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 modify_header_id[0x20]; + + u8 reserved_at_60[0xa0]; +}; + struct mlx5_ifc_query_dct_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; -- cgit v1.2.3 From 7eab14de73a8028f770e703962c5437a2b0dda82 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sat, 16 Jan 2021 16:13:22 +0000 Subject: mdio, phy: fix -Wshadow warnings triggered by nested container_of() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit container_of() macro hides a local variable '__mptr' inside. This becomes a problem when several container_of() are nested in each other within single line or plain macros. As C preprocessor doesn't support generating random variable names, the sole solution is to avoid defining macros that consist only of container_of() calls, or they will self-shadow '__mptr' each time: In file included from ./include/linux/bitmap.h:10, from drivers/net/phy/phy_device.c:12: drivers/net/phy/phy_device.c: In function ‘phy_device_release’: ./include/linux/kernel.h:693:8: warning: declaration of ‘__mptr’ shadows a previous local [-Wshadow] 693 | void *__mptr = (void *)(ptr); \ | ^~~~~~ ./include/linux/phy.h:647:26: note: in expansion of macro ‘container_of’ 647 | #define to_phy_device(d) container_of(to_mdio_device(d), \ | ^~~~~~~~~~~~ ./include/linux/mdio.h:52:27: note: in expansion of macro ‘container_of’ 52 | #define to_mdio_device(d) container_of(d, struct mdio_device, dev) | ^~~~~~~~~~~~ ./include/linux/phy.h:647:39: note: in expansion of macro ‘to_mdio_device’ 647 | #define to_phy_device(d) container_of(to_mdio_device(d), \ | ^~~~~~~~~~~~~~ drivers/net/phy/phy_device.c:217:8: note: in expansion of macro ‘to_phy_device’ 217 | kfree(to_phy_device(dev)); | ^~~~~~~~~~~~~ ./include/linux/kernel.h:693:8: note: shadowed declaration is here 693 | void *__mptr = (void *)(ptr); \ | ^~~~~~ ./include/linux/phy.h:647:26: note: in expansion of macro ‘container_of’ 647 | #define to_phy_device(d) container_of(to_mdio_device(d), \ | ^~~~~~~~~~~~ drivers/net/phy/phy_device.c:217:8: note: in expansion of macro ‘to_phy_device’ 217 | kfree(to_phy_device(dev)); | ^~~~~~~~~~~~~ As they are declared in header files, these warnings are highly repetitive and very annoying (along with the one from linux/pci.h). Convert the related macros from linux/{mdio,phy}.h to static inlines to avoid self-shadowing and potentially improve bug-catching. No functional changes implied. Signed-off-by: Alexander Lobakin Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210116161246.67075-1-alobakin@pm.me Signed-off-by: Jakub Kicinski --- include/linux/mdio.h | 23 ++++++++++++++++++----- include/linux/phy.h | 7 +++++-- 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index dbd69b3d170b..ffb787d5ebde 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -49,7 +49,11 @@ struct mdio_device { unsigned int reset_assert_delay; unsigned int reset_deassert_delay; }; -#define to_mdio_device(d) container_of(d, struct mdio_device, dev) + +static inline struct mdio_device *to_mdio_device(const struct device *dev) +{ + return container_of(dev, struct mdio_device, dev); +} /* struct mdio_driver_common: Common to all MDIO drivers */ struct mdio_driver_common { @@ -57,8 +61,12 @@ struct mdio_driver_common { int flags; }; #define MDIO_DEVICE_FLAG_PHY 1 -#define to_mdio_common_driver(d) \ - container_of(d, struct mdio_driver_common, driver) + +static inline struct mdio_driver_common * +to_mdio_common_driver(const struct device_driver *driver) +{ + return container_of(driver, struct mdio_driver_common, driver); +} /* struct mdio_driver: Generic MDIO driver */ struct mdio_driver { @@ -73,8 +81,13 @@ struct mdio_driver { /* Clears up any memory if needed */ void (*remove)(struct mdio_device *mdiodev); }; -#define to_mdio_driver(d) \ - container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv) + +static inline struct mdio_driver * +to_mdio_driver(const struct device_driver *driver) +{ + return container_of(to_mdio_common_driver(driver), struct mdio_driver, + mdiodrv); +} /* device driver data */ static inline void mdiodev_set_drvdata(struct mdio_device *mdio, void *data) diff --git a/include/linux/phy.h b/include/linux/phy.h index 24fcc6456a9e..bc323fbdd21e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -648,8 +648,11 @@ struct phy_device { const struct macsec_ops *macsec_ops; #endif }; -#define to_phy_device(d) container_of(to_mdio_device(d), \ - struct phy_device, mdio) + +static inline struct phy_device *to_phy_device(const struct device *dev) +{ + return container_of(to_mdio_device(dev), struct phy_device, mdio); +} /** * struct phy_tdr_config - Configuration of a TDR raw test -- cgit v1.2.3 From fa82117010430aff2ce86400f7328f55a31b48a6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 16 Jan 2021 14:13:37 +0800 Subject: net: add inline function skb_csum_is_sctp This patch is to define a inline function skb_csum_is_sctp(), and also replace all places where it checks if it's a SCTP CSUM skb. This function would be used later in many networking drivers in the following patches. Suggested-by: Alexander Duyck Signed-off-by: Xin Long Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c9568cf60c2a..46f901adf1a8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4621,6 +4621,11 @@ static inline void skb_reset_redirect(struct sk_buff *skb) #endif } +static inline bool skb_csum_is_sctp(struct sk_buff *skb) +{ + return skb->csum_not_inet; +} + static inline void skb_set_kcov_handle(struct sk_buff *skb, const u64 kcov_handle) { -- cgit v1.2.3 From 7b8fc0103bb51d1d3e1fb5fd67958612e709f883 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 18 Jan 2021 20:09:27 -0500 Subject: bonding: add a vlan+srcmac tx hashing option This comes from an end-user request, where they're running multiple VMs on hosts with bonded interfaces connected to some interest switch topologies, where 802.3ad isn't an option. They're currently running a proprietary solution that effectively achieves load-balancing of VMs and bandwidth utilization improvements with a similar form of transmission algorithm. Basically, each VM has it's own vlan, so it always sends its traffic out the same interface, unless that interface fails. Traffic gets split between the interfaces, maintaining a consistent path, with failover still available if an interface goes down. Unlike bond_eth_hash(), this hash function is using the full source MAC address instead of just the last byte, as there are so few components to the hash, and in the no-vlan case, we would be returning just the last byte of the source MAC as the hash value. It's entirely possible to have two NICs in a bond with the same last byte of their MAC, but not the same MAC, so this adjustment should guarantee distinct hashes in all cases. This has been rudimetarily tested to provide similar results to the proprietary solution it is aiming to replace. A patch for iproute2 is also posted, to properly support the new mode there as well. Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Cc: Thomas Davis Signed-off-by: Jarod Wilson Link: https://lore.kernel.org/r/20210119010927.1191922-1-jarod@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 02dcef4d66e2..ef517254367d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2617,6 +2617,7 @@ enum netdev_lag_hash { NETDEV_LAG_HASH_L23, NETDEV_LAG_HASH_E23, NETDEV_LAG_HASH_E34, + NETDEV_LAG_HASH_VLAN_SRCMAC, NETDEV_LAG_HASH_UNKNOWN, }; -- cgit v1.2.3 From 9cacf81f8161111db25f98e78a7a0e32ae142b3f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 15 Jan 2021 08:34:59 -0800 Subject: bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE Add custom implementation of getsockopt hook for TCP_ZEROCOPY_RECEIVE. We skip generic hooks for TCP_ZEROCOPY_RECEIVE and have a custom call in do_tcp_getsockopt using the on-stack data. This removes 3% overhead for locking/unlocking the socket. Without this patch: 3.38% 0.07% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt | --3.30%--__cgroup_bpf_run_filter_getsockopt | --0.81%--__kmalloc With the patch applied: 0.52% 0.12% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt_kern Note, exporting uapi/tcp.h requires removing netinet/tcp.h from test_progs.h because those headers have confliciting definitions. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210115163501.805133-2-sdf@google.com --- include/linux/bpf-cgroup.h | 27 +++++++++++++++++++++++---- include/linux/indirect_call_wrapper.h | 6 ++++++ 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 72e69a0e1e8c..bcb2915e6124 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -147,6 +147,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, int __user *optlen, int max_optlen, int retval); +int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, + int optname, void *optval, + int *optlen, int retval); + static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -364,10 +368,23 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, ({ \ int __ret = retval; \ if (cgroup_bpf_enabled) \ - __ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \ - optname, optval, \ - optlen, max_optlen, \ - retval); \ + if (!(sock)->sk_prot->bpf_bypass_getsockopt || \ + !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \ + tcp_bpf_bypass_getsockopt, \ + level, optname)) \ + __ret = __cgroup_bpf_run_filter_getsockopt( \ + sock, level, optname, optval, optlen, \ + max_optlen, retval); \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ + optlen, retval) \ +({ \ + int __ret = retval; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_getsockopt_kern( \ + sock, level, optname, optval, optlen, retval); \ __ret; \ }) @@ -452,6 +469,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) +#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ + optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index 54c02c84906a..cfcfef37b2f1 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -60,4 +60,10 @@ #define INDIRECT_CALL_INET(f, f2, f1, ...) f(__VA_ARGS__) #endif +#if IS_ENABLED(CONFIG_INET) +#define INDIRECT_CALL_INET_1(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_INET_1(f, f1, ...) f(__VA_ARGS__) +#endif + #endif -- cgit v1.2.3 From 20f2505fb436cfa674cf1f46aaa624f44d3d1d03 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 15 Jan 2021 08:35:00 -0800 Subject: bpf: Try to avoid kzalloc in cgroup/{s,g}etsockopt When we attach a bpf program to cgroup/getsockopt any other getsockopt() syscall starts incurring kzalloc/kfree cost. Let add a small buffer on the stack and use it for small (majority) {s,g}etsockopt values. The buffer is small enough to fit into the cache line and cover the majority of simple options (most of them are 4 byte ints). It seems natural to do the same for setsockopt, but it's a bit more involved when the BPF program modifies the data (where we have to kmalloc). The assumption is that for the majority of setsockopt calls (which are doing pure BPF options or apply policy) this will bring some benefit as well. Without this patch (we remove about 1% __kmalloc): 3.38% 0.07% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt | --3.30%--__cgroup_bpf_run_filter_getsockopt | --0.81%--__kmalloc Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210115163501.805133-3-sdf@google.com --- include/linux/filter.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 7fdce5407214..5b3137d7b690 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1298,6 +1298,11 @@ struct bpf_sysctl_kern { u64 tmp_reg; }; +#define BPF_SOCKOPT_KERN_BUF_SIZE 32 +struct bpf_sockopt_buf { + u8 data[BPF_SOCKOPT_KERN_BUF_SIZE]; +}; + struct bpf_sockopt_kern { struct sock *sk; u8 *optval; -- cgit v1.2.3 From a9ed15dae0755a0368735e0556a462d8519bdb05 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 15 Jan 2021 08:35:01 -0800 Subject: bpf: Split cgroup_bpf_enabled per attach type When we attach any cgroup hook, the rest (even if unused/unattached) start to contribute small overhead. In particular, the one we want to avoid is __cgroup_bpf_run_filter_skb which does two redirections to get to the cgroup and pushes/pulls skb. Let's split cgroup_bpf_enabled to be per-attach to make sure only used attach types trigger. I've dropped some existing high-level cgroup_bpf_enabled in some places because BPF_PROG_CGROUP_XXX_RUN macros usually have another cgroup_bpf_enabled check. I also had to copy-paste BPF_CGROUP_RUN_SA_PROG_LOCK for GETPEERNAME/GETSOCKNAME because type for cgroup_bpf_enabled[type] has to be constant and known at compile time. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210115163501.805133-4-sdf@google.com --- include/linux/bpf-cgroup.h | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index bcb2915e6124..0748fd87969e 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -23,8 +23,8 @@ struct ctl_table_header; #ifdef CONFIG_CGROUP_BPF -extern struct static_key_false cgroup_bpf_enabled_key; -#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) +extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE]; +#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type]) DECLARE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); @@ -189,7 +189,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled) \ + if (cgroup_bpf_enabled(BPF_CGROUP_INET_INGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ BPF_CGROUP_INET_INGRESS); \ \ @@ -199,7 +199,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled && sk && sk == skb->sk) { \ + if (cgroup_bpf_enabled(BPF_CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \ typeof(sk) __sk = sk_to_full_sk(sk); \ if (sk_fullsock(__sk)) \ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ @@ -211,7 +211,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_SK_PROG(sk, type) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled) { \ + if (cgroup_bpf_enabled(type)) { \ __ret = __cgroup_bpf_run_filter_sk(sk, type); \ } \ __ret; \ @@ -232,7 +232,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled) \ + if (cgroup_bpf_enabled(type)) \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ NULL); \ __ret; \ @@ -241,7 +241,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled) { \ + if (cgroup_bpf_enabled(type)) { \ lock_sock(sk); \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ t_ctx); \ @@ -256,8 +256,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL) -#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \ - sk->sk_prot->pre_connect) +#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \ + ((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) || \ + cgroup_bpf_enabled(BPF_CGROUP_INET6_CONNECT)) && \ + (sk)->sk_prot->pre_connect) #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT) @@ -301,7 +303,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled) \ + if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS)) \ __ret = __cgroup_bpf_run_filter_sock_ops(sk, \ sock_ops, \ BPF_CGROUP_SOCK_OPS); \ @@ -311,7 +313,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled && (sock_ops)->sk) { \ + if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS) && (sock_ops)->sk) { \ typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \ if (__sk && sk_fullsock(__sk)) \ __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \ @@ -324,7 +326,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled) \ + if (cgroup_bpf_enabled(BPF_CGROUP_DEVICE)) \ __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \ access, \ BPF_CGROUP_DEVICE); \ @@ -336,7 +338,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled) \ + if (cgroup_bpf_enabled(BPF_CGROUP_SYSCTL)) \ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ buf, count, pos, \ BPF_CGROUP_SYSCTL); \ @@ -347,7 +349,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, kernel_optval) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled) \ + if (cgroup_bpf_enabled(BPF_CGROUP_SETSOCKOPT)) \ __ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \ optname, optval, \ optlen, \ @@ -358,7 +360,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled) \ + if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \ get_user(__ret, optlen); \ __ret; \ }) @@ -367,7 +369,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, max_optlen, retval) \ ({ \ int __ret = retval; \ - if (cgroup_bpf_enabled) \ + if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \ if (!(sock)->sk_prot->bpf_bypass_getsockopt || \ !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \ tcp_bpf_bypass_getsockopt, \ @@ -382,7 +384,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, optlen, retval) \ ({ \ int __ret = retval; \ - if (cgroup_bpf_enabled) \ + if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \ __ret = __cgroup_bpf_run_filter_getsockopt_kern( \ sock, level, optname, optval, optlen, retval); \ __ret; \ @@ -444,7 +446,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, return 0; } -#define cgroup_bpf_enabled (0) +#define cgroup_bpf_enabled(type) (0) #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) -- cgit v1.2.3 From 7baf2429a1a965369b0ce44efb6315cdd515aa9c Mon Sep 17 00:00:00 2001 From: wenxu Date: Tue, 19 Jan 2021 16:31:50 +0800 Subject: net/sched: cls_flower add CT_FLAGS_INVALID flag support This patch add the TCA_FLOWER_KEY_CT_FLAGS_INVALID flag to match the ct_state with invalid for conntrack. Signed-off-by: wenxu Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/1611045110-682-1-git-send-email-wenxu@ucloud.cn Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 46f901adf1a8..186dad231e30 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1353,8 +1353,8 @@ void skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, - u16 *ctinfo_map, - size_t mapsize); + u16 *ctinfo_map, size_t mapsize, + bool post_ct); void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, -- cgit v1.2.3 From f3196bb0f14c0ffb5089c15668bda196c98d3900 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:16 -0800 Subject: net/mlx5: Introduce vhca state event notifier vhca state events indicates change in the state of the vhca that may occur due to a SF allocation, deallocation or enabling/disabling the SF HCA. Introduce vhca state event handler which will be used by SF devlink port manager and SF hardware id allocator in subsequent patches to act on the event. This enables single entity to subscribe, query and rearm the event for a function. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f93bfe7473aa..ffba0786051e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -507,6 +507,7 @@ struct mlx5_devcom; struct mlx5_fw_reset; struct mlx5_eq_table; struct mlx5_irq_table; +struct mlx5_vhca_state_notifier; struct mlx5_rate_limit { u32 rate; @@ -603,6 +604,9 @@ struct mlx5_priv { struct mlx5_bfreg_data bfregs; struct mlx5_uars_page *uar; +#ifdef CONFIG_MLX5_SF + struct mlx5_vhca_state_notifier *vhca_state_notifier; +#endif }; enum mlx5_device_state { -- cgit v1.2.3 From 90d010b8634b89a97ca3b7aa6a88fd566fc77717 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:17 -0800 Subject: net/mlx5: SF, Add auxiliary device support Introduce API to add and delete an auxiliary device for an SF. Each SF has its own dedicated window in the PCI BAR 2. SF device is similar to PCI PF and VF that supports multiple class of devices such as net, rdma and vdpa. SF device will be added or removed in subsequent patch during SF devlink port function state change command. A subfunction device exposes user supplied subfunction number which will be further used by systemd/udev to have deterministic name for its netdevice and rdma device. An mlx5 subfunction auxiliary device example: $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 pci/0000:08:00.0/32768: type eth netdev eth6 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:00:00 state inactive opstate detached $ devlink port show ens2f0npf0sf88 pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:88:88 state inactive opstate detached $ devlink port function set ens2f0npf0sf88 hw_addr 00:00:00:00:88:88 state active On activation, $ ls -l /sys/bus/auxiliary/devices/ mlx5_core.sf.4 -> ../../../devices/pci0000:00/0000:00:03.0/0000:06:00.0/mlx5_core.sf.4 $ cat /sys/bus/auxiliary/devices/mlx5_core.sf.4/sfnum 88 Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ffba0786051e..08e5fbe97df0 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -508,6 +508,7 @@ struct mlx5_fw_reset; struct mlx5_eq_table; struct mlx5_irq_table; struct mlx5_vhca_state_notifier; +struct mlx5_sf_dev_table; struct mlx5_rate_limit { u32 rate; @@ -606,6 +607,7 @@ struct mlx5_priv { struct mlx5_uars_page *uar; #ifdef CONFIG_MLX5_SF struct mlx5_vhca_state_notifier *vhca_state_notifier; + struct mlx5_sf_dev_table *sf_dev_table; #endif }; -- cgit v1.2.3 From 1958fc2f0712ae771bfd2351b55e5a5b6b0bcfa4 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:18 -0800 Subject: net/mlx5: SF, Add auxiliary device driver Add auxiliary device driver for mlx5 subfunction auxiliary device. A mlx5 subfunction is similar to PCI PF and VF. For a subfunction an auxiliary device is created. As a result, when mlx5 SF auxiliary device binds to the driver, its netdev and rdma device are created, they appear as $ ls -l /sys/bus/auxiliary/devices/ mlx5_core.sf.4 -> ../../../devices/pci0000:00/0000:00:03.0/0000:06:00.0/mlx5_core.sf.4 $ ls -l /sys/class/net/eth1/device /sys/class/net/eth1/device -> ../../../mlx5_core.sf.4 $ cat /sys/bus/auxiliary/devices/mlx5_core.sf.4/sfnum 88 $ devlink dev show pci/0000:06:00.0 auxiliary/mlx5_core.sf.4 $ devlink port show auxiliary/mlx5_core.sf.4/1 auxiliary/mlx5_core.sf.4/1: type eth netdev p0sf88 flavour virtual port 0 splittable false $ rdma link show mlx5_0/1 link mlx5_0/1 state ACTIVE physical_state LINK_UP netdev p0sf88 $ rdma dev show 8: rocep6s0f1: node_type ca fw 16.29.0550 node_guid 248a:0703:00b3:d113 sys_image_guid 248a:0703:00b3:d112 13: mlx5_0: node_type ca fw 16.29.0550 node_guid 0000:00ff:fe00:8888 sys_image_guid 248a:0703:00b3:d112 In future, devlink device instance name will adapt to have sfnum annotation using either an alias or as devlink instance name described in RFC [1]. [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/ Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 08e5fbe97df0..48e3638b1185 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -193,7 +193,8 @@ enum port_state_policy { enum mlx5_coredev_type { MLX5_COREDEV_PF, - MLX5_COREDEV_VF + MLX5_COREDEV_VF, + MLX5_COREDEV_SF, }; struct mlx5_field_desc { @@ -608,6 +609,7 @@ struct mlx5_priv { #ifdef CONFIG_MLX5_SF struct mlx5_vhca_state_notifier *vhca_state_notifier; struct mlx5_sf_dev_table *sf_dev_table; + struct mlx5_core_dev *parent_mdev; #endif }; -- cgit v1.2.3 From 8f01054186683fe0986d54d584aa13723d51edce Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:21 -0800 Subject: net/mlx5: SF, Add port add delete functionality To handle SF port management outside of the eswitch as independent software layer, introduce eswitch notifier APIs so that mlx5 upper layer who wish to support sf port management in switchdev mode can perform its task whenever eswitch mode is set to switchdev or before eswitch is disabled. Initialize sf port table on such eswitch event. Add SF port add and delete functionality in switchdev mode. Destroy all SF ports when eswitch is disabled. Expose SF port add and delete to user via devlink commands. $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 pci/0000:06:00.0/32768: type eth netdev eth6 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:00:00 state inactive opstate detached $ devlink port show ens2f0npf0sf88 pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:00:00 state inactive opstate detached or by its unique port index: $ devlink port show pci/0000:06:00.0/32768 pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:00:00 state inactive opstate detached $ devlink port show ens2f0npf0sf88 -jp { "port": { "pci/0000:06:00.0/32768": { "type": "eth", "netdev": "ens2f0npf0sf88", "flavour": "pcisf", "controller": 0, "pfnum": 0, "sfnum": 88, "external": false, "splittable": false, "function": { "hw_addr": "00:00:00:00:00:00", "state": "inactive", "opstate": "detached" } } } } Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 48e3638b1185..7e357c7f0d5e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -510,6 +510,8 @@ struct mlx5_eq_table; struct mlx5_irq_table; struct mlx5_vhca_state_notifier; struct mlx5_sf_dev_table; +struct mlx5_sf_hw_table; +struct mlx5_sf_table; struct mlx5_rate_limit { u32 rate; @@ -611,6 +613,10 @@ struct mlx5_priv { struct mlx5_sf_dev_table *sf_dev_table; struct mlx5_core_dev *parent_mdev; #endif +#ifdef CONFIG_MLX5_SF_MANAGER + struct mlx5_sf_hw_table *sf_hw_table; + struct mlx5_sf_table *sf_table; +#endif }; enum mlx5_device_state { -- cgit v1.2.3 From e7ed11ee945438b737e2ae2370e35591e16ec371 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Wed, 20 Jan 2021 12:41:55 -0800 Subject: tcp: add TTL to SCM_TIMESTAMPING_OPT_STATS This patch adds TCP_NLA_TTL to SCM_TIMESTAMPING_OPT_STATS that exports the time-to-live or hop limit of the latest incoming packet with SCM_TSTAMP_ACK. The value exported may not be from the packet that acks the sequence when incoming packets are aggregated. Exporting the time-to-live or hop limit value of incoming packets helps to estimate the hop count of the path of the flow that may change over time. Signed-off-by: Yousuk Seung Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Link: https://lore.kernel.org/r/20210120204155.552275-1-ysseung@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 +- include/linux/tcp.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 186dad231e30..9313b5aaf45b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3859,7 +3859,7 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps); -void __skb_tstamp_tx(struct sk_buff *orig_skb, +void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2f87377e9af7..48d8a363319e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -496,7 +496,8 @@ static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn) } struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, - const struct sk_buff *orig_skb); + const struct sk_buff *orig_skb, + const struct sk_buff *ack_skb); static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) { -- cgit v1.2.3 From d03b195b5aa015f6c11988b86a3625f8d5dbac52 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 19 Jan 2021 14:08:13 +0200 Subject: sch_htb: Hierarchical QoS hardware offload HTB doesn't scale well because of contention on a single lock, and it also consumes CPU. This patch adds support for offloading HTB to hardware that supports hierarchical rate limiting. In the offload mode, HTB passes control commands to the driver using ndo_setup_tc. The driver has to replicate the whole hierarchy of classes and their settings (rate, ceil) in the NIC. Every modification of the HTB tree caused by the admin results in ndo_setup_tc being called. After this setup, the HTB algorithm is done completely in the NIC. An SQ (send queue) is created for every leaf class and attached to the hierarchy, so that the NIC can calculate and obey aggregated rate limits, too. In the future, it can be changed, so that multiple SQs will back a single leaf class. ndo_select_queue is responsible for selecting the right queue that serves the traffic class of each packet. The data path works as follows: a packet is classified by clsact, the driver selects a hardware queue according to its class, and the packet is enqueued into this queue's qdisc. This solution addresses two main problems of scaling HTB: 1. Contention by flow classification. Currently the filters are attached to the HTB instance as follows: # tc filter add dev eth0 parent 1:0 protocol ip flower dst_port 80 classid 1:10 It's possible to move classification to clsact egress hook, which is thread-safe and lock-free: # tc filter add dev eth0 egress protocol ip flower dst_port 80 action skbedit priority 1:10 This way classification still happens in software, but the lock contention is eliminated, and it happens before selecting the TX queue, allowing the driver to translate the class to the corresponding hardware queue in ndo_select_queue. Note that this is already compatible with non-offloaded HTB and doesn't require changes to the kernel nor iproute2. 2. Contention by handling packets. HTB is not multi-queue, it attaches to a whole net device, and handling of all packets takes the same lock. When HTB is offloaded, it registers itself as a multi-queue qdisc, similarly to mq: HTB is attached to the netdev, and each queue has its own qdisc. Some features of HTB may be not supported by some particular hardware, for example, the maximum number of classes may be limited, the granularity of rate and ceil parameters may be different, etc. - so, the offload is not enabled by default, a new parameter is used to enable it: # tc qdisc replace dev eth0 root handle 1: htb offload Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ef517254367d..9e8572533d8e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -858,6 +858,7 @@ enum tc_setup_type { TC_SETUP_QDISC_ETS, TC_SETUP_QDISC_TBF, TC_SETUP_QDISC_FIFO, + TC_SETUP_QDISC_HTB, }; /* These structures hold the attributes of bpf state that are being passed -- cgit v1.2.3 From 214baf22870cfa437522f3bd4fbae56338674b04 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 19 Jan 2021 14:08:15 +0200 Subject: net/mlx5e: Support HTB offload This commit adds support for HTB offload in the mlx5e driver. Performance: NIC: Mellanox ConnectX-6 Dx CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (24 cores with HT) 100 Gbit/s line rate, 500 UDP streams @ ~200 Mbit/s each 48 traffic classes, flower used for steering No shaping (rate limits set to 4 Gbit/s per TC) - checking for max throughput. Baseline: 98.7 Gbps, 8.25 Mpps HTB: 6.7 Gbps, 0.56 Mpps HTB offload: 95.6 Gbps, 8.00 Mpps Limitations: 1. 256 leaf nodes, 3 levels of depth. 2. Granularity for ceil is 1 Mbit/s. Rates are converted to weights, and the bandwidth is split among the siblings according to these weights. Other parameters for classes are not supported. Ethtool statistics support for QoS SQs are also added. The counters are called qos_txN_*, where N is the QoS queue number (starting from 0, the numeration is separate from the normal SQs), and * is the counter name (the counters are the same as for the normal SQs). Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 823411e288c0..71ae6aac3410 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -842,11 +842,16 @@ struct mlx5_ifc_qos_cap_bits { u8 reserved_at_4[0x1]; u8 packet_pacing_burst_bound[0x1]; u8 packet_pacing_typical_size[0x1]; - u8 reserved_at_7[0x4]; + u8 reserved_at_7[0x1]; + u8 nic_sq_scheduling[0x1]; + u8 nic_bw_share[0x1]; + u8 nic_rate_limit[0x1]; u8 packet_pacing_uid[0x1]; u8 reserved_at_c[0x14]; - u8 reserved_at_20[0x20]; + u8 reserved_at_20[0xb]; + u8 log_max_qos_nic_queue_group[0x5]; + u8 reserved_at_30[0x10]; u8 packet_pacing_max_rate[0x20]; @@ -3347,7 +3352,7 @@ struct mlx5_ifc_sqc_bits { u8 reserved_at_e0[0x10]; u8 packet_pacing_rate_limit_index[0x10]; u8 tis_lst_sz[0x10]; - u8 reserved_at_110[0x10]; + u8 qos_queue_group_id[0x10]; u8 reserved_at_120[0x40]; @@ -3362,6 +3367,7 @@ enum { SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, + SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP = 0x4, }; enum { @@ -4805,6 +4811,7 @@ struct mlx5_ifc_query_scheduling_element_out_bits { enum { SCHEDULING_HIERARCHY_E_SWITCH = 0x2, + SCHEDULING_HIERARCHY_NIC = 0x3, }; struct mlx5_ifc_query_scheduling_element_in_bits { -- cgit v1.2.3 From 6f1c0ea133a6e4a193a7b285efe209664caeea43 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 22 Jan 2021 18:19:48 +0000 Subject: net: introduce a netdev feature for UDP GRO forwarding Introduce a new netdev feature, NETIF_F_GRO_UDP_FWD, to allow user to turn UDP GRO on and off for forwarding. Defaults to off to not change current datapath. Suggested-by: Paolo Abeni Signed-off-by: Alexander Lobakin Signed-off-by: Jakub Kicinski --- include/linux/netdev_features.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 934de56644e7..c06d6aaba9df 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -84,6 +84,7 @@ enum { NETIF_F_GRO_FRAGLIST_BIT, /* Fraglist GRO */ NETIF_F_HW_MACSEC_BIT, /* Offload MACsec operations */ + NETIF_F_GRO_UDP_FWD_BIT, /* Allow UDP GRO for forwarding */ /* * Add your fresh new feature above and remember to update @@ -157,6 +158,7 @@ enum { #define NETIF_F_GRO_FRAGLIST __NETIF_F(GRO_FRAGLIST) #define NETIF_F_GSO_FRAGLIST __NETIF_F(GSO_FRAGLIST) #define NETIF_F_HW_MACSEC __NETIF_F(HW_MACSEC) +#define NETIF_F_GRO_UDP_FWD __NETIF_F(GRO_UDP_FWD) /* Finds the next feature with the highest number of the range of start till 0. */ @@ -234,7 +236,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) /* Changeable features with no special hardware requirements that defaults to off. */ -#define NETIF_F_SOFT_FEATURES_OFF NETIF_F_GRO_FRAGLIST +#define NETIF_F_SOFT_FEATURES_OFF (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD) #define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \ NETIF_F_HW_VLAN_CTAG_RX | \ -- cgit v1.2.3 From 83ace77f51175023c3757e2d08a92565f9b1c7f3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Jan 2021 16:30:03 +0100 Subject: netfilter: ctnetlink: remove get_ct indirection Use nf_ct_get() directly, its a small inline helper without dependencies. Add CONFIG_NF_CONNTRACK guards to elide the relevant part when conntrack isn't available at all. v2: add ifdef guard around nf_ct_get call (kernel test robot) Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0101747de549..f0f3a8354c3c 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -463,8 +463,6 @@ extern struct nf_ct_hook __rcu *nf_ct_hook; struct nlattr; struct nfnl_ct_hook { - struct nf_conn *(*get_ct)(const struct sk_buff *skb, - enum ip_conntrack_info *ctinfo); size_t (*build_size)(const struct nf_conn *ct); int (*build)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, -- cgit v1.2.3 From 6b2e04bc240fe9be9e690059f710e9f95346d34d Mon Sep 17 00:00:00 2001 From: Praveen Chaudhary Date: Mon, 25 Jan 2021 13:44:30 -0800 Subject: net: allow user to set metric on default route learned via Router Advertisement For IPv4, default route is learned via DHCPv4 and user is allowed to change metric using config etc/network/interfaces. But for IPv6, default route can be learned via RA, for which, currently a fixed metric value 1024 is used. Ideally, user should be able to configure metric on default route for IPv6 similar to IPv4. This patch adds sysctl for the same. Logs: For IPv4: Config in etc/network/interfaces: auto eth0 iface eth0 inet dhcp metric 4261413864 IPv4 Kernel Route Table: $ ip route list default via 172.21.47.1 dev eth0 metric 4261413864 FRR Table, if a static route is configured: [In real scenario, it is useful to prefer BGP learned default route over DHCPv4 default route.] Codes: K - kernel route, C - connected, S - static, R - RIP, O - OSPF, I - IS-IS, B - BGP, P - PIM, E - EIGRP, N - NHRP, T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP, > - selected route, * - FIB route S>* 0.0.0.0/0 [20/0] is directly connected, eth0, 00:00:03 K 0.0.0.0/0 [254/1000] via 172.21.47.1, eth0, 6d08h51m i.e. User can prefer Default Router learned via Routing Protocol in IPv4. Similar behavior is not possible for IPv6, without this fix. After fix [for IPv6]: sudo sysctl -w net.ipv6.conf.eth0.net.ipv6.conf.eth0.ra_defrtr_metric=1996489705 IP monitor: [When IPv6 RA is received] default via fe80::xx16:xxxx:feb3:ce8e dev eth0 proto ra metric 1996489705 pref high Kernel IPv6 routing table $ ip -6 route list default via fe80::be16:65ff:feb3:ce8e dev eth0 proto ra metric 1996489705 expires 21sec hoplimit 64 pref high FRR Table, if a static route is configured: [In real scenario, it is useful to prefer BGP learned default route over IPv6 RA default route.] Codes: K - kernel route, C - connected, S - static, R - RIPng, O - OSPFv3, I - IS-IS, B - BGP, N - NHRP, T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP, > - selected route, * - FIB route S>* ::/0 [20/0] is directly connected, eth0, 00:00:06 K ::/0 [119/1001] via fe80::xx16:xxxx:feb3:ce8e, eth0, 6d07h43m If the metric is changed later, the effect will be seen only when next IPv6 RA is received, because the default route must be fully controlled by RA msg. Below metric is changed from 1996489705 to 1996489704. $ sudo sysctl -w net.ipv6.conf.eth0.ra_defrtr_metric=1996489704 net.ipv6.conf.eth0.ra_defrtr_metric = 1996489704 IP monitor: [On next IPv6 RA msg, Kernel deletes prev route and installs new route with updated metric] Deleted default via fe80::xx16:xxxx:feb3:ce8e dev eth0 proto ra metric 1996489705 expires 3sec hoplimit 64 pref high default via fe80::xx16:xxxx:feb3:ce8e dev eth0 proto ra metric 1996489704 pref high Signed-off-by: Praveen Chaudhary Signed-off-by: Zhenggen Xu Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210125214430.24079-1-pchaudhary@linkedin.com Signed-off-by: Jakub Kicinski --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index dda61d150a13..9d1f29f0c512 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -31,6 +31,7 @@ struct ipv6_devconf { __s32 max_desync_factor; __s32 max_addresses; __s32 accept_ra_defrtr; + __u32 ra_defrtr_metric; __s32 accept_ra_min_hop_limit; __s32 accept_ra_pinfo; __s32 ignore_routes_with_linkdown; -- cgit v1.2.3 From 6fe27d68b45666381691b6a841a2adbda8c8d153 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 20 Jan 2021 02:03:55 +0900 Subject: can: dev: export can_get_state_str() function The can_get_state_str() function is also relevant to the drivers. Export the symbol and make it visible in the can/dev.h header. Link: https://lore.kernel.org/r/20210119170355.12040-1-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 7faf6a37d5b2..ac4d83a1ab81 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -123,6 +123,7 @@ void unregister_candev(struct net_device *dev); int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); +const char *can_get_state_str(const enum can_state state); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); -- cgit v1.2.3 From 87baa23e0236bc1c41ce744f524bf12dbe7fde66 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Mon, 11 Jan 2021 19:07:40 +0100 Subject: bus: mhi: core: Add helper API to return number of free TREs Introduce mhi_get_free_desc_count() API to return number of TREs available to queue buffer. MHI clients can use this API to know before hand if ring is full without calling queue API. Signed-off-by: Hemant Kumar Reviewed-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1610388462-16322-1-git-send-email-loic.poulain@linaro.org Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 562862ff819c..ece53a252217 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -598,6 +598,15 @@ void mhi_set_mhi_state(struct mhi_controller *mhi_cntrl, */ void mhi_notify(struct mhi_device *mhi_dev, enum mhi_callback cb_reason); +/** + * mhi_get_free_desc_count - Get transfer ring length + * Get # of TD available to queue buffers + * @mhi_dev: Device associated with the channels + * @dir: Direction of the channel + */ +int mhi_get_free_desc_count(struct mhi_device *mhi_dev, + enum dma_data_direction dir); + /** * mhi_prepare_for_power_up - Do pre-initialization before power up. * This is optional, call this before power up if -- cgit v1.2.3 From 8063e184e49011f6f3f34f6c358dc8a83890bb5b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 27 Jan 2021 14:15:01 -0800 Subject: skmsg: Make sk_psock_destroy() static sk_psock_destroy() is a RCU callback, I can't see any reason why it could be used outside. Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Cc: John Fastabend Cc: Jakub Sitnicki Cc: Lorenz Bauer Link: https://lore.kernel.org/bpf/20210127221501.46866-1-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index fec0c5ac1c4f..8edbbf5f2f93 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -390,7 +390,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk) } void sk_psock_stop(struct sock *sk, struct sk_psock *psock); -void sk_psock_destroy(struct rcu_head *rcu); void sk_psock_drop(struct sock *sk, struct sk_psock *psock); static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) -- cgit v1.2.3 From 772412176fb98493158929b220fe250127f611af Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 27 Jan 2021 11:31:39 -0800 Subject: bpf: Allow rewriting to ports under ip_unprivileged_port_start At the moment, BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port to the privileged ones (< ip_unprivileged_port_start), but it will be rejected later on in the __inet_bind or __inet6_bind. Let's add another return value to indicate that CAP_NET_BIND_SERVICE check should be ignored. Use the same idea as we currently use in cgroup/egress where bit #1 indicates CN. Instead, for cgroup/bind{4,6}, bit #1 indicates that CAP_NET_BIND_SERVICE should be bypassed. v5: - rename flags to be less confusing (Andrey Ignatov) - rework BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY to work on flags and accept BPF_RET_SET_CN (no behavioral changes) v4: - Add missing IPv6 support (Martin KaFai Lau) v3: - Update description (Martin KaFai Lau) - Fix capability restore in selftest (Martin KaFai Lau) v2: - Switch to explicit return code (Martin KaFai Lau) Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Reviewed-by: Martin KaFai Lau Acked-by: Andrey Ignatov Link: https://lore.kernel.org/bpf/20210127193140.3170382-1-sdf@google.com --- include/linux/bpf-cgroup.h | 38 ++++++++++++++++++++++++--------- include/linux/bpf.h | 52 +++++++++++++++++++++++++++++----------------- 2 files changed, 61 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 0748fd87969e..c42e02b4d84b 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -125,7 +125,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, enum bpf_attach_type type, - void *t_ctx); + void *t_ctx, + u32 *flags); int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct bpf_sock_ops_kern *sock_ops, @@ -231,30 +232,48 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \ ({ \ + u32 __unused_flags; \ int __ret = 0; \ if (cgroup_bpf_enabled(type)) \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ - NULL); \ + NULL, \ + &__unused_flags); \ __ret; \ }) #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \ ({ \ + u32 __unused_flags; \ int __ret = 0; \ if (cgroup_bpf_enabled(type)) { \ lock_sock(sk); \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ - t_ctx); \ + t_ctx, \ + &__unused_flags); \ release_sock(sk); \ } \ __ret; \ }) -#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL) - -#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL) +/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags + * via upper bits of return code. The only flag that is supported + * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check + * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE). + */ +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags) \ +({ \ + u32 __flags = 0; \ + int __ret = 0; \ + if (cgroup_bpf_enabled(type)) { \ + lock_sock(sk); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ + NULL, &__flags); \ + release_sock(sk); \ + if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ + *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ + } \ + __ret; \ +}) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \ ((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) || \ @@ -453,8 +472,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1aac2af12fed..321966fc35db 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1073,6 +1073,34 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, struct bpf_prog *include_prog, struct bpf_prog_array **new_array); +/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */ +#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0) +/* BPF program asks to set CN on the packet. */ +#define BPF_RET_SET_CN (1 << 0) + +#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \ + ({ \ + struct bpf_prog_array_item *_item; \ + struct bpf_prog *_prog; \ + struct bpf_prog_array *_array; \ + u32 _ret = 1; \ + u32 func_ret; \ + migrate_disable(); \ + rcu_read_lock(); \ + _array = rcu_dereference(array); \ + _item = &_array->items[0]; \ + while ((_prog = READ_ONCE(_item->prog))) { \ + bpf_cgroup_storage_set(_item->cgroup_storage); \ + func_ret = func(_prog, ctx); \ + _ret &= (func_ret & 1); \ + *(ret_flags) |= (func_ret >> 1); \ + _item++; \ + } \ + rcu_read_unlock(); \ + migrate_enable(); \ + _ret; \ + }) + #define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ ({ \ struct bpf_prog_array_item *_item; \ @@ -1120,25 +1148,11 @@ _out: \ */ #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ ({ \ - struct bpf_prog_array_item *_item; \ - struct bpf_prog *_prog; \ - struct bpf_prog_array *_array; \ - u32 ret; \ - u32 _ret = 1; \ - u32 _cn = 0; \ - migrate_disable(); \ - rcu_read_lock(); \ - _array = rcu_dereference(array); \ - _item = &_array->items[0]; \ - while ((_prog = READ_ONCE(_item->prog))) { \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ - ret = func(_prog, ctx); \ - _ret &= (ret & 1); \ - _cn |= (ret & 2); \ - _item++; \ - } \ - rcu_read_unlock(); \ - migrate_enable(); \ + u32 _flags = 0; \ + bool _cn; \ + u32 _ret; \ + _ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \ + _cn = _flags & BPF_RET_SET_CN; \ if (_ret) \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ else \ -- cgit v1.2.3 From 3d347b1b19da20f973d1d3c6bb60c11185606afd Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:07 -0800 Subject: net/mlx5: Add support for devlink traps in mlx5 core driver Add devlink traps infra-structure to mlx5 core driver. Add traps list to mlx5_priv and corresponding API: - mlx5_devlink_trap_report() to wrap trap reports to devlink - mlx5_devlink_trap_get_num_active() to decide whether to open/close trap resources. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f93bfe7473aa..c4615dc51b6f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -564,6 +564,7 @@ struct mlx5_priv { int host_pf_pages; struct mlx5_core_health health; + struct list_head traps; /* start: qp staff */ struct dentry *qp_debugfs; -- cgit v1.2.3 From 241dc159391fb9d351362d911a39dff84074cc92 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:11 -0800 Subject: net/mlx5: Notify on trap action by blocking event In order to allow mlx5 core driver to trigger synchronous operations to its consumers, add a blocking events handler. Add wrappers to blocking_notifier_[call_chain/chain_register/chain_unregister]. Add trap callback for action set and notify about this change. Following patches in the set add a listener for this event. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/linux/mlx5/device.h | 4 ++++ include/linux/mlx5/driver.h | 15 +++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f1de49d64a98..77ba54d38772 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -359,6 +359,10 @@ enum mlx5_event { MLX5_EVENT_TYPE_MAX = 0x100, }; +enum mlx5_driver_event { + MLX5_DRIVER_EVENT_TYPE_TRAP = 0, +}; + enum { MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE = 0x0, MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE = 0x1, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c4615dc51b6f..45df5b465ba8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1073,11 +1073,26 @@ enum { MAX_MR_CACHE_ENTRIES }; +/* Async-atomic event notifier used by mlx5 core to forward FW + * evetns recived from event queue to mlx5 consumers. + * Optimise event queue dipatching. + */ int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb); + +/* Async-atomic event notifier used for forwarding + * evetns from the event queue into the to mlx5 events dispatcher, + * eswitch, clock and others. + */ int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb); +/* Blocking event notifier used to forward SW events, used for slow path */ +int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); +int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb); +int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event, + void *data); + int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 5543e989fe5e2fe6a5829ee42c00152cac2bb8a0 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:16 -0800 Subject: net/mlx5e: Add trap entity to ETH driver Introduce mlx5e_trap which includes a dedicated RQ and NAPI for trapped packets. Trap-RQ processes packets that were destined to be dropped, but for debug and visibility sake these packets are trapped and reported to devlink. Trap-RQ connects between the HW and the driver and is not a part of a channel. Open mlx5e_create_rq() and mlx5_core_destroy_rq() as API and add dedicate RQ handlers which report to devlink of trapped packets. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/linux/mlx5/device.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 77ba54d38772..00057eae89ab 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -903,6 +903,11 @@ static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe) return (u64)lo | ((u64)hi << 32); } +static inline u16 get_cqe_flow_tag(struct mlx5_cqe64 *cqe) +{ + return be32_to_cpu(cqe->sop_drop_qpn) & 0xFFF; +} + #define MLX5_MPWQE_LOG_NUM_STRIDES_BASE (9) #define MLX5_MPWQE_LOG_STRIDE_SZ_BASE (6) -- cgit v1.2.3 From 28af22c6c8dff6a16163e5b6a56211d5b535c97b Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 26 Jan 2021 18:39:39 +0100 Subject: net: adjust net_device layout for cacheline usage The current layout of net_device is not optimal for cacheline usage. The member adj_list.lower linked list is split between cacheline 2 and 3. The ifindex is placed together with stats (struct net_device_stats), although most modern drivers don't update this stats member. The members netdev_ops, mtu and hard_header_len are placed on three different cachelines. These members are accessed for XDP redirect into devmap, which were noticeably with perf tool. When not using the map redirect variant (like TC-BPF does), then ifindex is also used, which is placed on a separate fourth cacheline. These members are also accessed during forwarding with regular network stack. The members priv_flags and flags are on fast-path for network stack transmit path in __dev_queue_xmit (currently located together with mtu cacheline). This patch creates a read mostly cacheline, with the purpose of keeping the above mentioned members on the same cacheline. Some netdev_features_t members also becomes part of this cacheline, which is on purpose, as function netif_skb_features() is on fast-path via validate_xmit_skb(). Signed-off-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/161168277983.410784.12401225493601624417.stgit@firesoul Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 53 ++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9e8572533d8e..e9e7ada07ea1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1858,7 +1858,6 @@ struct net_device { unsigned long mem_end; unsigned long mem_start; unsigned long base_addr; - int irq; /* * Some hardware also needs these fields (state,dev_list, @@ -1880,6 +1879,23 @@ struct net_device { struct list_head lower; } adj_list; + /* Read-mostly cache-line for fast-path access */ + unsigned int flags; + unsigned int priv_flags; + const struct net_device_ops *netdev_ops; + int ifindex; + unsigned short gflags; + unsigned short hard_header_len; + + /* Note : dev->mtu is often read without holding a lock. + * Writers usually hold RTNL. + * It is recommended to use READ_ONCE() to annotate the reads, + * and to use WRITE_ONCE() to annotate the writes. + */ + unsigned int mtu; + unsigned short needed_headroom; + unsigned short needed_tailroom; + netdev_features_t features; netdev_features_t hw_features; netdev_features_t wanted_features; @@ -1888,10 +1904,15 @@ struct net_device { netdev_features_t mpls_features; netdev_features_t gso_partial_features; - int ifindex; + unsigned int min_mtu; + unsigned int max_mtu; + unsigned short type; + unsigned char min_header_len; + unsigned char name_assign_type; + int group; - struct net_device_stats stats; + struct net_device_stats stats; /* not used by modern drivers */ atomic_long_t rx_dropped; atomic_long_t tx_dropped; @@ -1905,7 +1926,6 @@ struct net_device { const struct iw_handler_def *wireless_handlers; struct iw_public_data *wireless_data; #endif - const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_L3_MASTER_DEV const struct l3mdev_ops *l3mdev_ops; @@ -1924,34 +1944,12 @@ struct net_device { const struct header_ops *header_ops; - unsigned int flags; - unsigned int priv_flags; - - unsigned short gflags; - unsigned short padded; - unsigned char operstate; unsigned char link_mode; unsigned char if_port; unsigned char dma; - /* Note : dev->mtu is often read without holding a lock. - * Writers usually hold RTNL. - * It is recommended to use READ_ONCE() to annotate the reads, - * and to use WRITE_ONCE() to annotate the writes. - */ - unsigned int mtu; - unsigned int min_mtu; - unsigned int max_mtu; - unsigned short type; - unsigned short hard_header_len; - unsigned char min_header_len; - unsigned char name_assign_type; - - unsigned short needed_headroom; - unsigned short needed_tailroom; - /* Interface address info. */ unsigned char perm_addr[MAX_ADDR_LEN]; unsigned char addr_assign_type; @@ -1962,7 +1960,10 @@ struct net_device { unsigned short neigh_priv_len; unsigned short dev_id; unsigned short dev_port; + unsigned short padded; + spinlock_t addr_list_lock; + int irq; struct netdev_hw_addr_list uc; struct netdev_hw_addr_list mc; -- cgit v1.2.3 From 1d3f9bb1be8556bce237934ae82b3cdeda3242fd Mon Sep 17 00:00:00 2001 From: dingsenjie Date: Wed, 27 Jan 2021 10:28:01 +0800 Subject: linux/qed: fix spelling typo in qed_chain.h allocted -> allocated Signed-off-by: dingsenjie Link: https://lore.kernel.org/r/20210127022801.8028-1-dingsenjie@163.com Signed-off-by: Jakub Kicinski --- include/linux/qed/qed_chain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 4d58dc8943f0..e339b48de32d 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -470,7 +470,7 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain) /** * @brief qed_chain_reset - Resets the chain to its start state * - * @param p_chain pointer to a previously allocted chain + * @param p_chain pointer to a previously allocated chain */ static inline void qed_chain_reset(struct qed_chain *p_chain) { -- cgit v1.2.3 From 9c7caf28068421c9e0d1faea437e35e6b8983ac6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 29 Jan 2021 02:59:59 +0200 Subject: net: dsa: tag_8021q: add helpers to deduce whether a VLAN ID is RX or TX VLAN The sja1105 implementation can be blind about this, but the felix driver doesn't do exactly what it's being told, so it needs to know whether it is a TX or an RX VLAN, so it can install the appropriate type of TCAM rule. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/linux/dsa/8021q.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 88cd72dfa4e0..b12b05f1c8b4 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -64,6 +64,10 @@ int dsa_8021q_rx_source_port(u16 vid); u16 dsa_8021q_rx_subvlan(u16 vid); +bool vid_is_dsa_8021q_rxvlan(u16 vid); + +bool vid_is_dsa_8021q_txvlan(u16 vid); + bool vid_is_dsa_8021q(u16 vid); #else @@ -123,6 +127,16 @@ u16 dsa_8021q_rx_subvlan(u16 vid) return 0; } +bool vid_is_dsa_8021q_rxvlan(u16 vid) +{ + return false; +} + +bool vid_is_dsa_8021q_txvlan(u16 vid) +{ + return false; +} + bool vid_is_dsa_8021q(u16 vid) { return false; -- cgit v1.2.3 From 4f4e54366eae20d5867864001db57c5d90693d8c Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sun, 31 Jan 2021 00:46:37 +0100 Subject: net: usb: cdc_ncm: use new API for bh tasklet This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") It is unfortunate that we need to add a pointer to the driver context to get back to the usbnet device, but the space will be reclaimed once there are no more users of the old API left and we can remove the data value and flag from the tasklet struct. Signed-off-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20210130234637.26505-1-kernel@esmil.dk Signed-off-by: Jakub Kicinski --- include/linux/usb/cdc_ncm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 0ce4377545f8..f7cb3ddce7fb 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -98,6 +98,8 @@ struct cdc_ncm_ctx { struct hrtimer tx_timer; struct tasklet_struct bh; + struct usbnet *dev; + const struct usb_cdc_ncm_desc *func_desc; const struct usb_cdc_mbim_desc *mbim_desc; const struct usb_cdc_mbim_extended_desc *mbim_extended_desc; -- cgit v1.2.3 From 012ce4dd3102a0f4d80167de343e9d44b257c1b8 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 2 Feb 2021 20:06:06 +0200 Subject: ethtool: Extend link modes settings uAPI with lanes Currently, when auto negotiation is on, the user can advertise all the linkmodes which correspond to a specific speed, but does not have a similar selector for the number of lanes. This is significant when a specific speed can be achieved using different number of lanes. For example, 2x50 or 4x25. Add 'ETHTOOL_A_LINKMODES_LANES' attribute and expand 'struct ethtool_link_settings' with lanes field in order to implement a new lanes-selector that will enable the user to advertise a specific number of lanes as well. When auto negotiation is off, lanes parameter can be forced only if the driver supports it. Add a capability bit in 'struct ethtool_ops' that allows ethtool know if the driver can handle the lanes parameter when auto negotiation is off, so if it does not, an error message will be returned when trying to set lanes. Example: $ ethtool -s swp1 lanes 4 $ ethtool swp1 Settings for swp1: Supported ports: [ FIBRE ] Supported link modes: 1000baseKX/Full 10000baseKR/Full 40000baseCR4/Full 40000baseSR4/Full 40000baseLR4/Full 25000baseCR/Full 25000baseSR/Full 50000baseCR2/Full 100000baseSR4/Full 100000baseCR4/Full Supported pause frame use: Symmetric Receive-only Supports auto-negotiation: Yes Supported FEC modes: Not reported Advertised link modes: 40000baseCR4/Full 40000baseSR4/Full 40000baseLR4/Full 100000baseSR4/Full 100000baseCR4/Full Advertised pause frame use: No Advertised auto-negotiation: Yes Advertised FEC modes: Not reported Speed: Unknown! Duplex: Unknown! (255) Auto-negotiation: on Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Link detected: no Signed-off-by: Danielle Ratson Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e3da25b51ae4..1ab13c5dfb2f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -128,6 +128,7 @@ struct ethtool_link_ksettings { __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); } link_modes; + u32 lanes; }; /** @@ -265,6 +266,8 @@ struct ethtool_pause_stats { /** * struct ethtool_ops - optional netdev operations + * @cap_link_lanes_supported: indicates if the driver supports lanes + * parameter. * @supported_coalesce_params: supported types of interrupt coalescing. * @get_drvinfo: Report driver/device information. Should only set the * @driver, @version, @fw_version and @bus_info fields. If not @@ -420,6 +423,7 @@ struct ethtool_pause_stats { * of the generic netdev features interface. */ struct ethtool_ops { + u32 cap_link_lanes_supported:1; u32 supported_coalesce_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); -- cgit v1.2.3 From c8907043c6ac9ed58e6c1a76f2824be714b42228 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 2 Feb 2021 20:06:07 +0200 Subject: ethtool: Get link mode in use instead of speed and duplex parameters Currently, when user space queries the link's parameters, as speed and duplex, each parameter is passed from the driver to ethtool. Instead, get the link mode bit in use, and derive each of the parameters from it in ethtool. Signed-off-by: Danielle Ratson Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 1ab13c5dfb2f..ec4cd3921c67 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -129,6 +129,7 @@ struct ethtool_link_ksettings { __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); } link_modes; u32 lanes; + enum ethtool_link_mode_bit_indices link_mode; }; /** -- cgit v1.2.3 From 7e3ce05e7f650371061d0b9eec1e1cf74ed6fca0 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 3 Feb 2021 22:48:16 -0300 Subject: netlink: add tracepoint at NL_SET_ERR_MSG Often userspace won't request the extack information, or they don't log it because of log level or so, and even when they do, sometimes it's not enough to know exactly what caused the error. Netlink extack is the standard way of reporting erros with descriptive error messages. With a trace point on it, we then can know exactly where the error happened, regardless of userspace app. Also, we can even see if the err msg was overwritten. The wrapper do_trace_netlink_extack() is because trace points shouldn't be called from .h files, as trace points are not that small, and the function call to do_trace_netlink_extack() on the macros is not protected by tracepoint_enabled() because the macros are called from modules, and this would require exporting some trace structs. As this is error path, it's better to export just the wrapper instead. v2: removed leftover tracepoint declaration Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: David Ahern Link: https://lore.kernel.org/r/4546b63e67b2989789d146498b13cc09e1fdc543.1612403190.git.marcelo.leitner@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9f118771e248..0bcf98098c5a 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -11,6 +11,8 @@ struct net; +void do_trace_netlink_extack(const char *msg); + static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) { return (struct nlmsghdr *)skb->data; @@ -90,6 +92,8 @@ struct netlink_ext_ack { static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ + do_trace_netlink_extack(__msg); \ + \ if (__extack) \ __extack->_msg = __msg; \ } while (0) @@ -110,6 +114,8 @@ struct netlink_ext_ack { static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ + do_trace_netlink_extack(__msg); \ + \ if (__extack) { \ __extack->_msg = __msg; \ __extack->bad_attr = (attr); \ -- cgit v1.2.3 From 0053859496baa17c7675526936677c9213bf5a0d Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Thu, 4 Feb 2021 18:18:38 +0000 Subject: net: add EXPORT_INDIRECT_CALLABLE wrapper When a static function is annotated with INDIRECT_CALLABLE_SCOPE and CONFIG_RETPOLINE is set, the static keyword is removed. Sometimes the function needs to be exported but EXPORT_SYMBOL can't be used because if CONFIG_RETPOLINE is not set, we will attempt to export a static symbol. This patch introduces a new indirect call wrapper: EXPORT_INDIRECT_CALLABLE. This basically does EXPORT_SYMBOL when CONFIG_RETPOLINE is set, but does nothing when it's not. Reported-by: Stephen Rothwell Signed-off-by: Brian Vazquez Link: https://lore.kernel.org/r/20210204181839.558951-1-brianvv@google.com Signed-off-by: Jakub Kicinski --- include/linux/indirect_call_wrapper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index 54c02c84906a..a8345c8a613d 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -36,6 +36,7 @@ #define INDIRECT_CALLABLE_DECLARE(f) f #define INDIRECT_CALLABLE_SCOPE +#define EXPORT_INDIRECT_CALLABLE(f) EXPORT_SYMBOL(f) #else #define INDIRECT_CALL_1(f, f1, ...) f(__VA_ARGS__) @@ -44,6 +45,7 @@ #define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) f(__VA_ARGS__) #define INDIRECT_CALLABLE_DECLARE(f) #define INDIRECT_CALLABLE_SCOPE static +#define EXPORT_INDIRECT_CALLABLE(f) #endif /* -- cgit v1.2.3 From 1d7bab6a94458e959f3f55788fd50ddc7d97403b Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 2 Feb 2021 13:30:54 +0000 Subject: mm: constify page_is_pfmemalloc() argument The function only tests for page->index, so its argument should be const. Signed-off-by: Alexander Lobakin Reviewed-by: Jesse Brandeburg Acked-by: David Rientjes Signed-off-by: Jakub Kicinski --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ecdf8a8cd6ae..078633d43af9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1584,7 +1584,7 @@ struct address_space *page_mapping_file(struct page *page); * ALLOC_NO_WATERMARKS and the low watermark was not * met implying that the system is under some pressure. */ -static inline bool page_is_pfmemalloc(struct page *page) +static inline bool page_is_pfmemalloc(const struct page *page) { /* * Page index cannot be this large so this must be -- cgit v1.2.3 From 48f971c9c80a728646fc03367a28df747f20d0f4 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 2 Feb 2021 13:31:05 +0000 Subject: skbuff: constify skb_propagate_pfmemalloc() "page" argument The function doesn't write anything to the page struct itself, so this argument can be const. Misc: align second argument to the brace while at it. Signed-off-by: Alexander Lobakin Reviewed-by: Jesse Brandeburg Acked-by: David Rientjes Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9313b5aaf45b..b027526da4f9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2943,8 +2943,8 @@ static inline struct page *dev_alloc_page(void) * @page: The page that was allocated from skb_alloc_page * @skb: The skb that may need pfmemalloc set */ -static inline void skb_propagate_pfmemalloc(struct page *page, - struct sk_buff *skb) +static inline void skb_propagate_pfmemalloc(const struct page *page, + struct sk_buff *skb) { if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; -- cgit v1.2.3 From bc38f30f8dbce0afb8af05d917bee084b1329418 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 2 Feb 2021 13:31:21 +0000 Subject: net: introduce common dev_page_is_reusable() A bunch of drivers test the page before reusing/recycling for two common conditions: - if a page was allocated under memory pressure (pfmemalloc page); - if a page was allocated at a distant memory node (to exclude slowdowns). Introduce a new common inline for doing this, with likely() already folded inside to make driver code a bit simpler. Suggested-by: David Rientjes Suggested-by: Jakub Kicinski Cc: John Hubbard Signed-off-by: Alexander Lobakin Reviewed-by: Jesse Brandeburg Acked-by: David Rientjes Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b027526da4f9..0e42c53b8ca9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2938,6 +2938,22 @@ static inline struct page *dev_alloc_page(void) return dev_alloc_pages(0); } +/** + * dev_page_is_reusable - check whether a page can be reused for network Rx + * @page: the page to test + * + * A page shouldn't be considered for reusing/recycling if it was allocated + * under memory pressure or at a distant memory node. + * + * Returns false if this page should be returned to page allocator, true + * otherwise. + */ +static inline bool dev_page_is_reusable(const struct page *page) +{ + return likely(page_to_nid(page) == numa_mem_id() && + !page_is_pfmemalloc(page)); +} + /** * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page * @page: The page that was allocated from skb_alloc_page -- cgit v1.2.3 From 1faba27f11c8da244e793546a1b35a9b1da8208e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 3 Feb 2021 15:51:09 +0200 Subject: ipv6: silence compilation warning for non-IPV6 builds The W=1 compilation of allmodconfig generates the following warning: net/ipv6/icmp.c:448:6: warning: no previous prototype for 'icmp6_send' [-Wmissing-prototypes] 448 | void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, | ^~~~~~~~~~ Fix it by providing function declaration for builds with ipv6 as a module. Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- include/linux/icmpv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 1b3371ae8193..452d8978ffc7 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -16,9 +16,9 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); -#if IS_BUILTIN(CONFIG_IPV6) void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); +#if IS_BUILTIN(CONFIG_IPV6) static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { icmp6_send(skb, type, code, info, NULL); -- cgit v1.2.3 From 10742efc20a429b2040658af685d6bb2aa674a73 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 21 Jan 2021 19:41:52 +0200 Subject: net/mlx5e: VF tunnel TX traffic offloading When tunnel endpoint is on VF, driver still assumes that endpoint is on uplink and incorrectly configures encap rule offload according to that assumption. As a result, traffic is sent directly to the uplink and rules installed on representor of tunnel endpoint VF are ignored. Implement following changes to allow offloading tx traffic with tunnel endpoint on VF: - For tunneling flows perform route lookup on route and out devices pair. If out device is uplink and route device is VF of same physical port, then modify packet reg_c_0 metadata register (source port) with the value of VF vport. Use eswitch vhca_id->vport mapping introduced in one of previous patches in the series to obtain vport from route netdevice. - Recirculate encapsulated packets to VF vport in order to apply any flow rules installed on VF representor that match on encapsulated traffic. Only enable support for this functionality when all following conditions are true: - Hardware advertises capability to preserve reg_c_0 value on packet recirculation. - Vport metadata matching is enabled. - Termination tables are to be used by the flow. Example TC rules for VF tunnel traffic: 1. Rule that redirects packets from UL to VF rep that has the tunnel endpoint IP address: $ tc -s filter show dev enp8s0f0 ingress filter protocol ip pref 4 flower chain 0 filter protocol ip pref 4 flower chain 0 handle 0x1 dst_mac 16:c9:a0:2d:69:2c src_mac 0c:42:a1:58:ab:e4 eth_type ipv4 ip_flags nofrag in_hw in_hw_count 1 action order 1: mirred (Egress Redirect to device enp8s0f0_0) stolen index 3 ref 1 bind 1 installed 377 sec used 0 sec Action statistics: Sent 114096 bytes 952 pkt (dropped 0, overlimits 0 requeues 0) Sent software 0 bytes 0 pkt Sent hardware 114096 bytes 952 pkt backlog 0b 0p requeues 0 cookie 878fa48d8c423fc08c3b6ca599b50a97 no_percpu used_hw_stats delayed 2. Rule that decapsulates the tunneled flow and redirects to destination VF representor: $ tc -s filter show dev vxlan_sys_4789 ingress filter protocol ip pref 4 flower chain 0 filter protocol ip pref 4 flower chain 0 handle 0x1 dst_mac ca:2e:a7:3f:f5:0f src_mac 0a:40:bd:30:89:99 eth_type ipv4 enc_dst_ip 7.7.7.5 enc_src_ip 7.7.7.1 enc_key_id 98 enc_dst_port 4789 enc_tos 0 ip_flags nofrag in_hw in_hw_count 1 action order 1: tunnel_key unset pipe index 2 ref 1 bind 1 installed 434 sec used 434 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 used_hw_stats delayed action order 2: mirred (Egress Redirect to device enp8s0f0_1) stolen index 4 ref 1 bind 1 installed 434 sec used 0 sec Action statistics: Sent 129936 bytes 1082 pkt (dropped 0, overlimits 0 requeues 0) Sent software 0 bytes 0 pkt Sent hardware 129936 bytes 1082 pkt backlog 0b 0p requeues 0 cookie ac17cf398c4c69e4a5b2f7aabd1b88ff no_percpu used_hw_stats delayed Co-developed-by: Dmytro Linkin Signed-off-by: Dmytro Linkin Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 29fd832950e0..67e341274a22 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -96,6 +96,8 @@ static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num); +u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, + u16 vport_num); u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ -- cgit v1.2.3 From 48d216e5596a58e3cfa6d4548343f982c5921b79 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 31 Aug 2020 16:18:19 +0300 Subject: net/mlx5e: Refactor reg_c1 usage Following patch in series uses reg_c1 in eswitch code. To use reg_c1 helpers in both TC and eswitch code, refactor existing helpers according to similar use case of reg_c0 and move the functionality into eswitch.h. Calculate reg mappings length from new defines to ensure that they are always in sync and only need to be changed in single place. Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 67e341274a22..3b20e84049c1 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -98,6 +98,25 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num); u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, u16 vport_num); + +/* Reg C1 usage: + * Reg C1 = < ESW_TUN_ID(12) | ESW_TUN_OPTS(12) | ESW_ZONE_ID(8) > + * + * Highest 12 bits of reg c1 is the encapsulation tunnel id, next 12 bits is + * encapsulation tunnel options, and the lowest 8 bits are used for zone id. + * + * Zone id is used to restore CT flow when packet misses on chain. + * + * Tunnel id and options are used together to restore the tunnel info metadata + * on miss and to support inner header rewrite by means of implicit chain 0 + * flows. + */ +#define ESW_ZONE_ID_BITS 8 +#define ESW_TUN_OPTS_BITS 12 +#define ESW_TUN_ID_BITS 12 +#define ESW_TUN_OFFSET ESW_ZONE_ID_BITS +#define ESW_ZONE_ID_MASK GENMASK(ESW_ZONE_ID_BITS - 1, 0) + u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ -- cgit v1.2.3 From 8e404fefa58b6138531e3d4b5647ee79f75ae9a8 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 31 Aug 2020 16:18:57 +0300 Subject: net/mlx5e: Match recirculated packet miss in slow table using reg_c1 Previous patch in series that implements stack devices RX path implements indirect table rules that match on tunnel VNI. After such rule is created all tunnel traffic is recirculated to root table. However, recirculated packet might not match on any rules installed in the table (for example, when IP traffic follows ARP traffic). In that case packets appear on representor of tunnel endpoint VF instead being redirected to the VF itself. Extend slow table with additional flow group that matches on reg_c0 (source port value set by indirect tables implemented by previous patch in series) and reg_c1 (special 0xFFF mark). When creating offloads fdb tables, install one rule per VF vport to match on recirculated miss packets and redirect them to appropriate VF vport. Modify indirect tables code to also rewrite reg_c1 with special 0xFFF mark. Implementation reuses reg_c1 tunnel id bits. This is safe to do because recirculated packets are always matched before decapsulation. Signed-off-by: Vlad Buslov Signed-off-by: Dmytro Linkin Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 3b20e84049c1..994c2c8cb4fd 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -114,8 +114,16 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, #define ESW_ZONE_ID_BITS 8 #define ESW_TUN_OPTS_BITS 12 #define ESW_TUN_ID_BITS 12 -#define ESW_TUN_OFFSET ESW_ZONE_ID_BITS +#define ESW_TUN_OPTS_OFFSET ESW_ZONE_ID_BITS +#define ESW_TUN_OFFSET ESW_TUN_OPTS_OFFSET #define ESW_ZONE_ID_MASK GENMASK(ESW_ZONE_ID_BITS - 1, 0) +#define ESW_TUN_OPTS_MASK GENMASK(32 - ESW_TUN_ID_BITS - 1, ESW_TUN_OPTS_OFFSET) +#define ESW_TUN_MASK GENMASK(31, ESW_TUN_OFFSET) +#define ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT 0 /* 0 is not a valid tunnel id */ +#define ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT 0xFFF /* 0xFFF is a reserved mapping */ +#define ESW_TUN_SLOW_TABLE_GOTO_VPORT ((ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT << ESW_TUN_OPTS_BITS) | \ + ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT) +#define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ -- cgit v1.2.3 From b358e2122b9d7aa99f681d4edfafd999845d16ff Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 4 Feb 2021 18:56:35 +0800 Subject: mm: page_frag: Introduce page_frag_alloc_align() In the current implementation of page_frag_alloc(), it doesn't have any align guarantee for the returned buffer address. But for some hardwares they do require the DMA buffer to be aligned correctly, so we would have to use some workarounds like below if the buffers allocated by the page_frag_alloc() are used by these hardwares for DMA. buf = page_frag_alloc(really_needed_size + align); buf = PTR_ALIGN(buf, align); These codes seems ugly and would waste a lot of memories if the buffers are used in a network driver for the TX/RX. So introduce page_frag_alloc_align() to make sure that an aligned buffer address is returned. Signed-off-by: Kevin Hao Acked-by: Vlastimil Babka Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- include/linux/gfp.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 6e479e9c48ce..80544d5c08e7 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -583,8 +583,16 @@ extern void free_pages(unsigned long addr, unsigned int order); struct page_frag_cache; extern void __page_frag_cache_drain(struct page *page, unsigned int count); -extern void *page_frag_alloc(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask); +extern void *page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align_mask); + +static inline void *page_frag_alloc(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask) +{ + return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); +} + extern void page_frag_free(void *addr); #define __free_page(page) __free_pages((page), 0) -- cgit v1.2.3 From 3f6e687dff395da43b056c18150a423bc7bf5d14 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 4 Feb 2021 18:56:36 +0800 Subject: net: Introduce {netdev,napi}_alloc_frag_align() In the current implementation of {netdev,napi}_alloc_frag(), it doesn't have any align guarantee for the returned buffer address, But for some hardwares they do require the DMA buffer to be aligned correctly, so we would have to use some workarounds like below if the buffers allocated by the {netdev,napi}_alloc_frag() are used by these hardwares for DMA. buf = napi_alloc_frag(really_needed_size + align); buf = PTR_ALIGN(buf, align); These codes seems ugly and would waste a lot of memories if the buffers are used in a network driver for the TX/RX. We have added the align support for the page_frag functions, so add the corresponding {netdev,napi}_frag functions. Signed-off-by: Kevin Hao Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0e42c53b8ca9..0a4e91a2f873 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2818,7 +2818,26 @@ void skb_queue_purge(struct sk_buff_head *list); unsigned int skb_rbtree_purge(struct rb_root *root); -void *netdev_alloc_frag(unsigned int fragsz); +void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); + +/** + * netdev_alloc_frag - allocate a page fragment + * @fragsz: fragment size + * + * Allocates a frag from a page for receive buffer. + * Uses GFP_ATOMIC allocations. + */ +static inline void *netdev_alloc_frag(unsigned int fragsz) +{ + return __netdev_alloc_frag_align(fragsz, ~0u); +} + +static inline void *netdev_alloc_frag_align(unsigned int fragsz, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __netdev_alloc_frag_align(fragsz, -align); +} struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, gfp_t gfp_mask); @@ -2877,7 +2896,20 @@ static inline void skb_free_frag(void *addr) page_frag_free(addr); } -void *napi_alloc_frag(unsigned int fragsz); +void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); + +static inline void *napi_alloc_frag(unsigned int fragsz) +{ + return __napi_alloc_frag_align(fragsz, ~0u); +} + +static inline void *napi_alloc_frag_align(unsigned int fragsz, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __napi_alloc_frag_align(fragsz, -align); +} + struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int length, gfp_t gfp_mask); static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, -- cgit v1.2.3 From 4331667fa14e6643859d0498b34281185eb8018b Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 5 Feb 2021 14:56:39 +0800 Subject: ssb: Use true and false for bool variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following coccicheck warnings: ./include/linux/ssb/ssb_driver_gige.h:89:8-9: WARNING: return of 0/1 in function 'ssb_gige_one_dma_at_once' with return type bool. ./include/linux/ssb/ssb_driver_gige.h:79:8-9: WARNING: return of 0/1 in function 'ssb_gige_have_roboswitch' with return type bool. ./include/linux/ssb/ssb_driver_gige.h:182:8-9: WARNING: return of 0/1 in function 'ssb_gige_must_flush_posted_writes' with return type bool. ./include/linux/ssb/ssb_driver_gige.h:178:8-9: WARNING: return of 0/1 in function 'ssb_gige_one_dma_at_once' with return type bool. ./include/linux/ssb/ssb_driver_gige.h:174:8-9: WARNING: return of 0/1 in function 'ssb_gige_have_roboswitch' with return type bool. ./include/linux/ssb/ssb_driver_gige.h:170:8-9: WARNING: return of 0/1 in function 'ssb_gige_is_rgmii' with return type bool. ./include/linux/ssb/ssb_driver_gige.h:162:8-9: WARNING: return of 0/1 in function 'pdev_is_ssb_gige_core' with return type bool. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Acked-by: Michael BĂŒsch Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1612508199-92282-1-git-send-email-jiapeng.chong@linux.alibaba.com --- include/linux/ssb/ssb_driver_gige.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h index 31593b34608e..15ba0df1ee0d 100644 --- a/include/linux/ssb/ssb_driver_gige.h +++ b/include/linux/ssb/ssb_driver_gige.h @@ -76,7 +76,7 @@ static inline bool ssb_gige_have_roboswitch(struct pci_dev *pdev) if (dev) return !!(dev->dev->bus->sprom.boardflags_lo & SSB_GIGE_BFL_ROBOSWITCH); - return 0; + return false; } /* Returns whether we can only do one DMA at once. */ @@ -86,7 +86,7 @@ static inline bool ssb_gige_one_dma_at_once(struct pci_dev *pdev) if (dev) return ((dev->dev->bus->chip_id == 0x4785) && (dev->dev->bus->chip_rev < 2)); - return 0; + return false; } /* Returns whether we must flush posted writes. */ @@ -159,7 +159,7 @@ static inline void ssb_gige_exit(void) static inline bool pdev_is_ssb_gige_core(struct pci_dev *pdev) { - return 0; + return false; } static inline struct ssb_gige * pdev_to_ssb_gige(struct pci_dev *pdev) { @@ -167,19 +167,19 @@ static inline struct ssb_gige * pdev_to_ssb_gige(struct pci_dev *pdev) } static inline bool ssb_gige_is_rgmii(struct pci_dev *pdev) { - return 0; + return false; } static inline bool ssb_gige_have_roboswitch(struct pci_dev *pdev) { - return 0; + return false; } static inline bool ssb_gige_one_dma_at_once(struct pci_dev *pdev) { - return 0; + return false; } static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev) { - return 0; + return false; } static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr) { -- cgit v1.2.3 From 29863d41bb6e1d969c62fdb15b0961806942960e Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 8 Feb 2021 11:34:09 -0800 Subject: net: implement threaded-able napi poll loop support This patch allows running each napi poll loop inside its own kernel thread. The kthread is created during netif_napi_add() if dev->threaded is set. And threaded mode is enabled in napi_enable(). We will provide a way to set dev->threaded and enable threaded mode without a device up/down in the following patch. Once that threaded mode is enabled and the kthread is started, napi_schedule() will wake-up such thread instead of scheduling the softirq. The threaded poll loop behaves quite likely the net_rx_action, but it does not have to manipulate local irqs and uses an explicit scheduling point based on netdev_budget. Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Co-developed-by: Hannes Frederic Sowa Signed-off-by: Hannes Frederic Sowa Co-developed-by: Jakub Kicinski Signed-off-by: Jakub Kicinski Signed-off-by: Wei Wang Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e9e7ada07ea1..99fb4ec9573e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -347,6 +347,7 @@ struct napi_struct { struct list_head dev_list; struct hlist_node napi_hash_node; unsigned int napi_id; + struct task_struct *thread; }; enum { @@ -358,6 +359,7 @@ enum { NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */ NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ + NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ }; enum { @@ -369,6 +371,7 @@ enum { NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), + NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), }; enum gro_result { @@ -503,20 +506,7 @@ static inline bool napi_complete(struct napi_struct *n) */ void napi_disable(struct napi_struct *n); -/** - * napi_enable - enable NAPI scheduling - * @n: NAPI context - * - * Resume NAPI from being scheduled on this context. - * Must be paired with napi_disable. - */ -static inline void napi_enable(struct napi_struct *n) -{ - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - smp_mb__before_atomic(); - clear_bit(NAPI_STATE_SCHED, &n->state); - clear_bit(NAPI_STATE_NPSVC, &n->state); -} +void napi_enable(struct napi_struct *n); /** * napi_synchronize - wait until NAPI is not running @@ -1827,6 +1817,8 @@ enum netdev_priv_flags { * * @wol_enabled: Wake-on-LAN is enabled * + * @threaded: napi threaded mode is enabled + * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. @@ -2145,6 +2137,7 @@ struct net_device { struct lock_class_key *qdisc_running_key; bool proto_down; unsigned wol_enabled:1; + unsigned threaded:1; struct list_head net_notifier_list; -- cgit v1.2.3 From 5fdd2f0e5c64846bf3066689b73fc3b8dddd1c74 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 8 Feb 2021 11:34:10 -0800 Subject: net: add sysfs attribute to control napi threaded mode This patch adds a new sysfs attribute to the network device class. Said attribute provides a per-device control to enable/disable the threaded mode for all the napi instances of the given network device, without the need for a device up/down. User sets it to 1 or 0 to enable or disable threaded mode. Note: when switching between threaded and the current softirq based mode for a napi instance, it will not immediately take effect if the napi is currently being polled. The mode switch will happen for the next time napi_schedule() is called. Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Co-developed-by: Hannes Frederic Sowa Signed-off-by: Hannes Frederic Sowa Co-developed-by: Felix Fietkau Signed-off-by: Felix Fietkau Signed-off-by: Wei Wang Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 99fb4ec9573e..1340327f7abf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -497,6 +497,8 @@ static inline bool napi_complete(struct napi_struct *n) return napi_complete_done(n, 0); } +int dev_set_threaded(struct net_device *dev, bool threaded); + /** * napi_disable - prevent NAPI from scheduling * @n: NAPI context -- cgit v1.2.3 From 01f810ace9ed37255f27608a0864abebccf0aab3 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Sat, 6 Feb 2021 20:10:24 -0500 Subject: bpf: Allow variable-offset stack access Before this patch, variable offset access to the stack was dissalowed for regular instructions, but was allowed for "indirect" accesses (i.e. helpers). This patch removes the restriction, allowing reading and writing to the stack through stack pointers with variable offsets. This makes stack-allocated buffers more usable in programs, and brings stack pointers closer to other types of pointers. The motivation is being able to use stack-allocated buffers for data manipulation. When the stack size limit is sufficient, allocating buffers on the stack is simpler than per-cpu arrays, or other alternatives. In unpriviledged programs, variable-offset reads and writes are disallowed (they were already disallowed for the indirect access case) because the speculative execution checking code doesn't support them. Additionally, when writing through a variable-offset stack pointer, if any pointers are in the accessible range, there's possilibities of later leaking pointers because the write cannot be tracked precisely. Writes with variable offset mark the whole range as initialized, even though we don't know which stack slots are actually written. This is in order to not reject future reads to these slots. Note that this doesn't affect writes done through helpers; like before, helpers need the whole stack range to be initialized to begin with. All the stack slots are in range are considered scalars after the write; variable-offset register spills are not tracked. For reads, all the stack slots in the variable range needs to be initialized (but see above about what writes do), otherwise the read is rejected. All register spilled in stack slots that might be read are marked as having been read, however reads through such pointers don't do register filling; the target register will always be either a scalar or a constant zero. Signed-off-by: Andrei Matei Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210207011027.676572-2-andreimatei1@gmail.com --- include/linux/bpf.h | 5 +++++ include/linux/bpf_verifier.h | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 321966fc35db..079162bbd387 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1290,6 +1290,11 @@ static inline bool bpf_allow_ptr_leaks(void) return perfmon_capable(); } +static inline bool bpf_allow_uninit_stack(void) +{ + return perfmon_capable(); +} + static inline bool bpf_allow_ptr_to_map_access(void) { return perfmon_capable(); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index dfe6f85d97dd..532c97836d0d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -195,7 +195,7 @@ struct bpf_func_state { * 0 = main function, 1 = first callee. */ u32 frameno; - /* subprog number == index within subprog_stack_depth + /* subprog number == index within subprog_info * zero == main subprog */ u32 subprogno; @@ -404,6 +404,7 @@ struct bpf_verifier_env { u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; + bool allow_uninit_stack; bool allow_ptr_to_map_access; bool bpf_capable; bool bypass_spec_v1; -- cgit v1.2.3 From 5b74df80f301e872143fa716f3f4361b2e293e19 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 4 Jan 2021 09:38:57 +0200 Subject: net/mlx5: Delete device list leftover Device list is not stored in mlx5_priv anymore, so delete it as it's not used. Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 88197b87bd81..936302b2c141 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -588,7 +588,6 @@ struct mlx5_priv { /* end: alloc staff */ struct dentry *dbg_root; - struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; struct mlx5_adev **adev; -- cgit v1.2.3 From 700d4796ef59f5faf240d307839bd419e2b6bdff Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 9 Feb 2021 19:36:26 -0800 Subject: bpf: Optimize program stats Move bpf_prog_stats from prog->aux into prog to avoid one extra load in critical path of program execution. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210210033634.62081-2-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 8 -------- include/linux/filter.h | 14 +++++++++++--- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 079162bbd387..5a388955e6b6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -507,12 +506,6 @@ enum bpf_cgroup_storage_type { */ #define MAX_BPF_FUNC_ARGS 12 -struct bpf_prog_stats { - u64 cnt; - u64 nsecs; - struct u64_stats_sync syncp; -} __aligned(2 * sizeof(u64)); - struct btf_func_model { u8 ret_size; u8 nr_args; @@ -845,7 +838,6 @@ struct bpf_prog_aux { u32 linfo_idx; u32 num_exentries; struct exception_table_entry *extable; - struct bpf_prog_stats __percpu *stats; union { struct work_struct work; struct rcu_head rcu; diff --git a/include/linux/filter.h b/include/linux/filter.h index 5b3137d7b690..cecb03c9d251 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -539,6 +540,12 @@ struct bpf_binary_header { u8 image[] __aligned(BPF_IMAGE_ALIGNMENT); }; +struct bpf_prog_stats { + u64 cnt; + u64 nsecs; + struct u64_stats_sync syncp; +} __aligned(2 * sizeof(u64)); + struct bpf_prog { u16 pages; /* Number of allocated pages */ u16 jited:1, /* Is our filter JIT'ed? */ @@ -557,10 +564,11 @@ struct bpf_prog { u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ u8 tag[BPF_TAG_SIZE]; - struct bpf_prog_aux *aux; /* Auxiliary fields */ - struct sock_fprog_kern *orig_prog; /* Original BPF program */ + struct bpf_prog_stats __percpu *stats; unsigned int (*bpf_func)(const void *ctx, const struct bpf_insn *insn); + struct bpf_prog_aux *aux; /* Auxiliary fields */ + struct sock_fprog_kern *orig_prog; /* Original BPF program */ /* Instructions for interpreter */ struct sock_filter insns[0]; struct bpf_insn insnsi[]; @@ -581,7 +589,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); struct bpf_prog_stats *__stats; \ u64 __start = sched_clock(); \ __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ - __stats = this_cpu_ptr(prog->aux->stats); \ + __stats = this_cpu_ptr(prog->stats); \ u64_stats_update_begin(&__stats->syncp); \ __stats->cnt++; \ __stats->nsecs += sched_clock() - __start; \ -- cgit v1.2.3 From f2dd3b39467411c53703125a111f45b3672c1771 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 9 Feb 2021 19:36:28 -0800 Subject: bpf: Compute program stats for sleepable programs Since sleepable programs don't migrate from the cpu the excution stats can be computed for them as well. Reuse the same infrastructure for both sleepable and non-sleepable programs. run_cnt -> the number of times the program was executed. run_time_ns -> the program execution time in nanoseconds including the off-cpu time when the program was sleeping. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210210033634.62081-4-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5a388955e6b6..e1840ceaf55f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -563,8 +563,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, /* these two functions are called from generated trampoline */ u64 notrace __bpf_prog_enter(void); void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); -void notrace __bpf_prog_enter_sleepable(void); -void notrace __bpf_prog_exit_sleepable(void); +u64 notrace __bpf_prog_enter_sleepable(void); +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start); struct bpf_ksym { unsigned long start; -- cgit v1.2.3 From ca06f55b90020cd97f4cc6d52db95436162e7dcf Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 9 Feb 2021 19:36:29 -0800 Subject: bpf: Add per-program recursion prevention mechanism Since both sleepable and non-sleepable programs execute under migrate_disable add recursion prevention mechanism to both types of programs when they're executed via bpf trampoline. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210210033634.62081-5-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 6 +++--- include/linux/filter.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e1840ceaf55f..1c8ea682c007 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -529,7 +529,7 @@ struct btf_func_model { /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 */ -#define BPF_MAX_TRAMP_PROGS 40 +#define BPF_MAX_TRAMP_PROGS 38 struct bpf_tramp_progs { struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS]; @@ -561,9 +561,9 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, struct bpf_tramp_progs *tprogs, void *orig_call); /* these two functions are called from generated trampoline */ -u64 notrace __bpf_prog_enter(void); +u64 notrace __bpf_prog_enter(struct bpf_prog *prog); void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); -u64 notrace __bpf_prog_enter_sleepable(void); +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog); void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start); struct bpf_ksym { diff --git a/include/linux/filter.h b/include/linux/filter.h index cecb03c9d251..6a06f3c69f4e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -565,6 +565,7 @@ struct bpf_prog { u32 jited_len; /* Size of jited insns in bytes */ u8 tag[BPF_TAG_SIZE]; struct bpf_prog_stats __percpu *stats; + int __percpu *active; unsigned int (*bpf_func)(const void *ctx, const struct bpf_insn *insn); struct bpf_prog_aux *aux; /* Auxiliary fields */ -- cgit v1.2.3 From 9ed9e9ba2337205311398a312796c213737bac35 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 9 Feb 2021 19:36:31 -0800 Subject: bpf: Count the number of times recursion was prevented Add per-program counter for number of times recursion prevention mechanism was triggered and expose it via show_fdinfo and bpf_prog_info. Teach bpftool to print it. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210210033634.62081-7-alexei.starovoitov@gmail.com --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 6a06f3c69f4e..3b00fc906ccd 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -543,6 +543,7 @@ struct bpf_binary_header { struct bpf_prog_stats { u64 cnt; u64 nsecs; + u64 misses; struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); -- cgit v1.2.3 From 4217a64e18a1647a0dbc68cb3169a5a06f054ec8 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 9 Feb 2021 17:38:52 +0100 Subject: net: phy: introduce phydev->port At the moment, PORT_MII is reported in the ethtool ops. This is odd because it is an interface between the MAC and the PHY and no external port. Some network card drivers will overwrite the port to twisted pair or fiber, though. Even worse, the MDI/MDIX setting is only used by ethtool if the port is twisted pair. Set the port to PORT_TP by default because most PHY drivers are copper ones. If there is fibre support and it is enabled, the PHY driver will set it to PORT_FIBRE. This will change reporting PORT_MII to either PORT_TP or PORT_FIBRE; except for the genphy fallback driver. Suggested-by: Andrew Lunn Signed-off-by: Michael Walle Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index bc323fbdd21e..c130788306c8 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -503,6 +503,7 @@ struct macsec_ops; * * @speed: Current link speed * @duplex: Current duplex + * @port: Current port * @pause: Current pause * @asym_pause: Current asymmetric pause * @supported: Combined MAC/PHY supported linkmodes @@ -581,6 +582,7 @@ struct phy_device { */ int speed; int duplex; + int port; int pause; int asym_pause; u8 master_slave_get; -- cgit v1.2.3 From dcf0cd1cc58b8e88793ad6531db9b3a47324ca09 Mon Sep 17 00:00:00 2001 From: George McCollister Date: Tue, 9 Feb 2021 19:02:11 -0600 Subject: net: hsr: add offloading support Add support for offloading of HSR/PRP (IEC 62439-3) tag insertion tag removal, duplicate generation and forwarding. For HSR, insertion involves the switch adding a 6 byte HSR header after the 14 byte Ethernet header. For PRP it adds a 6 byte trailer. Tag removal involves automatically stripping the HSR/PRP header/trailer in the switch. This is possible when the switch also performs auto deduplication using the HSR/PRP header/trailer (making it no longer required). Forwarding involves automatically forwarding between redundant ports in an HSR. This is crucial because delay is accumulated as a frame passes through each node in the ring. Duplication involves the switch automatically sending a single frame from the CPU port to both redundant ports. This is required because the inserted HSR/PRP header/trailer must contain the same sequence number on the frames sent out both redundant ports. Export is_hsr_master so DSA can tell them apart from other devices in dsa_slave_changeupper. Signed-off-by: George McCollister Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/if_hsr.h | 27 +++++++++++++++++++++++++++ include/linux/netdev_features.h | 9 +++++++++ 2 files changed, 36 insertions(+) create mode 100644 include/linux/if_hsr.h (limited to 'include/linux') diff --git a/include/linux/if_hsr.h b/include/linux/if_hsr.h new file mode 100644 index 000000000000..38bbc537d4e4 --- /dev/null +++ b/include/linux/if_hsr.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IF_HSR_H_ +#define _LINUX_IF_HSR_H_ + +/* used to differentiate various protocols */ +enum hsr_version { + HSR_V0 = 0, + HSR_V1, + PRP_V1, +}; + +#if IS_ENABLED(CONFIG_HSR) +extern bool is_hsr_master(struct net_device *dev); +extern int hsr_get_version(struct net_device *dev, enum hsr_version *ver); +#else +static inline bool is_hsr_master(struct net_device *dev) +{ + return false; +} +static inline int hsr_get_version(struct net_device *dev, + enum hsr_version *ver) +{ + return -EINVAL; +} +#endif /* CONFIG_HSR */ + +#endif /*_LINUX_IF_HSR_H_*/ diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index c06d6aaba9df..3de38d6a0aea 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -86,6 +86,11 @@ enum { NETIF_F_HW_MACSEC_BIT, /* Offload MACsec operations */ NETIF_F_GRO_UDP_FWD_BIT, /* Allow UDP GRO for forwarding */ + NETIF_F_HW_HSR_TAG_INS_BIT, /* Offload HSR tag insertion */ + NETIF_F_HW_HSR_TAG_RM_BIT, /* Offload HSR tag removal */ + NETIF_F_HW_HSR_FWD_BIT, /* Offload HSR forwarding */ + NETIF_F_HW_HSR_DUP_BIT, /* Offload HSR duplication */ + /* * Add your fresh new feature above and remember to update * netdev_features_strings[] in net/core/ethtool.c and maybe @@ -159,6 +164,10 @@ enum { #define NETIF_F_GSO_FRAGLIST __NETIF_F(GSO_FRAGLIST) #define NETIF_F_HW_MACSEC __NETIF_F(HW_MACSEC) #define NETIF_F_GRO_UDP_FWD __NETIF_F(GRO_UDP_FWD) +#define NETIF_F_HW_HSR_TAG_INS __NETIF_F(HW_HSR_TAG_INS) +#define NETIF_F_HW_HSR_TAG_RM __NETIF_F(HW_HSR_TAG_RM) +#define NETIF_F_HW_HSR_FWD __NETIF_F(HW_HSR_FWD) +#define NETIF_F_HW_HSR_DUP __NETIF_F(HW_HSR_DUP) /* Finds the next feature with the highest number of the range of start till 0. */ -- cgit v1.2.3 From 4c236d5dc8b86222dc155cd68e7934624264150f Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Thu, 11 Feb 2021 21:28:27 +0530 Subject: octeontx2-pf: cn10k: Use LMTST lines for NPA/NIX operations This patch adds support to use new LMTST lines for NPA batch free and burst SQE flush. Adds new dev_hw_ops structure to hold platform specific functions and create new files cn10k.c and cn10k.h. Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- include/linux/soc/marvell/octeontx2/asm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h index ae2279fe830a..28c04d918f0f 100644 --- a/include/linux/soc/marvell/octeontx2/asm.h +++ b/include/linux/soc/marvell/octeontx2/asm.h @@ -22,8 +22,16 @@ : [rs]"r" (ioaddr)); \ (result); \ }) +#define cn10k_lmt_flush(val, addr) \ +({ \ + __asm__ volatile(".cpu generic+lse\n" \ + "steor %x[rf],[%[rs]]" \ + : [rf]"+r"(val) \ + : [rs]"r"(addr)); \ +}) #else #define otx2_lmt_flush(ioaddr) ({ 0; }) +#define cn10k_lmt_flush(val, addr) ({ addr = val; }) #endif #endif /* __SOC_OTX2_ASM_H */ -- cgit v1.2.3 From c5dbb89fc2ac013afe67b9e4fcb3743c02b567cd Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 10 Feb 2021 12:14:03 +0100 Subject: bpf: Expose bpf_get_socket_cookie to tracing programs This needs a new helper that: - can work in a sleepable context (using sock_gen_cookie) - takes a struct sock pointer and checks that it's not NULL Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Acked-by: KP Singh Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210210111406.785541-2-revest@chromium.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1c8ea682c007..cccaef1088ea 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1885,6 +1885,7 @@ extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; +extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); -- cgit v1.2.3 From 3b23a32a63219f51a5298bc55a65ecee866e79d0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 11 Feb 2021 11:34:10 -0800 Subject: net: fix dev_ifsioc_locked() race condition dev_ifsioc_locked() is called with only RCU read lock, so when there is a parallel writer changing the mac address, it could get a partially updated mac address, as shown below: Thread 1 Thread 2 // eth_commit_mac_addr_change() memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); // dev_ifsioc_locked() memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,...); Close this race condition by guarding them with a RW semaphore, like netdev_get_name(). We can not use seqlock here as it does not allow blocking. The writers already take RTNL anyway, so this does not affect the slow path. To avoid bothering existing dev_set_mac_address() callers in drivers, introduce a new wrapper just for user-facing callers on ioctl and rtnetlink paths. Note, bonding also changes slave mac addresses but that requires a separate patch due to the complexity of bonding code. Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()") Reported-by: "Gong, Sishuai" Cc: Eric Dumazet Cc: Jakub Kicinski Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a20310ff5083..bfadf3b82f9c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3902,6 +3902,9 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); +int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack); +int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); -- cgit v1.2.3 From 5f7d57280c1982d993d5f4ff0edac310f820f607 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 9 Feb 2021 14:38:29 +0100 Subject: bpf: Drop MTU check when doing TC-BPF redirect to ingress The use-case for dropping the MTU check when TC-BPF does redirect to ingress, is described by Eyal Birger in email[0]. The summary is the ability to increase packet size (e.g. with IPv6 headers for NAT64) and ingress redirect packet and let normal netstack fragment packet as needed. [0] https://lore.kernel.org/netdev/CAHsH6Gug-hsLGHQ6N0wtixdOa85LDZ3HNRHVd0opR=19Qo4W4Q@mail.gmail.com/ V15: - missing static for function declaration V9: - Make net_device "up" (IFF_UP) check explicit in skb_do_redirect V4: - Keep net_device "up" (IFF_UP) check. - Adjustment to handle bpf_redirect_peer() helper Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/161287790971.790810.11785274340154740591.stgit@firesoul --- include/linux/netdevice.h | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ef517254367d..b9bcbfde7849 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3931,14 +3931,42 @@ int xdp_umem_query(struct net_device *dev, u16 queue_id); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); +int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb); +static __always_inline bool __is_skb_forwardable(const struct net_device *dev, + const struct sk_buff *skb, + const bool check_mtu) +{ + const u32 vlan_hdr_len = 4; /* VLAN_HLEN */ + unsigned int len; + + if (!(dev->flags & IFF_UP)) + return false; + + if (!check_mtu) + return true; + + len = dev->mtu + dev->hard_header_len + vlan_hdr_len; + if (skb->len <= len) + return true; + + /* if TSO is enabled, we don't care about the length as the packet + * could be forwarded without being segmented before + */ + if (skb_is_gso(skb)) + return true; + + return false; +} + static __always_inline int ____dev_forward_skb(struct net_device *dev, - struct sk_buff *skb) + struct sk_buff *skb, + const bool check_mtu) { if (skb_orphan_frags(skb, GFP_ATOMIC) || - unlikely(!is_skb_forwardable(dev, skb))) { + unlikely(!__is_skb_forwardable(dev, skb, check_mtu))) { atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); return NET_RX_DROP; -- cgit v1.2.3 From e5069b9c23b3857db986c58801bebe450cff3392 Mon Sep 17 00:00:00 2001 From: Dmitrii Banshchikov Date: Sat, 13 Feb 2021 00:56:41 +0400 Subject: bpf: Support pointers in global func args Add an ability to pass a pointer to a type with known size in arguments of a global function. Such pointers may be used to overcome the limit on the maximum number of arguments, avoid expensive and tricky workarounds and to have multiple output arguments. A referenced type may contain pointers but indirect access through them isn't supported. The implementation consists of two parts. If a global function has an argument that is a pointer to a type with known size then: 1) In btf_check_func_arg_match(): check that the corresponding register points to NULL or to a valid memory region that is large enough to contain the expected argument's type. 2) In btf_prepare_func_args(): set the corresponding register type to PTR_TO_MEM_OR_NULL and its size to the size of the expected type. Only global functions are supported because allowance of pointers for static functions might break validation. Consider the following scenario. A static function has a pointer argument. A caller passes pointer to its stack memory. Because the callee can change referenced memory verifier cannot longer assume any particular slot type of the caller's stack memory hence the slot type is changed to SLOT_MISC. If there is an operation that relies on slot type other than SLOT_MISC then verifier won't be able to infer safety of the operation. When verifier sees a static function that has a pointer argument different from PTR_TO_CTX then it skips arguments check and continues with "inline" validation with more information available. The operation that relies on the particular slot type now succeeds. Because global functions were not allowed to have pointer arguments different from PTR_TO_CTX it's not possible to break existing and valid code. Signed-off-by: Dmitrii Banshchikov Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210212205642.620788-4-me@ubique.spb.ru --- include/linux/bpf_verifier.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 532c97836d0d..971b33aca13d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -471,6 +471,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); int check_ctx_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno); +int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno, u32 mem_size); /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, -- cgit v1.2.3 From fec6e49b63989657bc4076dad99fa51d5ece34da Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sat, 13 Feb 2021 14:12:02 +0000 Subject: skbuff: remove __kfree_skb_flush() This function isn't much needed as NAPI skb queue gets bulk-freed anyway when there's no more room, and even may reduce the efficiency of bulk operations. It will be even less needed after reusing skb cache on allocation path, so remove it and this way lighten network softirqs a bit. Suggested-by: Eric Dumazet Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0a4e91a2f873..0e0707296098 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2919,7 +2919,6 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, } void napi_consume_skb(struct sk_buff *skb, int budget); -void __kfree_skb_flush(void); void __kfree_skb_defer(struct sk_buff *skb); /** -- cgit v1.2.3 From f450d539c05a14c103dd174718f81bb2fe65cb4b Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sat, 13 Feb 2021 14:12:25 +0000 Subject: skbuff: introduce {,__}napi_build_skb() which reuses NAPI cache heads Instead of just bulk-flushing skbuff_heads queued up through napi_consume_skb() or __kfree_skb_defer(), try to reuse them on allocation path. If the cache is empty on allocation, bulk-allocate the first 16 elements, which is more efficient than per-skb allocation. If the cache is full on freeing, bulk-wipe the second half of the cache (32 elements). This also includes custom KASAN poisoning/unpoisoning to be double sure there are no use-after-free cases. To not change current behaviour, introduce a new function, napi_build_skb(), to optionally use a new approach later in drivers. Note on selected bulk size, 16: - this equals to XDP_BULK_QUEUE_SIZE, DEV_MAP_BULK_SIZE and especially VETH_XDP_BATCH, which is also used to bulk-allocate skbuff_heads and was tested on powerful setups; - this also showed the best performance in the actual test series (from the array of {8, 16, 32}). Suggested-by: Edward Cree # Divide on two halves Suggested-by: Eric Dumazet # KASAN poisoning Cc: Dmitry Vyukov # Help with KASAN Cc: Paolo Abeni # Reduced batch size Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0e0707296098..906122eac82a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1087,6 +1087,8 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size); +struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); + /** * alloc_skb - allocate a network buffer * @size: size to allocate -- cgit v1.2.3 From 9243adfc311a20371c3f4d8eaf0af4b135e6fac3 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sat, 13 Feb 2021 14:13:09 +0000 Subject: skbuff: queue NAPI_MERGED_FREE skbs into NAPI cache instead of freeing napi_frags_finish() and napi_skb_finish() can only be called inside NAPI Rx context, so we can feed NAPI cache with skbuff_heads that got NAPI_MERGED_FREE verdict instead of immediate freeing. Replace __kfree_skb() with __kfree_skb_defer() in napi_skb_finish() and move napi_skb_free_stolen_head() to skbuff.c, so it can drop skbs to NAPI cache. As many drivers call napi_alloc_skb()/napi_get_frags() on their receive path, this becomes especially useful. Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 906122eac82a..6d0a33d1c0db 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2921,6 +2921,7 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, } void napi_consume_skb(struct sk_buff *skb, int budget); +void napi_skb_free_stolen_head(struct sk_buff *skb); void __kfree_skb_defer(struct sk_buff *skb); /** -- cgit v1.2.3 From 40d3f295b5feda409784e569550057b5fbc2a295 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 14 Feb 2021 00:37:56 +0200 Subject: net: mscc: ocelot: use common tag parsing code with DSA The Injection Frame Header and Extraction Frame Header that the switch prepends to frames over the NPI port is also prepended to frames delivered over the CPU port module's queues. Let's unify the handling of the frame headers by making the ocelot driver call some helpers exported by the DSA tagger. Among other things, this allows us to get rid of the strange cpu_to_be32 when transmitting the Injection Frame Header on ocelot, since the packing API uses network byte order natively (when "quirks" is 0). The comments above ocelot_gen_ifh talk about setting pop_cnt to 3, and the cpu extraction queue mask to something, but the code doesn't do it, so we don't do it either. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/ocelot.h | 208 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 include/linux/dsa/ocelot.h (limited to 'include/linux') diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h new file mode 100644 index 000000000000..763dbbfaff7a --- /dev/null +++ b/include/linux/dsa/ocelot.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright 2019-2021 NXP Semiconductors + */ + +#ifndef _NET_DSA_TAG_OCELOT_H +#define _NET_DSA_TAG_OCELOT_H + +#include + +#define OCELOT_TAG_LEN 16 +#define OCELOT_SHORT_PREFIX_LEN 4 +#define OCELOT_LONG_PREFIX_LEN 16 +#define OCELOT_TOTAL_TAG_LEN (OCELOT_SHORT_PREFIX_LEN + OCELOT_TAG_LEN) + +/* The CPU injection header and the CPU extraction header can have 3 types of + * prefixes: long, short and no prefix. The format of the header itself is the + * same in all 3 cases. + * + * Extraction with long prefix: + * + * +-------------------+-------------------+------+------+------------+-------+ + * | ff:ff:ff:ff:ff:ff | fe:ff:ff:ff:ff:ff | 8880 | 000a | extraction | frame | + * | | | | | header | | + * +-------------------+-------------------+------+------+------------+-------+ + * 48 bits 48 bits 16 bits 16 bits 128 bits + * + * Extraction with short prefix: + * + * +------+------+------------+-------+ + * | 8880 | 000a | extraction | frame | + * | | | header | | + * +------+------+------------+-------+ + * 16 bits 16 bits 128 bits + * + * Extraction with no prefix: + * + * +------------+-------+ + * | extraction | frame | + * | header | | + * +------------+-------+ + * 128 bits + * + * + * Injection with long prefix: + * + * +-------------------+-------------------+------+------+------------+-------+ + * | any dmac | any smac | 8880 | 000a | injection | frame | + * | | | | | header | | + * +-------------------+-------------------+------+------+------------+-------+ + * 48 bits 48 bits 16 bits 16 bits 128 bits + * + * Injection with short prefix: + * + * +------+------+------------+-------+ + * | 8880 | 000a | injection | frame | + * | | | header | | + * +------+------+------------+-------+ + * 16 bits 16 bits 128 bits + * + * Injection with no prefix: + * + * +------------+-------+ + * | injection | frame | + * | header | | + * +------------+-------+ + * 128 bits + * + * The injection header looks like this (network byte order, bit 127 + * is part of lowest address byte in memory, bit 0 is part of highest + * address byte): + * + * +------+------+------+------+------+------+------+------+ + * 127:120 |BYPASS| MASQ | MASQ_PORT |REW_OP|REW_OP| + * +------+------+------+------+------+------+------+------+ + * 119:112 | REW_OP | + * +------+------+------+------+------+------+------+------+ + * 111:104 | REW_VAL | + * +------+------+------+------+------+------+------+------+ + * 103: 96 | REW_VAL | + * +------+------+------+------+------+------+------+------+ + * 95: 88 | REW_VAL | + * +------+------+------+------+------+------+------+------+ + * 87: 80 | REW_VAL | + * +------+------+------+------+------+------+------+------+ + * 79: 72 | RSV | + * +------+------+------+------+------+------+------+------+ + * 71: 64 | RSV | DEST | + * +------+------+------+------+------+------+------+------+ + * 63: 56 | DEST | + * +------+------+------+------+------+------+------+------+ + * 55: 48 | RSV | + * +------+------+------+------+------+------+------+------+ + * 47: 40 | RSV | SRC_PORT | RSV |TFRM_TIMER| + * +------+------+------+------+------+------+------+------+ + * 39: 32 | TFRM_TIMER | RSV | + * +------+------+------+------+------+------+------+------+ + * 31: 24 | RSV | DP | POP_CNT | CPUQ | + * +------+------+------+------+------+------+------+------+ + * 23: 16 | CPUQ | QOS_CLASS |TAG_TYPE| + * +------+------+------+------+------+------+------+------+ + * 15: 8 | PCP | DEI | VID | + * +------+------+------+------+------+------+------+------+ + * 7: 0 | VID | + * +------+------+------+------+------+------+------+------+ + * + * And the extraction header looks like this: + * + * +------+------+------+------+------+------+------+------+ + * 127:120 | RSV | REW_OP | + * +------+------+------+------+------+------+------+------+ + * 119:112 | REW_OP | REW_VAL | + * +------+------+------+------+------+------+------+------+ + * 111:104 | REW_VAL | + * +------+------+------+------+------+------+------+------+ + * 103: 96 | REW_VAL | + * +------+------+------+------+------+------+------+------+ + * 95: 88 | REW_VAL | + * +------+------+------+------+------+------+------+------+ + * 87: 80 | REW_VAL | LLEN | + * +------+------+------+------+------+------+------+------+ + * 79: 72 | LLEN | WLEN | + * +------+------+------+------+------+------+------+------+ + * 71: 64 | WLEN | RSV | + * +------+------+------+------+------+------+------+------+ + * 63: 56 | RSV | + * +------+------+------+------+------+------+------+------+ + * 55: 48 | RSV | + * +------+------+------+------+------+------+------+------+ + * 47: 40 | RSV | SRC_PORT | ACL_ID | + * +------+------+------+------+------+------+------+------+ + * 39: 32 | ACL_ID | RSV | SFLOW_ID | + * +------+------+------+------+------+------+------+------+ + * 31: 24 |ACL_HIT| DP | LRN_FLAGS | CPUQ | + * +------+------+------+------+------+------+------+------+ + * 23: 16 | CPUQ | QOS_CLASS |TAG_TYPE| + * +------+------+------+------+------+------+------+------+ + * 15: 8 | PCP | DEI | VID | + * +------+------+------+------+------+------+------+------+ + * 7: 0 | VID | + * +------+------+------+------+------+------+------+------+ + */ + +static inline void ocelot_xfh_get_rew_val(void *extraction, u64 *rew_val) +{ + packing(extraction, rew_val, 116, 85, OCELOT_TAG_LEN, UNPACK, 0); +} + +static inline void ocelot_xfh_get_len(void *extraction, u64 *len) +{ + u64 llen, wlen; + + packing(extraction, &llen, 84, 79, OCELOT_TAG_LEN, UNPACK, 0); + packing(extraction, &wlen, 78, 71, OCELOT_TAG_LEN, UNPACK, 0); + + *len = 60 * wlen + llen - 80; +} + +static inline void ocelot_xfh_get_src_port(void *extraction, u64 *src_port) +{ + packing(extraction, src_port, 46, 43, OCELOT_TAG_LEN, UNPACK, 0); +} + +static inline void ocelot_xfh_get_qos_class(void *extraction, u64 *qos_class) +{ + packing(extraction, qos_class, 19, 17, OCELOT_TAG_LEN, UNPACK, 0); +} + +static inline void ocelot_xfh_get_tag_type(void *extraction, u64 *tag_type) +{ + packing(extraction, tag_type, 16, 16, OCELOT_TAG_LEN, UNPACK, 0); +} + +static inline void ocelot_xfh_get_vlan_tci(void *extraction, u64 *vlan_tci) +{ + packing(extraction, vlan_tci, 15, 0, OCELOT_TAG_LEN, UNPACK, 0); +} + +static inline void ocelot_ifh_set_bypass(void *injection, u64 bypass) +{ + packing(injection, &bypass, 127, 127, OCELOT_TAG_LEN, PACK, 0); +} + +static inline void ocelot_ifh_set_rew_op(void *injection, u64 rew_op) +{ + packing(injection, &rew_op, 125, 117, OCELOT_TAG_LEN, PACK, 0); +} + +static inline void ocelot_ifh_set_dest(void *injection, u64 dest) +{ + packing(injection, &dest, 67, 56, OCELOT_TAG_LEN, PACK, 0); +} + +static inline void ocelot_ifh_set_qos_class(void *injection, u64 qos_class) +{ + packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0); +} + +static inline void ocelot_ifh_set_tag_type(void *injection, u64 tag_type) +{ + packing(injection, &tag_type, 16, 16, OCELOT_TAG_LEN, PACK, 0); +} + +static inline void ocelot_ifh_set_vid(void *injection, u64 vid) +{ + packing(injection, &vid, 11, 0, OCELOT_TAG_LEN, PACK, 0); +} + +#endif -- cgit v1.2.3 From 7c4bb540e9173c914c2091fdd9b6aee3c2a3e1e5 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 14 Feb 2021 00:37:58 +0200 Subject: net: dsa: tag_ocelot: create separate tagger for Seville The ocelot tagger is a hot mess currently, it relies on memory initialized by the attached driver for basic frame transmission. This is against all that DSA tagging protocols stand for, which is that the transmission and reception of a DSA-tagged frame, the data path, should be independent from the switch control path, because the tag protocol is in principle hot-pluggable and reusable across switches (even if in practice it wasn't until very recently). But if another driver like dsa_loop wants to make use of tag_ocelot, it couldn't. This was done to have common code between Felix and Ocelot, which have one bit difference in the frame header format. Quoting from commit 67c2404922c2 ("net: dsa: felix: create a template for the DSA tags on xmit"): Other alternatives have been analyzed, such as: - Create a separate tag_seville.c: too much code duplication for just 1 bit field difference. - Create a separate DSA_TAG_PROTO_SEVILLE under tag_ocelot.c, just like tag_brcm.c, which would have a separate .xmit function. Again, too much code duplication for just 1 bit field difference. - Allocate the template from the init function of the tag_ocelot.c module, instead of from the driver: couldn't figure out a method of accessing the correct port template corresponding to the correct tagger in the .xmit function. The really interesting part is that Seville should have had its own tagging protocol defined - it is not compatible on the wire with Ocelot, even for that single bit. In principle, a packet generated by DSA_TAG_PROTO_OCELOT when booted on NXP LS1028A would look in a certain way, but when booted on NXP T1040 it would look differently. The reverse is also true: a packet generated by a Seville switch would be interpreted incorrectly by Wireshark if it was told it was generated by an Ocelot switch. Actually things are a bit more nuanced. If we concentrate only on the DSA tag, what I said above is true, but Ocelot/Seville also support an optional DSA tag prefix, which can be short or long, and it is possible to distinguish the two taggers based on an integer constant put in that prefix. Nonetheless, creating a separate tagger is still justified, since the tag prefix is optional, and without it, there is again no way to distinguish. Claiming backwards binary compatibility is a bit more tough, since I've already changed the format of tag_ocelot once, in commit 5124197ce58b ("net: dsa: tag_ocelot: use a short prefix on both ingress and egress"). Therefore I am not very concerned with treating this as a bugfix and backporting it to stable kernels (which would be another mess due to the fact that there would be lots of conflicts with the other DSA_TAG_PROTO* definitions). It's just simpler to say that the string values of the taggers have ABI value starting with kernel 5.12, which will be when the changing of tag protocol via /sys/class/net//dsa/tagging goes live. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/ocelot.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index 763dbbfaff7a..c6bc45ae5e03 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -195,6 +195,16 @@ static inline void ocelot_ifh_set_qos_class(void *injection, u64 qos_class) packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0); } +static inline void seville_ifh_set_dest(void *injection, u64 dest) +{ + packing(injection, &dest, 67, 57, OCELOT_TAG_LEN, PACK, 0); +} + +static inline void ocelot_ifh_set_src(void *injection, u64 src) +{ + packing(injection, &src, 46, 43, OCELOT_TAG_LEN, PACK, 0); +} + static inline void ocelot_ifh_set_tag_type(void *injection, u64 tag_type) { packing(injection, &tag_type, 16, 16, OCELOT_TAG_LEN, PACK, 0); -- cgit v1.2.3 From d0a0bbe7b0a181c58bd22d6942146cfa3ab9e49a Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 14 Feb 2021 18:43:08 -0500 Subject: atm: idt77252: fix build broken on amd64 idt77252 is broken and wont load on amd64 systems modprobe idt77252 shows the following idt77252_init: skb->cb is too small (48 < 56) Add packed attribute to struct idt77252_skb_prv and struct atm_skb_data so that the total size can be <= sizeof(skb->cb) Also convert runtime size check to buildtime size check in idt77252_init() Signed-off-by: Tong Zhang Signed-off-by: David S. Miller --- include/linux/atmdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index d7493016cd46..60cd25c0461b 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -207,7 +207,7 @@ struct atm_skb_data { struct atm_vcc *vcc; /* ATM VCC */ unsigned long atm_options; /* ATM layer options */ unsigned int acct_truesize; /* truesize accounted to vcc */ -}; +} __packed; #define VCC_HTABLE_SIZE 32 -- cgit v1.2.3 From 17d3a83afbbff34209d6c3636718fc1abe305ef8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 12 Feb 2021 19:46:31 -0800 Subject: net: phy: broadcom: Remove unused flags We have a number of unused flags defined today and since we are scarce on space and may need to introduce new flags in the future remove and shift every existing flag down into a contiguous assignment. PHY_BCM_FLAGS_MODE_1000BX was only used internally for the BCM54616S PHY, so we allocate a driver private structure instead to store that flag instead of canibalizing one from phydev->dev_flags for that purpose. Signed-off-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index de9430d55c90..844dcfe789a2 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -61,19 +61,14 @@ #define PHY_BCM_OUI_5 0x03625e00 #define PHY_BCM_OUI_6 0xae025000 -#define PHY_BCM_FLAGS_MODE_COPPER 0x00000001 -#define PHY_BCM_FLAGS_MODE_1000BX 0x00000002 -#define PHY_BCM_FLAGS_INTF_SGMII 0x00000010 -#define PHY_BCM_FLAGS_INTF_XAUI 0x00000020 -#define PHY_BRCM_WIRESPEED_ENABLE 0x00000100 -#define PHY_BRCM_AUTO_PWRDWN_ENABLE 0x00000200 -#define PHY_BRCM_RX_REFCLK_UNUSED 0x00000400 -#define PHY_BRCM_STD_IBND_DISABLE 0x00000800 -#define PHY_BRCM_EXT_IBND_RX_ENABLE 0x00001000 -#define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 -#define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000 -#define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000 -#define PHY_BRCM_EN_MASTER_MODE 0x00010000 +#define PHY_BRCM_AUTO_PWRDWN_ENABLE 0x00000001 +#define PHY_BRCM_RX_REFCLK_UNUSED 0x00000002 +#define PHY_BRCM_STD_IBND_DISABLE 0x00000004 +#define PHY_BRCM_EXT_IBND_RX_ENABLE 0x00000008 +#define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00000010 +#define PHY_BRCM_CLEAR_RGMII_MODE 0x00000020 +#define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00000040 +#define PHY_BRCM_EN_MASTER_MODE 0x00000080 /* Broadcom BCM7xxx specific workarounds */ #define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff) -- cgit v1.2.3 From 5d4358ede8ebe2e4ae03a633082f3ce21ec2df3e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 12 Feb 2021 19:46:32 -0800 Subject: net: phy: broadcom: Allow BCM54210E to configure APD BCM54210E/BCM50212E has been verified to work correctly with the auto-power down configuration done by bcm54xx_adjust_rxrefclk(), add it to the list of PHYs working. While we are at it, provide an appropriate name for the bit we are changing which disables the RXC and TXC during auto-power down when there is no energy on the cable. Signed-off-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 844dcfe789a2..16597d3fa011 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -193,6 +193,7 @@ #define BCM54XX_SHD_SCR3_DEF_CLK125 0x0001 #define BCM54XX_SHD_SCR3_DLLAPD_DIS 0x0002 #define BCM54XX_SHD_SCR3_TRDDAPD 0x0004 +#define BCM54XX_SHD_SCR3_RXCTXC_DIS 0x0100 /* 01010: Auto Power-Down */ #define BCM54XX_SHD_APD 0x0a @@ -253,7 +254,6 @@ #define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN (1 << 0) #define BCM54810_SHD_CLK_CTL 0x3 #define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9) -#define BCM54810_SHD_SCR3_TRDDAPD 0x0100 /* BCM54612E Registers */ #define BCM54612E_EXP_SPARE0 (MII_BCM54XX_EXP_SEL_ETC + 0x34) -- cgit v1.2.3 From 93e8990c24bee30696c02e8f6aed043333491a25 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 14 Feb 2021 15:16:23 +0100 Subject: net: phy: rename PHY_IGNORE_INTERRUPT to PHY_MAC_INTERRUPT Some internal PHY's have their events like link change reported by the MAC interrupt. We have PHY_IGNORE_INTERRUPT to deal with this scenario. I'm not too happy with this name. We don't ignore interrupts, typically there is no interrupt exposed at a PHY level. So let's rename it to PHY_MAC_INTERRUPT. This is in line with phy_mac_interrupt(), which is called from the MAC interrupt handler to handle PHY events. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Acked-by: Florian Fainelli Reviewed-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index c130788306c8..5d7c4084ade9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -71,11 +71,11 @@ extern const int phy_10gbit_features_array[1]; /* * Set phydev->irq to PHY_POLL if interrupts are not supported, - * or not desired for this PHY. Set to PHY_IGNORE_INTERRUPT if - * the attached driver handles the interrupt + * or not desired for this PHY. Set to PHY_MAC_INTERRUPT if + * the attached MAC driver handles the interrupt */ #define PHY_POLL -1 -#define PHY_IGNORE_INTERRUPT -2 +#define PHY_MAC_INTERRUPT -2 #define PHY_IS_INTERNAL 0x00000001 #define PHY_RST_AFTER_CLK_EN 0x00000002 @@ -1202,11 +1202,11 @@ static inline int phy_clear_bits_mmd(struct phy_device *phydev, int devad, * @phydev: the phy_device struct * * NOTE: must be kept in sync with addition/removal of PHY_POLL and - * PHY_IGNORE_INTERRUPT + * PHY_MAC_INTERRUPT */ static inline bool phy_interrupt_is_valid(struct phy_device *phydev) { - return phydev->irq != PHY_POLL && phydev->irq != PHY_IGNORE_INTERRUPT; + return phydev->irq != PHY_POLL && phydev->irq != PHY_MAC_INTERRUPT; } /** -- cgit v1.2.3 From a6a217dddcd544f6b75f0e2a60b6e84c1d494b7e Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 9 Feb 2021 15:11:06 +0200 Subject: net/mlx5: Add new timestamp mode bits These fields declare which timestamp mode is supported by the device per RQ/SQ/QP. In addition add the ts_format field to the select the mode for RQ/SQ/QP. Link: https://lore.kernel.org/r/20210209131107.698833-2-leon@kernel.org Signed-off-by: Aharon Landau Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 54 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cf692fc17f41..436d6f421dfd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -932,11 +932,18 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 reserved_at_200[0x600]; }; +enum { + MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING = 0x0, + MLX5_QP_TIMESTAMP_FORMAT_CAP_REAL_TIME = 0x1, + MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME = 0x2, +}; + struct mlx5_ifc_roce_cap_bits { u8 roce_apm[0x1]; u8 reserved_at_1[0x3]; u8 sw_r_roce_src_udp_port[0x1]; - u8 reserved_at_5[0x1b]; + u8 reserved_at_5[0x19]; + u8 qp_ts_format[0x2]; u8 reserved_at_20[0x60]; @@ -1253,6 +1260,18 @@ enum { MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, }; +enum { + MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING = 0x0, + MLX5_SQ_TIMESTAMP_FORMAT_CAP_REAL_TIME = 0x1, + MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME = 0x2, +}; + +enum { + MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING = 0x0, + MLX5_RQ_TIMESTAMP_FORMAT_CAP_REAL_TIME = 0x1, + MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME = 0x2, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x1f]; u8 vhca_resource_manager[0x1]; @@ -1564,7 +1583,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 general_obj_types[0x40]; - u8 reserved_at_440[0x4]; + u8 sq_ts_format[0x2]; + u8 rq_ts_format[0x2]; u8 steering_format_version[0x4]; u8 create_qp_start_hint[0x18]; @@ -2868,6 +2888,12 @@ enum { MLX5_QPC_CS_RES_UP_TO_64B = 0x2, }; +enum { + MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING = 0x0, + MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT = 0x1, + MLX5_QPC_TIMESTAMP_FORMAT_REAL_TIME = 0x2, +}; + struct mlx5_ifc_qpc_bits { u8 state[0x4]; u8 lag_tx_port_affinity[0x4]; @@ -2896,7 +2922,9 @@ struct mlx5_ifc_qpc_bits { u8 log_rq_stride[0x3]; u8 no_sq[0x1]; u8 log_sq_size[0x4]; - u8 reserved_at_55[0x6]; + u8 reserved_at_55[0x3]; + u8 ts_format[0x2]; + u8 reserved_at_5a[0x1]; u8 rlky[0x1]; u8 ulp_stateless_offload_mode[0x4]; @@ -3312,6 +3340,12 @@ enum { MLX5_SQC_STATE_ERR = 0x3, }; +enum { + MLX5_SQC_TIMESTAMP_FORMAT_FREE_RUNNING = 0x0, + MLX5_SQC_TIMESTAMP_FORMAT_DEFAULT = 0x1, + MLX5_SQC_TIMESTAMP_FORMAT_REAL_TIME = 0x2, +}; + struct mlx5_ifc_sqc_bits { u8 rlky[0x1]; u8 cd_master[0x1]; @@ -3323,7 +3357,9 @@ struct mlx5_ifc_sqc_bits { u8 reg_umr[0x1]; u8 allow_swp[0x1]; u8 hairpin[0x1]; - u8 reserved_at_f[0x11]; + u8 reserved_at_f[0xb]; + u8 ts_format[0x2]; + u8 reserved_at_1c[0x4]; u8 reserved_at_20[0x8]; u8 user_index[0x18]; @@ -3414,6 +3450,12 @@ enum { MLX5_RQC_STATE_ERR = 0x3, }; +enum { + MLX5_RQC_TIMESTAMP_FORMAT_FREE_RUNNING = 0x0, + MLX5_RQC_TIMESTAMP_FORMAT_DEFAULT = 0x1, + MLX5_RQC_TIMESTAMP_FORMAT_REAL_TIME = 0x2, +}; + struct mlx5_ifc_rqc_bits { u8 rlky[0x1]; u8 delay_drop_en[0x1]; @@ -3424,7 +3466,9 @@ struct mlx5_ifc_rqc_bits { u8 reserved_at_c[0x1]; u8 flush_in_error_en[0x1]; u8 hairpin[0x1]; - u8 reserved_at_f[0x11]; + u8 reserved_at_f[0xb]; + u8 ts_format[0x2]; + u8 reserved_at_1c[0x4]; u8 reserved_at_20[0x8]; u8 user_index[0x18]; -- cgit v1.2.3 From ae02d41551d6f2a035d3e63ce4415e1b2ba3a7e6 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Fri, 12 Feb 2021 14:30:38 -0800 Subject: net/mlx5: Add register layout to support real-time time-stamp Add needed structure layouts and defines for MTUTC (Management UTC) register. MTUTC will be used for cyc2time HW translation. In addition, add cyc2time modify capability bit and init segment HCA real time address. Finally, add capability bits indicating which time-stamping format is supported per SQ and RQ. Add ts_format in the queue's context layout to allow configuration. Signed-off-by: Eran Ben Elisha Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 5 ++++- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 30 ++++++++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f1de49d64a98..fd694add6ff0 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -578,7 +578,10 @@ struct mlx5_init_seg { __be32 internal_timer_l; __be32 rsvd3[2]; __be32 health_counter; - __be32 rsvd4[1019]; + __be32 rsvd4[11]; + __be32 real_time_h; + __be32 real_time_l; + __be32 rsvd5[1006]; __be64 ieee1588_clk; __be32 ieee1588_clk_type; __be32 clr_intx; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f93bfe7473aa..2eb7755143d7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -143,6 +143,7 @@ enum { MLX5_REG_MPCNT = 0x9051, MLX5_REG_MTPPS = 0x9053, MLX5_REG_MTPPSE = 0x9054, + MLX5_REG_MTUTC = 0x9055, MLX5_REG_MPEGC = 0x9056, MLX5_REG_MCQS = 0x9060, MLX5_REG_MCQI = 0x9061, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 436d6f421dfd..7059fcdf54a3 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9150,6 +9150,28 @@ struct mlx5_ifc_mpegc_reg_bits { u8 reserved_at_60[0x100]; }; +enum { + MLX5_MTUTC_OPERATION_SET_TIME_IMMEDIATE = 0x1, + MLX5_MTUTC_OPERATION_ADJUST_TIME = 0x2, + MLX5_MTUTC_OPERATION_ADJUST_FREQ_UTC = 0x3, +}; + +struct mlx5_ifc_mtutc_reg_bits { + u8 reserved_at_0[0x1c]; + u8 operation[0x4]; + + u8 freq_adjustment[0x20]; + + u8 reserved_at_40[0x40]; + + u8 utc_sec[0x20]; + + u8 reserved_at_a0[0x2]; + u8 utc_nsec[0x1e]; + + u8 time_adjustment[0x20]; +}; + struct mlx5_ifc_pcam_enhanced_features_bits { u8 reserved_at_0[0x68]; u8 fec_50G_per_lane_in_pplm[0x1]; @@ -9208,7 +9230,9 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x6e]; + u8 reserved_at_0[0x6b]; + u8 ptpcyc2realtime_modify[0x1]; + u8 reserved_at_6c[0x2]; u8 pci_status_and_power[0x1]; u8 reserved_at_6f[0x5]; u8 mark_tx_action_cnp[0x1]; @@ -9231,7 +9255,8 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_95_to_87[0x9]; u8 mpegc[0x1]; - u8 regs_85_to_68[0x12]; + u8 mtutc[0x1]; + u8 regs_84_to_68[0x11]; u8 tracer_registers[0x4]; u8 regs_63_to_32[0x20]; @@ -9964,6 +9989,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mcda_reg_bits mcda_reg; struct mlx5_ifc_mirc_reg_bits mirc_reg; struct mlx5_ifc_mfrl_reg_bits mfrl_reg; + struct mlx5_ifc_mtutc_reg_bits mtutc_reg; u8 reserved_at_0[0x60e0]; }; -- cgit v1.2.3 From d6f3dc8f509ce6288e2537eb4b0614ef444fd84a Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Fri, 12 Feb 2021 14:30:40 -0800 Subject: net/mlx5: Move all internal timer metadata into a dedicated struct Internal timer mode (SW clock) requires some PTP clock related metadata structs. Real time mode (HW clock) will not need these metadata structs. This separation emphasize the different interfaces for HW clock and SW clock. Signed-off-by: Eran Ben Elisha Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2eb7755143d7..2f4d6182df1b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -662,18 +662,22 @@ struct mlx5_pps { u8 enabled; }; -struct mlx5_clock { - struct mlx5_nb pps_nb; - seqlock_t lock; +struct mlx5_timer { struct cyclecounter cycles; struct timecounter tc; - struct hwtstamp_config hwtstamp_config; u32 nominal_c_mult; unsigned long overflow_period; struct delayed_work overflow_work; +}; + +struct mlx5_clock { + struct mlx5_nb pps_nb; + seqlock_t lock; + struct hwtstamp_config hwtstamp_config; struct ptp_clock *ptp; struct ptp_clock_info ptp_info; struct mlx5_pps pps_info; + struct mlx5_timer timer; }; struct mlx5_dm; -- cgit v1.2.3 From 32aeba1f7a98b0c69d4a5704a7d9cea42ba856ba Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 16 Feb 2021 11:08:37 -0800 Subject: tg3: Remove unused PHY_BRCM flags The tg3 driver tried to communicate towards the PHY driver whether it wanted RGMII in-band signaling enabled or disabled however there is nothing that looks at those flags in drivers/net/phy/broadcom.c so this does do not anything. Suggested-by: Vladimir Oltean Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 16597d3fa011..8fe1d55371ac 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -63,12 +63,9 @@ #define PHY_BRCM_AUTO_PWRDWN_ENABLE 0x00000001 #define PHY_BRCM_RX_REFCLK_UNUSED 0x00000002 -#define PHY_BRCM_STD_IBND_DISABLE 0x00000004 -#define PHY_BRCM_EXT_IBND_RX_ENABLE 0x00000008 -#define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00000010 -#define PHY_BRCM_CLEAR_RGMII_MODE 0x00000020 -#define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00000040 -#define PHY_BRCM_EN_MASTER_MODE 0x00000080 +#define PHY_BRCM_CLEAR_RGMII_MODE 0x00000004 +#define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00000008 +#define PHY_BRCM_EN_MASTER_MODE 0x00000010 /* Broadcom BCM7xxx specific workarounds */ #define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff) -- cgit v1.2.3 From 7331d1d4622ba7e668ec19cfba2ed7feb4a3084e Mon Sep 17 00:00:00 2001 From: Pavana Sharma Date: Tue, 16 Feb 2021 20:20:53 +0100 Subject: net: phy: Add 5GBASER interface mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 5GBASE-R phy interface mode Signed-off-by: Pavana Sharma Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Marek BehĂșn Signed-off-by: David S. Miller --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 5d7c4084ade9..0d537f59b77f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -107,6 +107,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_100BASEX: 100 BaseX * @PHY_INTERFACE_MODE_1000BASEX: 1000 BaseX * @PHY_INTERFACE_MODE_2500BASEX: 2500 BaseX + * @PHY_INTERFACE_MODE_5GBASER: 5G BaseR * @PHY_INTERFACE_MODE_RXAUI: Reduced XAUI * @PHY_INTERFACE_MODE_XAUI: 10 Gigabit Attachment Unit Interface * @PHY_INTERFACE_MODE_10GBASER: 10G BaseR @@ -139,6 +140,7 @@ typedef enum { PHY_INTERFACE_MODE_100BASEX, PHY_INTERFACE_MODE_1000BASEX, PHY_INTERFACE_MODE_2500BASEX, + PHY_INTERFACE_MODE_5GBASER, PHY_INTERFACE_MODE_RXAUI, PHY_INTERFACE_MODE_XAUI, /* 10GBASE-R, XFI, SFI - single lane 10G Serdes */ @@ -209,6 +211,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "1000base-x"; case PHY_INTERFACE_MODE_2500BASEX: return "2500base-x"; + case PHY_INTERFACE_MODE_5GBASER: + return "5gbase-r"; case PHY_INTERFACE_MODE_RXAUI: return "rxaui"; case PHY_INTERFACE_MODE_XAUI: -- cgit v1.2.3 From d8ea7ff3995ead5193313c72c0d97c9c16c83be9 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 16 Feb 2021 22:42:03 +0100 Subject: net: mscc: ocelot: Add support for MRP Add basic support for MRP. The HW will just trap all MRP frames on the ring ports to CPU and allow the SW to process them. In this way it is possible to for this node to behave both as MRM and MRC. Current limitations are: - it doesn't support Interconnect roles. - it supports only a single ring. - the HW should be able to do forwarding of MRP Test frames so the SW will not need to do this. So it would be able to have the role MRC without SW support. Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- include/linux/dsa/ocelot.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index c6bc45ae5e03..4265f328681a 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -160,6 +160,11 @@ static inline void ocelot_xfh_get_src_port(void *extraction, u64 *src_port) packing(extraction, src_port, 46, 43, OCELOT_TAG_LEN, UNPACK, 0); } +static inline void ocelot_xfh_get_cpuq(void *extraction, u64 *cpuq) +{ + packing(extraction, cpuq, 28, 20, OCELOT_TAG_LEN, UNPACK, 0); +} + static inline void ocelot_xfh_get_qos_class(void *extraction, u64 *qos_class) { packing(extraction, qos_class, 19, 17, OCELOT_TAG_LEN, UNPACK, 0); -- cgit v1.2.3 From 3afd0218992a8d1398e9791d6c2edd4c948ae7ee Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 16 Feb 2021 16:54:52 -0600 Subject: net: phy: broadcom: Set proper 1000BaseX/SGMII interface mode for BCM54616S The default configuration for the BCM54616S PHY may not match the desired mode when using 1000BaseX or SGMII interface modes, such as when it is on an SFP module. Add code to explicitly set the correct mode using programming sequences provided by Bel-Fuse: https://www.belfuse.com/resources/datasheets/powersolutions/ds-bps-sfp-1gbt-05-series.pdf https://www.belfuse.com/resources/datasheets/powersolutions/ds-bps-sfp-1gbt-06-series.pdf Signed-off-by: Robert Hancock Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 8fe1d55371ac..c2c2147dfeb8 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -129,6 +129,7 @@ #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x07 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN 0x0010 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_EN 0x0080 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN 0x0100 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 #define MII_BCM54XX_AUXCTL_MISC_WREN 0x8000 @@ -216,6 +217,9 @@ /* 11111: Mode Control Register */ #define BCM54XX_SHD_MODE 0x1f #define BCM54XX_SHD_INTF_SEL_MASK GENMASK(2, 1) /* INTERF_SEL[1:0] */ +#define BCM54XX_SHD_INTF_SEL_RGMII 0x02 +#define BCM54XX_SHD_INTF_SEL_SGMII 0x04 +#define BCM54XX_SHD_INTF_SEL_GBIC 0x06 #define BCM54XX_SHD_MODE_1000BX BIT(0) /* Enable 1000-X registers */ /* -- cgit v1.2.3 From b834489bceccc64641684eee5e93275cdf5f465b Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 16 Feb 2021 16:54:53 -0600 Subject: net: phy: Add is_on_sfp_module flag and phy_on_sfp helper Add a flag and helper function to indicate that a PHY device is part of an SFP module, which is set on attach. This can be used by PHY drivers to handle SFP-specific quirks or behavior. Signed-off-by: Robert Hancock Signed-off-by: David S. Miller --- include/linux/phy.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 0d537f59b77f..1a12e4436b5b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -492,6 +492,7 @@ struct macsec_ops; * @sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. * @loopback_enabled: Set true if this PHY has been loopbacked successfully. * @downshifted_rate: Set true if link speed has been downshifted. + * @is_on_sfp_module: Set true if PHY is located on an SFP module. * @state: State of the PHY for management purposes * @dev_flags: Device-specific flags used by the PHY driver. * @irq: IRQ number of the PHY's interrupt (-1 if none) @@ -565,6 +566,7 @@ struct phy_device { unsigned sysfs_links:1; unsigned loopback_enabled:1; unsigned downshifted_rate:1; + unsigned is_on_sfp_module:1; unsigned autoneg:1; /* The most recently read link state */ @@ -1296,6 +1298,15 @@ static inline bool phy_is_internal(struct phy_device *phydev) return phydev->is_internal; } +/** + * phy_on_sfp - Convenience function for testing if a PHY is on an SFP module + * @phydev: the phy_device struct + */ +static inline bool phy_on_sfp(struct phy_device *phydev) +{ + return phydev->is_on_sfp_module; +} + /** * phy_interface_mode_is_rgmii - Convenience function for testing if a * PHY interface mode is RGMII (all variants) -- cgit v1.2.3 From 96313e1db8e5629cc2217616dca78f03e6463008 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 17 Feb 2021 12:25:57 -0800 Subject: net: mdio: Remove of_phy_attach() We have no in-tree users, also update the sfp-phylink.rst documentation to indicate that phy_attach_direct() is used instead of of_phy_attach(). Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index cfe8c607a628..2b05e7f7c238 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -26,9 +26,6 @@ of_phy_connect(struct net_device *dev, struct device_node *phy_np, struct phy_device * of_phy_get_and_connect(struct net_device *dev, struct device_node *np, void (*hndlr)(struct net_device *)); -struct phy_device * -of_phy_attach(struct net_device *dev, struct device_node *phy_np, - u32 flags, phy_interface_t iface); struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); int of_phy_register_fixed_link(struct device_node *np); @@ -100,13 +97,6 @@ of_phy_get_and_connect(struct net_device *dev, struct device_node *np, return NULL; } -static inline struct phy_device *of_phy_attach(struct net_device *dev, - struct device_node *phy_np, - u32 flags, phy_interface_t iface) -{ - return NULL; -} - static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) { return NULL; -- cgit v1.2.3 From 20e07e2c3cf310578ef19fb4f1e64dc9832abd9d Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Wed, 17 Feb 2021 17:57:05 +0800 Subject: net: stmmac: Add PCI bus info to ethtool driver query output This patch populates the PCI bus info in the ethtool driver query data. Users will be able to view PCI bus info using 'ethtool -i '. Signed-off-by: Wong Vee Khee Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 15ca6b4167cc..a302982de2d7 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -202,5 +202,6 @@ struct plat_stmmacenet_data { bool vlan_fail_q_en; u8 vlan_fail_q; unsigned int eee_usecs_rate; + struct pci_dev *pdev; }; #endif -- cgit v1.2.3