From 4bd97d51a5e602ea1fbdab8c2d653513dea17115 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Mar 2019 11:02:04 +0100 Subject: net: dev: rename queue selection helpers. With the following patches, we are going to use __netdev_pick_tx() in many modules. Rename it to netdev_pick_tx(), to make it clear is a public API. Also rename the existing netdev_pick_tx() to netdev_core_pick_tx(), to avoid name clashes. Suggested-by: Eric Dumazet Suggested-by: David Miller Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 26f69cf763f4..57cd2bdd9f78 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2152,9 +2152,9 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, &qdisc_xmit_lock_key); \ } -struct netdev_queue *netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb, - struct net_device *sb_dev); +struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, + struct sk_buff *skb, + struct net_device *sb_dev); /* returns the headroom that the master device needs to take in account * when forwarding to this dev -- cgit v1.2.3 From b71b5837f8711dbc4bc0424cb5c75e5921be055c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Mar 2019 11:02:05 +0100 Subject: packet: rework packet_pick_tx_queue() to use common code selection Currently packet_pick_tx_queue() is the only caller of ndo_select_queue() using a fallback argument other than netdev_pick_tx. Leveraging rx queue, we can obtain a similar queue selection behavior using core helpers. After this change, ndo_select_queue() is always invoked with netdev_pick_tx() as fallback. We can change ndo_select_queue() signature in a followup patch, dropping an indirect call per transmitted packet in some scenarios (e.g. TCP syn and XDP generic xmit) This changes slightly how af packet queue selection happens when PACKET_QDISC_BYPASS is set. It's now more similar to plan dev_queue_xmit() tacking in account both XPS and TC mapping. v1 -> v2: - rebased after helper name change RFC -> v1: - initialize sender_cpu to the expected value Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 57cd2bdd9f78..0ff28db4239f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2152,6 +2152,8 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, &qdisc_xmit_lock_key); \ } +u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev); struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); -- cgit v1.2.3 From a350eccee5830d9a1f29e393a88dc05a15326d44 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Mar 2019 11:02:06 +0100 Subject: net: remove 'fallback' argument from dev->ndo_select_queue() After the previous patch, all the callers of ndo_select_queue() provide as a 'fallback' argument netdev_pick_tx. The only exceptions are nested calls to ndo_select_queue(), which pass down the 'fallback' available in the current scope - still netdev_pick_tx. We can drop such argument and replace fallback() invocation with netdev_pick_tx(). This avoids an indirect call per xmit packet in some scenarios (TCP syn, UDP unconnected, XDP generic, pktgen) with device drivers implementing such ndo. It also clean the code a bit. Tested with ixgbe and CONFIG_FCOE=m With pktgen using queue xmit: threads vanilla patched (kpps) (kpps) 1 2334 2428 2 4166 4278 4 7895 8100 v1 -> v2: - rebased after helper's name change Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0ff28db4239f..823762291ebf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -986,8 +986,7 @@ struct devlink; * those the driver believes to be appropriate. * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, - * struct net_device *sb_dev, - * select_queue_fallback_t fallback); + * struct net_device *sb_dev); * Called to decide which queue to use when device supports multiple * transmit queues. * @@ -1268,8 +1267,7 @@ struct net_device_ops { netdev_features_t features); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); void (*ndo_change_rx_flags)(struct net_device *dev, int flags); void (*ndo_set_rx_mode)(struct net_device *dev); @@ -2641,11 +2639,9 @@ void dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); -- cgit v1.2.3 From dc05360fee660a9dbe59824b3f7896534210432b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Mar 2019 08:56:38 -0700 Subject: net: convert rps_needed and rfs_needed to new static branch api We prefer static_branch_unlikely() over static_key_false() these days. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 823762291ebf..166fdc0a78b4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -194,8 +194,8 @@ struct net_device_stats { #ifdef CONFIG_RPS #include -extern struct static_key rps_needed; -extern struct static_key rfs_needed; +extern struct static_key_false rps_needed; +extern struct static_key_false rfs_needed; #endif struct neighbour; -- cgit v1.2.3 From 5dc37bb9b03586e8fdeb47d25e8d2a0399984936 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 28 Mar 2019 13:56:36 +0100 Subject: net: replace ndo_get_devlink with ndo_get_devlink_port Follow-up patch is going to need a devlink port instance according to a netdev. Devlink port instance should be always available when devlink is used. So change the recently introduced ndo_get_devlink to ndo_get_devlink_port. With that, adjust the wrapper for the only user to get devlink pointer. Signed-off-by: Jiri Pirko Reviewed-by: Michal Kubecek Reviewed-by: Florian Fainelli Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 166fdc0a78b4..78f5ec4ebf64 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1250,8 +1250,8 @@ struct devlink; * that got dropped are freed/returned via xdp_return_frame(). * Returns negative number, means general error invoking ndo, meaning * no frames were xmit'ed and core-caller will free all frames. - * struct devlink *(*ndo_get_devlink)(struct net_device *dev); - * Get devlink instance associated with a given netdev. + * struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev); + * Get devlink port instance associated with a given netdev. * Called with a reference on the netdevice and devlink locks only, * rtnl_lock is not held. */ @@ -1451,7 +1451,7 @@ struct net_device_ops { u32 flags); int (*ndo_xsk_async_xmit)(struct net_device *dev, u32 queue_id); - struct devlink * (*ndo_get_devlink)(struct net_device *dev); + struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); }; /** -- cgit v1.2.3 From 97cdcf37b57e3f204be3000b9eab9686f38b4356 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 1 Apr 2019 16:42:13 +0200 Subject: net: place xmit recursion in softnet data This fills a hole in softnet data, so no change in structure size. Also prepares for xmit_more placement in the same spot; skb->xmit_more will be removed in followup patch. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 78f5ec4ebf64..2b25824642fa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2659,14 +2659,6 @@ void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); -DECLARE_PER_CPU(int, xmit_recursion); -#define XMIT_RECURSION_LIMIT 10 - -static inline int dev_recursion_level(void) -{ - return this_cpu_read(xmit_recursion); -} - struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); @@ -3015,6 +3007,11 @@ struct softnet_data { #ifdef CONFIG_XFRM_OFFLOAD struct sk_buff_head xfrm_backlog; #endif + /* written and read only by owning cpu: */ + struct { + u16 recursion; + u8 more; + } xmit; #ifdef CONFIG_RPS /* input_queue_head should be written by cpu owning this struct, * and only read by other cpus. Worth using a cache line. @@ -3050,6 +3047,28 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd, DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); +static inline int dev_recursion_level(void) +{ + return __this_cpu_read(softnet_data.xmit.recursion); +} + +#define XMIT_RECURSION_LIMIT 10 +static inline bool dev_xmit_recursion(void) +{ + return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > + XMIT_RECURSION_LIMIT); +} + +static inline void dev_xmit_recursion_inc(void) +{ + __this_cpu_inc(softnet_data.xmit.recursion); +} + +static inline void dev_xmit_recursion_dec(void) +{ + __this_cpu_dec(softnet_data.xmit.recursion); +} + void __netif_schedule(struct Qdisc *q); void netif_schedule_queue(struct netdev_queue *txq); @@ -4409,6 +4428,11 @@ static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, return ops->ndo_start_xmit(skb, dev); } +static inline bool netdev_xmit_more(void) +{ + return __this_cpu_read(softnet_data.xmit.more); +} + static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, bool more) { -- cgit v1.2.3 From 6b16f9ee89b8d5709f24bc3ac89ae8b5452c0d7c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 1 Apr 2019 16:42:14 +0200 Subject: net: move skb->xmit_more hint to softnet data There are two reasons for this. First, the xmit_more flag conceptually doesn't fit into the skb, as xmit_more is not a property related to the skb. Its only a hint to the driver that the stack is about to transmit another packet immediately. Second, it was only done this way to not have to pass another argument to ndo_start_xmit(). We can place xmit_more in the softnet data, next to the device recursion. The recursion counter is already written to on each transmit. The "more" indicator is placed right next to it. Drivers can use the netdev_xmit_more() helper instead of skb->xmit_more to check the "more packets coming" hint. skb->xmit_more is retained (but always 0) to not cause build breakage. This change takes care of the simple s/skb->xmit_more/netdev_xmit_more()/ conversions. Remaining drivers are converted in the next patches. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2b25824642fa..eb9f05e0863d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4424,7 +4424,7 @@ static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, struct sk_buff *skb, struct net_device *dev, bool more) { - skb->xmit_more = more ? 1 : 0; + __this_cpu_write(softnet_data.xmit.more, more); return ops->ndo_start_xmit(skb, dev); } -- cgit v1.2.3 From 28b05b92886871bdd8e6a9df73e3a15845fe8ef4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 3 Apr 2019 08:28:35 +0200 Subject: net: use correct this_cpu primitive in dev_recursion_level syzbot reports: BUG: using __this_cpu_read() in preemptible code: caller is dev_recursion_level include/linux/netdevice.h:3052 [inline] __this_cpu_preempt_check+0x246/0x270 lib/smp_processor_id.c:47 dev_recursion_level include/linux/netdevice.h:3052 [inline] ip6_skb_dst_mtu include/net/ip6_route.h:245 [inline] I erronously downgraded a this_cpu_read to __this_cpu_read when moving dev_recursion_level() around. Reported-by: syzbot+51471b4aae195285a4a3@syzkaller.appspotmail.com Fixes: 97cdcf37b57e ("net: place xmit recursion in softnet data") Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eb9f05e0863d..521eb869555e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3049,7 +3049,7 @@ DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); static inline int dev_recursion_level(void) { - return __this_cpu_read(softnet_data.xmit.recursion); + return this_cpu_read(softnet_data.xmit.recursion); } #define XMIT_RECURSION_LIMIT 10 -- cgit v1.2.3 From 8065a779f17e94536a1c4dcee4f9d88011672f97 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Mon, 8 Apr 2019 19:45:27 -0400 Subject: failover: allow name change on IFF_UP slave interfaces When a netdev appears through hot plug then gets enslaved by a failover master that is already up and running, the slave will be opened right away after getting enslaved. Today there's a race that userspace (udev) may fail to rename the slave if the kernel (net_failover) opens the slave earlier than when the userspace rename happens. Unlike bond or team, the primary slave of failover can't be renamed by userspace ahead of time, since the kernel initiated auto-enslavement is unable to, or rather, is never meant to be synchronized with the rename request from userspace. As the failover slave interfaces are not designed to be operated directly by userspace apps: IP configuration, filter rules with regard to network traffic passing and etc., should all be done on master interface. In general, userspace apps only care about the name of master interface, while slave names are less important as long as admin users can see reliable names that may carry other information describing the netdev. For e.g., they can infer that "ens3nsby" is a standby slave of "ens3", while for a name like "eth0" they can't tell which master it belongs to. Historically the name of IFF_UP interface can't be changed because there might be admin script or management software that is already relying on such behavior and assumes that the slave name can't be changed once UP. But failover is special: with the in-kernel auto-enslavement mechanism, the userspace expectation for device enumeration and bring-up order is already broken. Previously initramfs and various userspace config tools were modified to bypass failover slaves because of auto-enslavement and duplicate MAC address. Similarly, in case that users care about seeing reliable slave name, the new type of failover slaves needs to be taken care of specifically in userspace anyway. It's less risky to lift up the rename restriction on failover slave which is already UP. Although it's possible this change may potentially break userspace component (most likely configuration scripts or management software) that assumes slave name can't be changed while UP, it's relatively a limited and controllable set among all userspace components, which can be fixed specifically to listen for the rename events on failover slaves. Userspace component interacting with slaves is expected to be changed to operate on failover master interface instead, as the failover slave is dynamic in nature which may come and go at any point. The goal is to make the role of failover slaves less relevant, and userspace components should only deal with failover master in the long run. Fixes: 30c8bd5aa8b2 ("net: Introduce generic failover module") Signed-off-by: Si-Wei Liu Reviewed-by: Liran Alon Acked-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 26f69cf763f4..324e872c91d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1500,6 +1500,7 @@ struct net_device_ops { * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device + * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1532,6 +1533,7 @@ enum netdev_priv_flags { IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, + IFF_LIVE_RENAME_OK = 1<<30, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1563,6 +1565,7 @@ enum netdev_priv_flags { #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER +#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK /** * struct net_device - The DEVICE structure. -- cgit v1.2.3 From da68b4ad02343862fee1e3e8c6315984f16a4778 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 25 Apr 2019 12:32:03 -0700 Subject: net/tls: move definition of tls ops into net/tls.h There seems to be no reason for tls_ops to be defined in netdevice.h which is included in a lot of places. Don't wrap the struct/enum declaration in ifdefs, it trickles down unnecessary ifdefs into driver code. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c46d218a0456..44b47e9df94a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -914,34 +914,13 @@ struct xfrmdev_ops { }; #endif -#if IS_ENABLED(CONFIG_TLS_DEVICE) -enum tls_offload_ctx_dir { - TLS_OFFLOAD_CTX_DIR_RX, - TLS_OFFLOAD_CTX_DIR_TX, -}; - -struct tls_crypto_info; -struct tls_context; - -struct tlsdev_ops { - int (*tls_dev_add)(struct net_device *netdev, struct sock *sk, - enum tls_offload_ctx_dir direction, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn); - void (*tls_dev_del)(struct net_device *netdev, - struct tls_context *ctx, - enum tls_offload_ctx_dir direction); - void (*tls_dev_resync_rx)(struct net_device *netdev, - struct sock *sk, u32 seq, u64 rcd_sn); -}; -#endif - struct dev_ifalias { struct rcu_head rcuhead; char ifalias[]; }; struct devlink; +struct tlsdev_ops; /* * This structure defines the management hooks for network devices. -- cgit v1.2.3