From 32953543221cfe2bf0a24205fab225e5b8ed81a0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 5 Oct 2009 06:01:03 +0000 Subject: dcb: data center bridging ops should be r/o The data center bridging ops structure can be const Signed-off-by: Stephen Hemminger Acked-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94958c109761..b332eefebb1b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -909,7 +909,7 @@ struct net_device #ifdef CONFIG_DCB /* Data Center Bridging netlink ops */ - struct dcbnl_rtnl_ops *dcbnl_ops; + const struct dcbnl_rtnl_ops *dcbnl_ops; #endif #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -- cgit v1.2.3 From 44a0873d52282f24b1894c58c0f157e0f626ddc9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Oct 2009 07:03:04 +0000 Subject: net: Introduce unregister_netdevice_queue() This patch adds an unreg_list anchor to struct net_device, and introduces an unregister_netdevice_queue() function, able to queue a net_device to a list instead of immediately unregistering it. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 83800091a31a..0ded0a4768a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -683,6 +683,7 @@ struct net_device struct list_head dev_list; struct list_head napi_list; + struct list_head unreg_list; /* Net device features */ unsigned long features; @@ -1116,7 +1117,13 @@ extern int dev_close(struct net_device *dev); extern void dev_disable_lro(struct net_device *dev); extern int dev_queue_xmit(struct sk_buff *skb); extern int register_netdevice(struct net_device *dev); -extern void unregister_netdevice(struct net_device *dev); +extern void unregister_netdevice_queue(struct net_device *dev, + struct list_head *head); +static inline void unregister_netdevice(struct net_device *dev) +{ + unregister_netdevice_queue(dev, NULL); +} + extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 9b5e383c11b08784eb0087617f880077982ef769 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Oct 2009 07:04:19 +0000 Subject: net: Introduce unregister_netdevice_many() Introduce rollback_registered_many() and unregister_netdevice_many() rollback_registered_many() is able to perform necessary steps at device dismantle time, factorizing two expensive synchronize_net() calls. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0ded0a4768a0..e7c227d7cb98 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1119,6 +1119,7 @@ extern int dev_queue_xmit(struct sk_buff *skb); extern int register_netdevice(struct net_device *dev); extern void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); +extern void unregister_netdevice_many(struct list_head *head); static inline void unregister_netdevice(struct net_device *dev) { unregister_netdevice_queue(dev, NULL); -- cgit v1.2.3 From df5c79452f26f2a3d0883a213102515cfeb7aae9 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Wed, 28 Oct 2009 18:24:35 +0000 Subject: net: Add ndo_fcoe_get_wwn to net_device_ops Add ndo_fcoe_get_wwn so Fiber Channel over Ethernet (FCoE) can make use of the provided World Wide Port Name (WWPN) and World Wide Node Name (WWNN) from the underlying network interface driver. Signed-off-by: Yi Zou Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e7c227d7cb98..656110a46e96 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -635,6 +635,10 @@ struct net_device_ops { unsigned int sgc); int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid); +#define NETDEV_FCOE_WWNN 0 +#define NETDEV_FCOE_WWPN 1 + int (*ndo_fcoe_get_wwn)(struct net_device *dev, + u64 *wwn, int type); #endif }; -- cgit v1.2.3 From fb699dfd426a189fe33b91586c15176a75c8aed0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Oct 2009 19:18:49 +0000 Subject: net: Introduce dev_get_by_index_rcu() Some workloads hit dev_base_lock rwlock pretty hard. We can use RCU lookups to avoid touching this rwlock. 
netdevices are already freed after a RCU grace period, so this patch adds no penalty at device dismantle time. dev_ifname() converted to dev_get_by_index_rcu() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 656110a46e96..ffc3106cc037 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1139,6 +1139,7 @@ extern void netdev_resync_ops(struct net_device *dev); extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern struct net_device *dev_get_by_index(struct net *net, int ifindex); extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); +extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); extern int dev_restart(struct net_device *dev); #ifdef CONFIG_NETPOLL_TRAP extern int netpoll_trap(void); -- cgit v1.2.3 From 5b252f0c2f98df21fadf0f6cf189b87a0b938228 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 29 Oct 2009 07:17:09 +0000 Subject: gro: Name the GRO result enumeration type This clarifies which return and parameter types are GRO result codes and not RX result codes. Signed-off-by: Ben Hutchings Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ffc3106cc037..6e777efe149e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -348,13 +348,14 @@ enum NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ }; -enum { +enum gro_result { GRO_MERGED, GRO_MERGED_FREE, GRO_HELD, GRO_NORMAL, GRO_DROP, }; +typedef enum gro_result gro_result_t; extern void __napi_schedule(struct napi_struct *n); @@ -1480,16 +1481,17 @@ extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); extern void napi_gro_flush(struct napi_struct *napi); -extern int dev_gro_receive(struct napi_struct *napi, +extern gro_result_t dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb); -extern int napi_skb_finish(int ret, struct sk_buff *skb); +extern int napi_skb_finish(gro_result_t ret, struct sk_buff *skb); extern int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); extern void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb); extern struct sk_buff * napi_get_frags(struct napi_struct *napi); extern int napi_frags_finish(struct napi_struct *napi, - struct sk_buff *skb, int ret); + struct sk_buff *skb, + gro_result_t ret); extern struct sk_buff * napi_frags_skb(struct napi_struct *napi); extern int napi_gro_frags(struct napi_struct *napi); -- cgit v1.2.3 From c7c4b3b6e976b95facbb723951bdcd554a3530a4 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 29 Oct 2009 21:36:53 -0700 Subject: gro: Change all receive functions to return GRO result codes This will allow drivers to adjust their receive path dynamically based on whether GRO is being applied successfully. Currently all in-tree callers ignore the return values of these functions and do not need to be changed. 
Signed-off-by: Ben Hutchings Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6e777efe149e..193b637889f9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1483,17 +1483,17 @@ extern int netif_receive_skb(struct sk_buff *skb); extern void napi_gro_flush(struct napi_struct *napi); extern gro_result_t dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb); -extern int napi_skb_finish(gro_result_t ret, struct sk_buff *skb); -extern int napi_gro_receive(struct napi_struct *napi, +extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); +extern gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); extern void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb); extern struct sk_buff * napi_get_frags(struct napi_struct *napi); -extern int napi_frags_finish(struct napi_struct *napi, +extern gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, gro_result_t ret); extern struct sk_buff * napi_frags_skb(struct napi_struct *napi); -extern int napi_gro_frags(struct napi_struct *napi); +extern gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) { -- cgit v1.2.3 From 0c509a6c9393b27a8c5a01acd4a72616206cfc24 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 29 Oct 2009 14:18:21 +0000 Subject: net: Allow devices to specify a device specific sysfs group. This isn't beautifully abstracted, but it is simple, simplifies uses and so far is only needed for the bonding driver. Signed-off-by: Eric W. Biederman Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 193b637889f9..e5ece8dceaad 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -900,8 +900,8 @@ struct net_device /* class/net/name entry */ struct device dev; - /* space for optional statistics and wireless sysfs groups */ - const struct attribute_group *sysfs_groups[3]; + /* space for optional device, statistics, and wireless sysfs groups */ + const struct attribute_group *sysfs_groups[4]; /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; -- cgit v1.2.3 From 72c9528bab94cc052d00ce241b8e85f5d71e45f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 30 Oct 2009 07:11:27 +0000 Subject: net: Introduce dev_get_by_name_rcu() Some workloads hit dev_base_lock rwlock pretty hard. We can use RCU lookups to avoid touching this rwlock (and avoid touching netdevice refcount) netdevices are already freed after a RCU grace period, so this patch adds no penalty at device dismantle time. However, it adds a synchronize_rcu() call in dev_change_name() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e5ece8dceaad..bcf1083857fc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1115,6 +1115,7 @@ extern void __dev_remove_pack(struct packet_type *pt); extern struct net_device *dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); extern struct net_device *dev_get_by_name(struct net *net, const char *name); +extern struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); extern struct net_device *__dev_get_by_name(struct net *net, const char *name); extern int dev_alloc_name(struct net_device *dev, const char *name); extern int dev_open(struct net_device *dev); -- cgit v1.2.3 From c6d14c84566d6b70ad9dc1618db0dec87cca9300 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Nov 2009 05:43:23 -0800 Subject: net: Introduce for_each_netdev_rcu() iterator Adds RCU management to the list of netdevices. Convert some for_each_netdev() users to RCU version, if it can avoid read_lock-ing dev_base_lock Ie: read_lock(&dev_base_lock); for_each_netdev(net, dev) some_action(); read_unlock(&dev_base_lock); becomes : rcu_read_lock(); for_each_netdev_rcu(net, dev) some_action(); rcu_read_unlock(); Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bcf1083857fc..5077de028317 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1081,6 +1081,8 @@ extern rwlock_t dev_base_lock; /* Device list lock */ #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_rcu(net, d) \ + list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_safe(net, d, n) \ list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue(net, d) \ -- cgit v1.2.3 From d94d9fee9fa4e66a0b91640a694b8b10177075b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Nov 2009 09:50:58 -0800 Subject: net: cleanup include/linux This cleanup patch puts struct/union/enum opening braces, in first line to ease grep games. struct something { becomes : struct something { Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5077de028317..465add6c43e3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -125,8 +125,7 @@ typedef enum netdev_tx netdev_tx_t; * with byte counters. 
*/ -struct net_device_stats -{ +struct net_device_stats { unsigned long rx_packets; /* total packets received */ unsigned long tx_packets; /* total packets transmitted */ unsigned long rx_bytes; /* total bytes received */ @@ -179,8 +178,7 @@ struct neighbour; struct neigh_parms; struct sk_buff; -struct netif_rx_stats -{ +struct netif_rx_stats { unsigned total; unsigned dropped; unsigned time_squeeze; @@ -189,8 +187,7 @@ struct netif_rx_stats DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat); -struct dev_addr_list -{ +struct dev_addr_list { struct dev_addr_list *next; u8 da_addr[MAX_ADDR_LEN]; u8 da_addrlen; @@ -227,8 +224,7 @@ struct netdev_hw_addr_list { int count; }; -struct hh_cache -{ +struct hh_cache { struct hh_cache *hh_next; /* Next entry */ atomic_t hh_refcnt; /* number of users */ /* @@ -291,8 +287,7 @@ struct header_ops { * code. */ -enum netdev_state_t -{ +enum netdev_state_t { __LINK_STATE_START, __LINK_STATE_PRESENT, __LINK_STATE_NOCARRIER, @@ -341,8 +336,7 @@ struct napi_struct { struct sk_buff *skb; }; -enum -{ +enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ @@ -458,8 +452,7 @@ static inline void napi_synchronize(const struct napi_struct *n) # define napi_synchronize(n) barrier() #endif -enum netdev_queue_state_t -{ +enum netdev_queue_state_t { __QUEUE_STATE_XOFF, __QUEUE_STATE_FROZEN, }; @@ -653,8 +646,7 @@ struct net_device_ops { * moves out. */ -struct net_device -{ +struct net_device { /* * This is the first field of the "visible" part of this structure @@ -1229,8 +1221,7 @@ static inline int unregister_gifconf(unsigned int family) * Incoming packets are placed on per-cpu queues so that * no locking is needed. 
*/ -struct softnet_data -{ +struct softnet_data { struct Qdisc *output_queue; struct sk_buff_head input_pkt_queue; struct list_head poll_list; @@ -1627,7 +1618,8 @@ static inline int netif_dormant(const struct net_device *dev) * * Check if carrier is operational */ -static inline int netif_oper_up(const struct net_device *dev) { +static inline int netif_oper_up(const struct net_device *dev) +{ return (dev->operstate == IF_OPER_UP || dev->operstate == IF_OPER_UNKNOWN /* backward compat */); } -- cgit v1.2.3 From 254245d23396aca1f9100d500163d7bd6019ab6f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 10 Nov 2009 07:54:47 +0000 Subject: netdev: add netdev_continue_rcu This adds an RCU macro for continuing search, useful for some network devices like vlan. Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 465add6c43e3..083b5989cecb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1079,6 +1079,8 @@ extern rwlock_t dev_base_lock; /* Device list lock */ list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue(net, d) \ list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_continue_rcu(net, d) \ + list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) static inline struct net_device *next_net_device(struct net_device *dev) -- cgit v1.2.3 From 572a9d7b6fc7f20f573664063324c086be310c42 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 10 Nov 2009 06:14:14 +0000 Subject: net: allow to propagate errors through ->ndo_hard_start_xmit() Currently the ->ndo_hard_start_xmit() callbacks are only permitted to return one of the NETDEV_TX codes. 
This prevents any kind of error propagation for virtual devices, like queue congestion of the underlying device in case of layered devices, or unreachability in case of tunnels. This patch changes the NET_XMIT codes to avoid clashes with the NETDEV_TX codes and changes the two callers of dev_hard_start_xmit() to expect either errno codes, NET_XMIT codes or NETDEV_TX codes as return value. In case of qdisc_restart(), all non NETDEV_TX codes are mapped to NETDEV_TX_OK since no error propagation is possible when using qdiscs. In case of dev_queue_xmit(), the error is propagated upwards. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 083b5989cecb..8b266390b9e2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,27 +63,48 @@ struct wireless_dev; #define HAVE_FREE_NETDEV /* free_netdev() */ #define HAVE_NETDEV_PRIV /* netdev_priv() */ -#define NET_XMIT_SUCCESS 0 -#define NET_XMIT_DROP 1 /* skb dropped */ -#define NET_XMIT_CN 2 /* congestion notification */ -#define NET_XMIT_POLICED 3 /* skb is shot by police */ -#define NET_XMIT_MASK 0xFFFF /* qdisc flags in net/sch_generic.h */ +/* + * Transmit return codes: transmit return codes originate from three different + * namespaces: + * + * - qdisc return codes + * - driver transmit return codes + * - errno values + * + * Drivers are allowed to return any one of those in their hard_start_xmit() + * function. Real network devices commonly used with qdiscs should only return + * the driver transmit return codes though - when qdiscs are used, the actual + * transmission happens asynchronously, so the value is not propagated to + * higher layers. 
Virtual network devices transmit synchronously, in this case + * the driver transmit return codes are consumed by dev_queue_xmit(), all + * others are propagated to higher layers. + */ + +/* qdisc ->enqueue() return codes. */ +#define NET_XMIT_SUCCESS 0x00 +#define NET_XMIT_DROP 0x10 /* skb dropped */ +#define NET_XMIT_CN 0x20 /* congestion notification */ +#define NET_XMIT_POLICED 0x30 /* skb is shot by police */ +#define NET_XMIT_MASK 0xf0 /* qdisc flags in net/sch_generic.h */ /* Backlog congestion levels */ -#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ -#define NET_RX_DROP 1 /* packet dropped */ +#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ +#define NET_RX_DROP 1 /* packet dropped */ /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It * indicates that the device will soon be dropping packets, or already drops * some packets of the same priority; prompting us to send less aggressively. */ -#define net_xmit_eval(e) ((e) == NET_XMIT_CN? 0 : (e)) +#define net_xmit_eval(e) ((e) == NET_XMIT_CN ? 0 : (e)) #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0) /* Driver transmit return codes */ +#define NETDEV_TX_MASK 0xf + enum netdev_tx { - NETDEV_TX_OK = 0, /* driver took care of packet */ - NETDEV_TX_BUSY, /* driver tx path was busy*/ - NETDEV_TX_LOCKED = -1, /* driver tx lock was already taken */ + __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */ + NETDEV_TX_OK = 0, /* driver took care of packet */ + NETDEV_TX_BUSY = 1, /* driver tx path was busy*/ + NETDEV_TX_LOCKED = 2, /* driver tx lock was already taken */ }; typedef enum netdev_tx netdev_tx_t; -- cgit v1.2.3 From ce81b76a39835a721cd168e0c0bcfe7132f1f66b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Nov 2009 17:34:30 +0000 Subject: ipv6: use RCU to walk list of network devices No longer need read_lock(&dev_base_lock), use RCU instead. We also can avoid taking references on inet6_dev structs. 
Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8b266390b9e2..61425d0c6123 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1114,6 +1114,16 @@ static inline struct net_device *next_net_device(struct net_device *dev) return lh == &net->dev_base_head ? NULL : net_device_entry(lh); } +static inline struct net_device *next_net_device_rcu(struct net_device *dev) +{ + struct list_head *lh; + struct net *net; + + net = dev_net(dev); + lh = rcu_dereference(dev->dev_list.next); + return lh == &net->dev_base_head ? NULL : net_device_entry(lh); +} + static inline struct net_device *first_net_device(struct net *net) { return list_empty(&net->dev_base_head) ? NULL : -- cgit v1.2.3 From 9a1654ba0b50402a6bd03c7b0fe9b0200a5ea7b1 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 15 Nov 2009 07:20:12 +0000 Subject: net: Optimize hard_start_xmit() return checking Recent changes in the TX error propagation require additional checking and masking of values returned from hard_start_xmit(), mainly to separate cases where skb was consumed. This aim can be simplified by changing the order of NETDEV_TX and NET_XMIT codes, because the latter are treated similarly to negative (ERRNO) values. After this change much simpler dev_xmit_complete() is also used in sch_direct_xmit(), so it is moved to netdevice.h. Additionally NET_RX definitions in netdevice.h are moved up from between TX codes to avoid confusion while reading the TX comment. Signed-off-by: Jarek Poplawski Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 61425d0c6123..7043f85e643d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,6 +63,10 @@ struct wireless_dev; #define HAVE_FREE_NETDEV /* free_netdev() */ #define HAVE_NETDEV_PRIV /* netdev_priv() */ +/* Backlog congestion levels */ +#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ +#define NET_RX_DROP 1 /* packet dropped */ + /* * Transmit return codes: transmit return codes originate from three different * namespaces: @@ -82,14 +86,10 @@ struct wireless_dev; /* qdisc ->enqueue() return codes. */ #define NET_XMIT_SUCCESS 0x00 -#define NET_XMIT_DROP 0x10 /* skb dropped */ -#define NET_XMIT_CN 0x20 /* congestion notification */ -#define NET_XMIT_POLICED 0x30 /* skb is shot by police */ -#define NET_XMIT_MASK 0xf0 /* qdisc flags in net/sch_generic.h */ - -/* Backlog congestion levels */ -#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ -#define NET_RX_DROP 1 /* packet dropped */ +#define NET_XMIT_DROP 0x01 /* skb dropped */ +#define NET_XMIT_CN 0x02 /* congestion notification */ +#define NET_XMIT_POLICED 0x03 /* skb is shot by police */ +#define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */ /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It * indicates that the device will soon be dropping packets, or already drops @@ -98,16 +98,34 @@ struct wireless_dev; #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? 
-ENOBUFS : 0) /* Driver transmit return codes */ -#define NETDEV_TX_MASK 0xf +#define NETDEV_TX_MASK 0xf0 enum netdev_tx { __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */ - NETDEV_TX_OK = 0, /* driver took care of packet */ - NETDEV_TX_BUSY = 1, /* driver tx path was busy*/ - NETDEV_TX_LOCKED = 2, /* driver tx lock was already taken */ + NETDEV_TX_OK = 0x00, /* driver took care of packet */ + NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/ + NETDEV_TX_LOCKED = 0x20, /* driver tx lock was already taken */ }; typedef enum netdev_tx netdev_tx_t; +/* + * Current order: NETDEV_TX_MASK > NET_XMIT_MASK >= 0 is significant; + * hard_start_xmit() return < NET_XMIT_MASK means skb was consumed. + */ +static inline bool dev_xmit_complete(int rc) +{ + /* + * Positive cases with an skb consumed by a driver: + * - successful transmission (rc == NETDEV_TX_OK) + * - error while transmitting (rc < 0) + * - error while queueing to a different device (rc & NET_XMIT_MASK) + */ + if (likely(rc < NET_XMIT_MASK)) + return true; + + return false; +} + #endif #define MAX_ADDR_LEN 32 /* Largest hardware address length */ -- cgit v1.2.3 From d83345adf96bc13a5e360f4649a2e68ef968dec0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Nov 2009 03:36:51 +0000 Subject: net: add dev_txq_stats_fold() helper Some drivers ndo_get_stats() method need to perform txqueue stats folding. Move folding from dev_get_stats() to a new dev_txq_stats_fold() function Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7043f85e643d..c8fa4627de00 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1941,6 +1941,7 @@ extern void netdev_features_change(struct net_device *dev); extern void dev_load(struct net *net, const char *name); extern void dev_mcast_init(void); extern const struct net_device_stats *dev_get_stats(struct net_device *dev); +extern void dev_txq_stats_fold(const struct net_device *dev, struct net_device_stats *stats); extern int netdev_max_backlog; extern int weight_p; -- cgit v1.2.3 From e014debecd3ee3832e6476b3a9c948edfcfd1250 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Nov 2009 05:59:21 +0000 Subject: linkwatch: linkwatch_forget_dev() to speedup device dismantle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. 
[PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of the current scheme in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and uses a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c8fa4627de00..97873e31661c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -896,7 +896,7 @@ struct net_device { /* device index hash chain */ struct hlist_node index_hlist; - struct net_device *link_watch_next; + struct list_head link_watch_list; /* register/unregister state machine */ enum { NETREG_UNINITIALIZED=0, @@ -1600,6 +1600,7 @@ static inline void dev_hold(struct net_device *dev) */ extern void linkwatch_fire_event(struct net_device *dev); +extern void linkwatch_forget_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present -- cgit v1.2.3 From 445409602c09219767c06497c0dc2285eac244ed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 26 Nov 2009 06:07:08 +0000 Subject: veth: move loopback logic to common location The veth driver contains code to forward an skb from the start_xmit function of one network device into the receive path of another device. Moving that code into a common location lets us reuse the code for direct forwarding of data between macvlan ports, and possibly in other drivers. Signed-off-by: Arnd Bergmann Acked-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97873e31661c..9428793775a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1562,6 +1562,8 @@ extern int dev_set_mac_address(struct net_device *, extern int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq); +extern int dev_forward_skb(struct net_device *dev, + struct sk_buff *skb); extern int netdev_budget; -- cgit v1.2.3 From dcbccbd4f1f6ad0f0e169d4b2e816e42bde06f82 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 29 Nov 2009 22:25:26 +0000 Subject: net: Implement for_each_netdev_reverse. I will need this shortly to implement network namespace shutdown batching. For sanity sake network devices should be removed in the reverse order they were created in. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9428793775a0..daf13d367498 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1112,6 +1112,8 @@ extern rwlock_t dev_base_lock; /* Device list lock */ #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_reverse(net, d) \ + list_for_each_entry_reverse(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_rcu(net, d) \ list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_safe(net, d, n) \ -- cgit v1.2.3 From fc4a7489663250360cd40d5adf06a08d1c5d54df Mon Sep 17 00:00:00 2001 From: Patrick Mullaney Date: Thu, 3 Dec 2009 15:59:22 -0800 Subject: netdevice: provide common routine for macvlan and vlan operstate management Provide common routine for the transition of operational state for a leaf 
device during a root device transition. Signed-off-by: Patrick Mullaney Acked-by: Arnd Bergmann Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index daf13d367498..a3fccc85b1a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1981,6 +1981,9 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one, unsigned long mask); unsigned long netdev_fix_features(unsigned long features, const char *name); +void netif_stacked_transfer_operstate(const struct net_device *rootdev, + struct net_device *dev); + static inline int net_gso_ok(int features, int gso_type) { int feature = gso_type << NETIF_F_GSO_SHIFT; -- cgit v1.2.3