From c1da4ac752b8b0411791d26c678fcf23d2eed242 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 13 Jun 2008 18:12:00 -0700 Subject: net/core: add NETDEV_BONDING_FAILOVER event Add NETDEV_BONDING_FAILOVER event to be used in a successive patch by bonding to announce fail-over for the active-backup mode through the netdev events notifier chain mechanism. Such an event can be of use for the RDMA CM (communication manager) to let native RDMA ULPs (e.g. NFS-RDMA, iSER) always be aligned with the IP stack, in the sense that they use the same ports/links as the stack does. Further uses are possible, for example allowing monitoring tools based on netlink events to be aware of bonding fail-over. Signed-off-by: Or Gerlitz Signed-off-by: Jay Vosburgh Signed-off-by: Jeff Garzik --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f27fd2009334..e92fc839ab1d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1479,6 +1479,7 @@ extern void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, struct extern void dev_set_promiscuity(struct net_device *dev, int inc); extern void dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); +extern void netdev_bonding_change(struct net_device *dev); extern void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ extern void dev_load(struct net *net, const char *name); -- cgit v1.2.3 From b8a9787eddb0e4665f31dd1d64584732b2b5d051 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 13 Jun 2008 18:12:04 -0700 Subject: bonding: Allow setting max_bonds to zero Permit bonding to function rationally if max_bonds is set to zero. This will load the module, but create no master devices (which can be created via sysfs). Requires some change to bond_create_sysfs; currently, the netdev sysfs directory is determined from the first bonding device created, but this is no longer possible. Instead, an interface from net/core is created to create and destroy files in net_class. Based on a patch submitted by Phil Oester. Modified by Jay Vosburgh to fix the sysfs issue mentioned above and to update the documentation. Signed-off-by: Phil Oester Signed-off-by: Jay Vosburgh Signed-off-by: Jeff Garzik --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e92fc839ab1d..9ccbfac3fd95 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1506,6 +1506,9 @@ extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); extern void dev_seq_stop(struct seq_file *seq, void *v); #endif +extern int netdev_class_create_file(struct class_attribute *class_attr); +extern void netdev_class_remove_file(struct class_attribute *class_attr); + extern void linkwatch_run_queue(void); extern int netdev_compute_features(unsigned long all, unsigned long one); -- cgit v1.2.3 From 9d45abe1c2949183e5d9cb25721bf1c42c7b5e3b Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Tue, 17 Jun 2008 21:12:48 -0700 Subject: netdevice: change net_device->promiscuity/allmulti to unsigned int The comments of dev_set_allmulti/promiscuity() say that "While the count in the device remains above zero...". So a negative count is useless. Fix the type of the counter from "int" to "unsigned int". Signed-off-by: Wang Chen Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f27fd2009334..06d8ea5992df 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -595,8 +595,8 @@ struct net_device int uc_promisc; struct dev_addr_list *mc_list; /* Multicast mac addresses */ int mc_count; /* Number of installed mcasts */ - int promiscuity; - int allmulti; + unsigned int promiscuity; + unsigned int allmulti; /* Protocol specific pointers */ -- cgit v1.2.3 From dad9b335c6940de2746a9788eb456d09cf102f81 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Wed, 18 Jun 2008 01:48:28 -0700 Subject: netdevice: Fix promiscuity and allmulti overflow A maxed-out promiscuity or allmulti count plus a positive @inc can cause an overflow. For example: when allmulti=0xFFFFFFFF, any caller giving dev_set_allmulti() a positive @inc will cause allmulti to be turned off. This is not what we want, though it's a rare case. The fix is that only a negative @inc will turn allmulti or promiscuity off, and when any caller makes the counters touch the roof, we return an error. Changes in v2: Change the void functions dev_set_promiscuity/allmulti to return int, so callers can get the overflow error. The callers will be fixed up later. Changes in v3: 1. Since we return the error to the caller, we don't need to print KERN_ERROR; KERN_WARNING is enough. 2. In dev_set_promiscuity(), if __dev_set_promiscuity() fails, we return at once. Signed-off-by: Wang Chen Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4bf613cd9e2d..45dce2b58d4c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1476,8 +1476,8 @@ extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *ad extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); extern int __dev_addr_sync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count); extern void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count); -extern void dev_set_promiscuity(struct net_device *dev, int inc); -extern void dev_set_allmulti(struct net_device *dev, int inc); +extern int dev_set_promiscuity(struct net_device *dev, int inc); +extern int dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); extern void netdev_bonding_change(struct net_device *dev); extern void netdev_features_change(struct net_device *dev); -- cgit v1.2.3 From 0187bdfb05674147774ca79a79942537f3ad54bd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 19 Jun 2008 16:15:47 -0700 Subject: net: Disable LRO on devices that are forwarding Large Receive Offload (LRO) is only appropriate for packets that are destined for the host, and should be disabled if received packets may be forwarded. It can also confuse the GSO on output. Add dev_disable_lro() function which uses the appropriate ethtool ops to disable LRO if enabled. Add calls to dev_disable_lro() in br_add_if() and functions that enable IPv4 and IPv6 forwarding. Signed-off-by: Ben Hutchings Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45dce2b58d4c..1304ad2d7105 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -886,6 +886,7 @@ extern struct net_device *__dev_get_by_name(struct net *net, const char *name); extern int dev_alloc_name(struct net_device *dev, const char *name); extern int dev_open(struct net_device *dev); extern int dev_close(struct net_device *dev); +extern void dev_disable_lro(struct net_device *dev); extern int dev_queue_xmit(struct sk_buff *skb); extern int register_netdevice(struct net_device *dev); extern void unregister_netdevice(struct net_device *dev); -- cgit v1.2.3 From eca9ebac651f774d8b10fce7c5d173c3c3d3394f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 5 Jul 2008 21:26:13 -0700 Subject: net: Add GARP applicant-only participant Add an implementation of the GARP (Generic Attribute Registration Protocol) applicant-only participant. This will be used by the following patch to add GVRP support to the VLAN code. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 56dadb528f67..e009c6fbf5cd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -740,6 +740,8 @@ struct net_device struct net_bridge_port *br_port; /* macvlan */ struct macvlan_port *macvlan_port; + /* GARP */ + struct garp_port *garp_port; /* class/net/name entry */ struct device dev; -- cgit v1.2.3 From bb949fbd1878973c3539d9aecff52f284482a937 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 16:55:56 -0700 Subject: netdev: Create netdev_queue abstraction. A netdev_queue is an entity managed by a qdisc. Currently there is one RX and one TX queue, and a netdev_queue merely contains a backpointer to the net_device. The Qdisc struct is augmented with a netdev_queue pointer as well. Eventually the 'dev' Qdisc member will go away and we will have the resulting hierarchy: net_device --> netdev_queue --> Qdisc Also, qdisc_alloc() and qdisc_create_dflt() now take a netdev_queue pointer argument. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e009c6fbf5cd..515fd25bf0fc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -448,6 +448,10 @@ static inline void napi_synchronize(const struct napi_struct *n) # define napi_synchronize(n) barrier() #endif +struct netdev_queue { + struct net_device *dev; +}; + /* * The DEVICE structure. * Actually, this whole structure is a big mistake. It mixes I/O @@ -624,6 +628,9 @@ struct net_device unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ + struct netdev_queue rx_queue; + struct netdev_queue tx_queue; + /* ingress path synchronizer */ spinlock_t ingress_lock; struct Qdisc *qdisc_ingress; -- cgit v1.2.3 From dc2b48475a0a36f8b3bbb2da60d3a006dc5c2c84 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 17:18:23 -0700 Subject: netdev: Move queue_lock into struct netdev_queue. The lock is now an attribute of the device queue. One thing to notice is that "suspicious" places emerge which will need specific training about multiple queue handling. 
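To keep the resulting object layout in mind while reading the hunks above and below, here is a minimal, self-contained C sketch of the net_device --> netdev_queue --> Qdisc hierarchy being introduced; the member set and types are trimmed stand-ins for illustration only, not the actual kernel definitions:

#include <stdio.h>

/* Simplified stand-ins for the kernel structures discussed above. */
struct net_device;
struct Qdisc;

struct netdev_queue {
    struct net_device *dev;         /* backpointer to the owning device */
    struct Qdisc *qdisc;            /* qdisc managing this queue */
};

struct net_device {
    const char *name;
    struct netdev_queue rx_queue;   /* one RX queue ... */
    struct netdev_queue tx_queue;   /* ... and one TX queue at this stage */
};

struct Qdisc {
    const char *ops_name;
    struct netdev_queue *dev_queue; /* the queue this qdisc serves */
};

int main(void)
{
    struct Qdisc fifo = { .ops_name = "pfifo_fast" };
    struct net_device dev = { .name = "eth0" };

    /* Wire up net_device --> netdev_queue --> Qdisc. */
    dev.tx_queue.dev = &dev;
    dev.tx_queue.qdisc = &fifo;
    fifo.dev_queue = &dev.tx_queue;

    printf("%s: tx qdisc is %s\n", dev.name, dev.tx_queue.qdisc->ops_name);
    return 0;
}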
They are so marked with explicit "netdev->rx_queue" and "netdev->tx_queue" references. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 515fd25bf0fc..e835acacb479 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -449,6 +449,7 @@ static inline void napi_synchronize(const struct napi_struct *n) #endif struct netdev_queue { + spinlock_t lock; struct net_device *dev; }; @@ -629,7 +630,7 @@ struct net_device unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ struct netdev_queue rx_queue; - struct netdev_queue tx_queue; + struct netdev_queue tx_queue ____cacheline_aligned_in_smp; /* ingress path synchronizer */ spinlock_t ingress_lock; @@ -639,7 +640,6 @@ struct net_device * Cache line mostly used on queue transmit path (qdisc) */ /* device queue lock */ - spinlock_t queue_lock ____cacheline_aligned_in_smp; struct Qdisc *qdisc; struct Qdisc *qdisc_sleeping; struct list_head qdisc_list; -- cgit v1.2.3 From 555353cfa1aee293de445bfa6de43276138ddd82 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 17:33:13 -0700 Subject: netdev: The ingress_lock member is no longer needed. Every qdisc is associated with a queue, and in the case of ingress qdiscs that will now be netdev->rx_queue so using that queue's lock is the thing to do. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e835acacb479..633a44c6fa5e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -632,8 +632,6 @@ struct net_device struct netdev_queue rx_queue; struct netdev_queue tx_queue ____cacheline_aligned_in_smp; - /* ingress path synchronizer */ - spinlock_t ingress_lock; struct Qdisc *qdisc_ingress; /* -- cgit v1.2.3 From b0e1e6462df3c5944010b3328a546d8fe5d932cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 17:42:10 -0700 Subject: netdev: Move rest of qdisc state into struct netdev_queue Now qdisc, qdisc_sleeping, and qdisc_list also live there. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 633a44c6fa5e..df702a7b3db5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -451,6 +451,9 @@ static inline void napi_synchronize(const struct napi_struct *n) struct netdev_queue { spinlock_t lock; struct net_device *dev; struct Qdisc *qdisc; struct Qdisc *qdisc_sleeping; struct list_head qdisc_list; }; /* @@ -634,13 +637,6 @@ struct net_device struct netdev_queue rx_queue; struct netdev_queue tx_queue ____cacheline_aligned_in_smp; struct Qdisc *qdisc_ingress; -/* - * Cache line mostly used on queue transmit path (qdisc) */ /* device queue lock */ - struct Qdisc *qdisc; - struct Qdisc *qdisc_sleeping; - struct list_head qdisc_list; unsigned long tx_queue_len; /* Max frames per queue allowed */ /* Partially transmitted GSO packet. */ -- cgit v1.2.3 From 816f3258e70db38d6d92c8d871377179fd69160f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 22:49:00 -0700 Subject: netdev: Kill qdisc_ingress, use netdev->rx_queue.qdisc instead. 
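A rough userspace model of what "using that queue's lock" to manage the queue's qdisc looks like; the mutex and the mock_* names below are illustrative assumptions standing in for the queue spinlock and the kernel types, not real kernel interfaces:

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins: a queue owns both its lock and its qdisc pointer. */
struct mock_qdisc {
    const char *ops_name;
};

struct mock_queue {
    pthread_mutex_t lock;       /* stands in for the per-queue spinlock */
    struct mock_qdisc *qdisc;
};

/* Swap the qdisc attached to a queue under that queue's own lock. */
static struct mock_qdisc *queue_swap_qdisc(struct mock_queue *q,
                                           struct mock_qdisc *new_qdisc)
{
    struct mock_qdisc *old;

    pthread_mutex_lock(&q->lock);
    old = q->qdisc;
    q->qdisc = new_qdisc;
    pthread_mutex_unlock(&q->lock);
    return old;
}

int main(void)
{
    struct mock_qdisc noop = { "noop" }, ingress = { "ingress" };
    struct mock_queue rx_queue = {
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .qdisc = &noop,
    };
    struct mock_qdisc *old = queue_swap_qdisc(&rx_queue, &ingress);

    printf("rx_queue qdisc: %s -> %s\n", old->ops_name, rx_queue.qdisc->ops_name);
    return 0;
}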
Now that our qdisc management is bi-directional, per-queue, and fully orthogonal, there is no reason to have a special ingress qdisc pointer in struct net_device. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index df702a7b3db5..e7c49246fd88 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -634,9 +634,6 @@ struct net_device struct netdev_queue rx_queue; struct netdev_queue tx_queue ____cacheline_aligned_in_smp; - - struct Qdisc *qdisc_ingress; - unsigned long tx_queue_len; /* Max frames per queue allowed */ /* Partially transmitted GSO packet. */ -- cgit v1.2.3 From ee609cb36220d18c0cf476b066a5ab7e6f6d3a69 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 22:58:37 -0700 Subject: netdev: Move next_sched into struct netdev_queue. We schedule queues, not the device, for output queue processing in BH. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e7c49246fd88..1379c822e51d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -454,6 +454,7 @@ struct netdev_queue { struct Qdisc *qdisc; struct Qdisc *qdisc_sleeping; struct list_head qdisc_list; + struct netdev_queue *next_sched; }; /* @@ -545,8 +546,6 @@ struct net_device #define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) #define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) - struct net_device *next_sched; - /* Interface index. Unique device identifier */ int ifindex; int iflink; @@ -940,7 +939,7 @@ static inline int unregister_gifconf(unsigned int family) */ struct softnet_data { - struct net_device *output_queue; + struct netdev_queue *output_queue; struct sk_buff_head input_pkt_queue; struct list_head poll_list; struct sk_buff *completion_queue; -- cgit v1.2.3 From 970565bbad0c7b98db0d14131a69e5a0f4445d49 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 23:10:33 -0700 Subject: netdev: Move gso_skb into netdev_queue. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1379c822e51d..aae6c6d153f2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -452,6 +452,7 @@ struct netdev_queue { spinlock_t lock; struct net_device *dev; struct Qdisc *qdisc; + struct sk_buff *gso_skb; struct Qdisc *qdisc_sleeping; struct list_head qdisc_list; struct netdev_queue *next_sched; }; @@ -635,9 +636,6 @@ struct net_device struct netdev_queue tx_queue ____cacheline_aligned_in_smp; unsigned long tx_queue_len; /* Max frames per queue allowed */ - /* Partially transmitted GSO packet. */ - struct sk_buff *gso_skb; - /* * One part is mostly used on xmit path (device) */ -- cgit v1.2.3 From 86d804e10a37cd86f16bf72386c37e843a98a74b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 23:11:25 -0700 Subject: netdev: Make netif_schedule() routines work with netdev_queue objects. Only plain netif_schedule() remains taking a net_device, mostly as a compatibility item while we transition the rest of these interfaces. 
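The helpers introduced by this patch follow a simple schedule-if-not-stopped pattern; the standalone sketch below models it in userspace, where the mock_* names and the single XOFF bit are simplified assumptions rather than the kernel's definitions:

#include <stdio.h>

/* Simplified stand-in for the per-queue state bits. */
enum { MOCK_QUEUE_XOFF_BIT = 0 };

struct mock_txq {
    unsigned long state;
    int scheduled;
};

/* Stand-in for __netif_schedule(): mark the queue for later servicing. */
static void mock_sched(struct mock_txq *txq)
{
    txq->scheduled = 1;
}

/* The pattern used above: only schedule a queue that is not stopped. */
static void mock_schedule_queue(struct mock_txq *txq)
{
    if (!(txq->state & (1UL << MOCK_QUEUE_XOFF_BIT)))
        mock_sched(txq);
}

int main(void)
{
    struct mock_txq running = { 0, 0 };
    struct mock_txq stopped = { 1UL << MOCK_QUEUE_XOFF_BIT, 0 };

    mock_schedule_queue(&running);
    mock_schedule_queue(&stopped);
    printf("running scheduled=%d, stopped scheduled=%d\n",
           running.scheduled, stopped.scheduled);
    return 0;
}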
Everything else calls netif_schedule_queue() or __netif_schedule(), both of which take a netdev_queue pointer. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aae6c6d153f2..28aa8e77cee9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -952,12 +952,19 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data); #define HAVE_NETIF_QUEUE -extern void __netif_schedule(struct net_device *dev); +extern void __netif_schedule(struct netdev_queue *txq); -static inline void netif_schedule(struct net_device *dev) +static inline void netif_schedule_queue(struct netdev_queue *txq) { + struct net_device *dev = txq->dev; + if (!test_bit(__LINK_STATE_XOFF, &dev->state)) - __netif_schedule(dev); + __netif_schedule(txq); +} + +static inline void netif_schedule(struct net_device *dev) +{ + netif_schedule_queue(&dev->tx_queue); } /** @@ -987,7 +994,7 @@ static inline void netif_wake_queue(struct net_device *dev) } #endif if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) - __netif_schedule(dev); + __netif_schedule(&dev->tx_queue); } /** @@ -1103,7 +1110,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) #endif if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state)) - __netif_schedule(dev); + __netif_schedule(&dev->tx_queue); #endif } -- cgit v1.2.3 From c773e847ea8f6812804e40f52399c6921a00eab1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 23:13:53 -0700 Subject: netdev: Move _xmit_lock and xmit_lock_owner into netdev_queue. Accesses are mostly structured such that when there are multiple TX queues the code transformations will be a little bit simpler. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 62 +++++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 23 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 28aa8e77cee9..c8d5f128858d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -453,6 +453,8 @@ struct netdev_queue { struct net_device *dev; struct Qdisc *qdisc; struct sk_buff *gso_skb; + spinlock_t _xmit_lock; + int xmit_lock_owner; struct Qdisc *qdisc_sleeping; struct list_head qdisc_list; struct netdev_queue *next_sched; @@ -639,12 +641,6 @@ struct net_device /* * One part is mostly used on xmit path (device) */ - /* hard_start_xmit synchronizer */ - spinlock_t _xmit_lock ____cacheline_aligned_in_smp; - /* cpu id of processor entered to hard_start_xmit or -1, - if nobody entered there. 
- */ - int xmit_lock_owner; void *priv; /* pointer to private data */ int (*hard_start_xmit) (struct sk_buff *skb, struct net_device *dev); @@ -1402,52 +1398,72 @@ static inline void netif_rx_complete(struct net_device *dev, * * Get network device transmit lock */ -static inline void __netif_tx_lock(struct net_device *dev, int cpu) +static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { - spin_lock(&dev->_xmit_lock); - dev->xmit_lock_owner = cpu; + spin_lock(&txq->_xmit_lock); + txq->xmit_lock_owner = cpu; } static inline void netif_tx_lock(struct net_device *dev) { - __netif_tx_lock(dev, smp_processor_id()); + __netif_tx_lock(&dev->tx_queue, smp_processor_id()); +} + +static inline void __netif_tx_lock_bh(struct netdev_queue *txq) +{ + spin_lock_bh(&txq->_xmit_lock); + txq->xmit_lock_owner = smp_processor_id(); } static inline void netif_tx_lock_bh(struct net_device *dev) { - spin_lock_bh(&dev->_xmit_lock); - dev->xmit_lock_owner = smp_processor_id(); + __netif_tx_lock_bh(&dev->tx_queue); } -static inline int netif_tx_trylock(struct net_device *dev) +static inline int __netif_tx_trylock(struct netdev_queue *txq) { - int ok = spin_trylock(&dev->_xmit_lock); + int ok = spin_trylock(&txq->_xmit_lock); if (likely(ok)) - dev->xmit_lock_owner = smp_processor_id(); + txq->xmit_lock_owner = smp_processor_id(); return ok; } +static inline int netif_tx_trylock(struct net_device *dev) +{ + return __netif_tx_trylock(&dev->tx_queue); +} + +static inline void __netif_tx_unlock(struct netdev_queue *txq) +{ + txq->xmit_lock_owner = -1; + spin_unlock(&txq->_xmit_lock); +} + static inline void netif_tx_unlock(struct net_device *dev) { - dev->xmit_lock_owner = -1; - spin_unlock(&dev->_xmit_lock); + __netif_tx_unlock(&dev->tx_queue); +} + +static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) +{ + txq->xmit_lock_owner = -1; + spin_unlock_bh(&txq->_xmit_lock); } static inline void netif_tx_unlock_bh(struct net_device *dev) { - dev->xmit_lock_owner = -1; - spin_unlock_bh(&dev->_xmit_lock); + __netif_tx_unlock_bh(&dev->tx_queue); } -#define HARD_TX_LOCK(dev, cpu) { \ +#define HARD_TX_LOCK(dev, txq, cpu) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ - __netif_tx_lock(dev, cpu); \ + __netif_tx_lock(txq, cpu); \ } \ } -#define HARD_TX_UNLOCK(dev) { \ +#define HARD_TX_UNLOCK(dev, txq) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ - netif_tx_unlock(dev); \ + __netif_tx_unlock(txq); \ } \ } -- cgit v1.2.3 From b19fa1fa91845234961c64dbd564671aa7c0fd27 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 23:14:24 -0700 Subject: net: Delete NETDEVICES_MULTIQUEUE kconfig option. Multiple TX queue support is a core networking feature. Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c8d5f128858d..e2d931f9b700 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1043,9 +1043,7 @@ static inline int netif_running(const struct net_device *dev) */ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) { -#ifdef CONFIG_NETDEVICES_MULTIQUEUE clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); -#endif } /** @@ -1057,13 +1055,11 @@ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) */ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) { -#ifdef CONFIG_NETDEVICES_MULTIQUEUE #ifdef CONFIG_NETPOLL_TRAP if (netpoll_trap()) return; #endif set_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); -#endif } /** @@ -1076,12 +1072,8 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) static inline int __netif_subqueue_stopped(const struct net_device *dev, u16 queue_index) { -#ifdef CONFIG_NETDEVICES_MULTIQUEUE return test_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); -#else - return 0; -#endif } static inline int netif_subqueue_stopped(const struct net_device *dev, @@ -1099,7 +1091,6 @@ static inline int netif_subqueue_stopped(const struct net_device *dev, */ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) { -#ifdef CONFIG_NETDEVICES_MULTIQUEUE #ifdef CONFIG_NETPOLL_TRAP if (netpoll_trap()) return; @@ -1107,7 +1098,6 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state)) __netif_schedule(&dev->tx_queue); -#endif } /** @@ -1119,11 +1109,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) */ static inline int netif_is_multiqueue(const struct net_device *dev) { -#ifdef CONFIG_NETDEVICES_MULTIQUEUE return (!!(NETIF_F_MULTI_QUEUE & dev->features)); -#else - return 0; -#endif } /* Use this variant when it is known for sure that it -- cgit v1.2.3 From 79d16385c7f287a33ea771c4dbe60ae43f791b49 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 23:14:46 -0700 Subject: netdev: Move atomic queue state bits into netdev_queue. Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 55 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 16 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e2d931f9b700..203c5504fe43 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -281,14 +281,12 @@ struct header_ops { enum netdev_state_t { - __LINK_STATE_XOFF=0, __LINK_STATE_START, __LINK_STATE_PRESENT, __LINK_STATE_SCHED, __LINK_STATE_NOCARRIER, __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, - __LINK_STATE_QDISC_RUNNING, }; @@ -448,10 +446,17 @@ static inline void napi_synchronize(const struct napi_struct *n) # define napi_synchronize(n) barrier() #endif +enum netdev_queue_state_t +{ + __QUEUE_STATE_XOFF, + __QUEUE_STATE_QDISC_RUNNING, +}; + struct netdev_queue { spinlock_t lock; struct net_device *dev; struct Qdisc *qdisc; + unsigned long state; struct sk_buff *gso_skb; spinlock_t _xmit_lock; int xmit_lock_owner; @@ -952,9 +957,7 @@ extern void __netif_schedule(struct netdev_queue *txq); static inline void netif_schedule_queue(struct netdev_queue *txq) { - struct net_device *dev = txq->dev; - - if (!test_bit(__LINK_STATE_XOFF, &dev->state)) + if (!test_bit(__QUEUE_STATE_XOFF, &txq->state)) __netif_schedule(txq); } @@ -969,9 +972,14 @@ static inline void netif_schedule(struct net_device *dev) * * Allow upper layers to call the device hard_start_xmit routine. */ +static inline void netif_tx_start_queue(struct netdev_queue *dev_queue) +{ + clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state); +} + static inline void netif_start_queue(struct net_device *dev) { - clear_bit(__LINK_STATE_XOFF, &dev->state); + netif_tx_start_queue(&dev->tx_queue); } /** @@ -981,16 +989,21 @@ static inline void netif_start_queue(struct net_device *dev) * Allow upper layers to call the device hard_start_xmit routine. * Used for flow control when transmit resources are available. */ -static inline void netif_wake_queue(struct net_device *dev) +static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) { #ifdef CONFIG_NETPOLL_TRAP if (netpoll_trap()) { - clear_bit(__LINK_STATE_XOFF, &dev->state); + clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state); return; } #endif - if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) - __netif_schedule(&dev->tx_queue); + if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state)) + __netif_schedule(dev_queue); +} + +static inline void netif_wake_queue(struct net_device *dev) +{ + netif_tx_wake_queue(&dev->tx_queue); } /** @@ -1000,9 +1013,14 @@ static inline void netif_wake_queue(struct net_device *dev) * Stop upper layers calling the device hard_start_xmit routine. * Used for flow control when transmit resources are unavailable. */ +static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) +{ + set_bit(__QUEUE_STATE_XOFF, &dev_queue->state); +} + static inline void netif_stop_queue(struct net_device *dev) { - set_bit(__LINK_STATE_XOFF, &dev->state); + netif_tx_stop_queue(&dev->tx_queue); } /** @@ -1011,9 +1029,14 @@ static inline void netif_stop_queue(struct net_device *dev) * * Test if transmit queue on device is currently unable to send. 
*/ +static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue) +{ + return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state); +} + static inline int netif_queue_stopped(const struct net_device *dev) { - return test_bit(__LINK_STATE_XOFF, &dev->state); + return netif_tx_queue_stopped(&dev->tx_queue); } /** @@ -1043,7 +1066,7 @@ static inline int netif_running(const struct net_device *dev) */ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) { - clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); + clear_bit(__QUEUE_STATE_XOFF, &dev->egress_subqueue[queue_index].state); } /** @@ -1059,7 +1082,7 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) if (netpoll_trap()) return; #endif - set_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); + set_bit(__QUEUE_STATE_XOFF, &dev->egress_subqueue[queue_index].state); } /** @@ -1072,7 +1095,7 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) static inline int __netif_subqueue_stopped(const struct net_device *dev, u16 queue_index) { - return test_bit(__LINK_STATE_XOFF, + return test_bit(__QUEUE_STATE_XOFF, &dev->egress_subqueue[queue_index].state); } @@ -1095,7 +1118,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) if (netpoll_trap()) return; #endif - if (test_and_clear_bit(__LINK_STATE_XOFF, + if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev->egress_subqueue[queue_index].state)) __netif_schedule(&dev->tx_queue); } -- cgit v1.2.3 From d8156534040996f6a93a24d3592d5d587f2587e5 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 8 Jul 2008 15:13:05 -0700 Subject: net: add netif_napi_del function to allow for removal of napi structs Adds netif_napi_del function which is used to remove the napi struct from the netdev napi_list in cases where CONFIG_NETPOLL was enabled. The motivation for adding this is to handle the case in which the number of queues on a device changes due to a configuration change. Previously the napi structs for each queue would be left in the list until the netdev was freed. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Jeff Garzik --- include/linux/netdevice.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 203c5504fe43..b54ec16dfbda 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -828,6 +828,19 @@ static inline void netif_napi_add(struct net_device *dev, set_bit(NAPI_STATE_SCHED, &napi->state); } +/** + * netif_napi_del - remove a napi context + * @napi: napi context + * + * netif_napi_del() removes a napi context from the network device napi list + */ +static inline void netif_napi_del(struct napi_struct *napi) +{ +#ifdef CONFIG_NETPOLL + list_del(&napi->dev_list); +#endif +} + struct packet_type { __be16 type; /* This is really htons(ether_type). */ struct net_device *dev; /* NULL is wildcarded here */ -- cgit v1.2.3 From bc1d0411b804ad190cdadabac48a10067f17b9e6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Jul 2008 22:49:30 -0700 Subject: vlan: deliver packets received with VLAN acceleration to network taps When VLAN header stripping is used, packets currently bypass packet sockets (and other network taps) completely. For locally existing VLANs, they appear directly on the VLAN device, for unknown VLANs they are silently dropped. 
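The netif_napi_del() helper added above is essentially a list_del() on the device's napi list. The standalone sketch below models that unlink with a plain doubly linked list; the node type and helper names are simplified stand-ins and not the kernel's list_head API:

#include <stdio.h>

/* Minimal doubly linked list node, standing in for struct list_head. */
struct node {
    struct node *prev, *next;
    const char *name;
};

static void list_init(struct node *head)
{
    head->prev = head->next = head;
}

/* Link n into the list; the kernel uses the list_head helpers for this. */
static void list_add_tail_node(struct node *head, struct node *n)
{
    n->prev = head->prev;
    n->next = head;
    head->prev->next = n;
    head->prev = n;
}

/* Unlink n from whatever list it is on, as list_del() does above. */
static void list_del_node(struct node *n)
{
    n->prev->next = n->next;
    n->next->prev = n->prev;
    n->prev = n->next = n;
}

int main(void)
{
    struct node napi_list;
    struct node q0 = { .name = "rx-napi-0" }, q1 = { .name = "rx-napi-1" };
    struct node *p;

    list_init(&napi_list);
    list_add_tail_node(&napi_list, &q0);
    list_add_tail_node(&napi_list, &q1);

    /* Queue count changed: drop the napi context of the removed queue. */
    list_del_node(&q1);

    for (p = napi_list.next; p != &napi_list; p = p->next)
        printf("still registered: %s\n", p->name);
    return 0;
}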
Add a new function netif_nit_deliver() to deliver incoming packets to all network interface taps and use it in __vlan_hwaccel_rx() to make VLAN packets visible on the underlying device. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b54ec16dfbda..ba5c4639ea91 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1165,6 +1165,7 @@ extern int netif_rx(struct sk_buff *skb); extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); +extern void netif_nit_deliver(struct sk_buff *skb); extern int dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); extern int dev_ethtool(struct net *net, struct ifreq *); -- cgit v1.2.3 From f1f28aa3510ddb84c966bac65611bb866c77a092 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Jul 2008 00:08:33 -0700 Subject: netdev: Add addr_list_lock to struct net_device. This will be used to protect the per-device unicast and multicast address lists, as well as the callbacks into the drivers which configure such state such as ->set_rx_mode() and ->set_multicast_list(). Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ba5c4639ea91..fd0365219181 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -609,6 +609,7 @@ struct net_device unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ + spinlock_t addr_list_lock; struct dev_addr_list *uc_list; /* Secondary unicast mac addresses */ int uc_count; /* Number of installed ucasts */ int uc_promisc; -- cgit v1.2.3 From e308a5d806c852f56590ffdd3834d0df0cbed8d7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Jul 2008 00:13:44 -0700 Subject: netdev: Add netdev->addr_list_lock protection. Add netif_addr_{lock,unlock}{,_bh}() helpers. Use them to protect operations that operate on or read the network device unicast and multicast address lists. Also use them in cases where the code simply wants to block calls into the driver's ->set_rx_mode() and ->set_multicast_list() methods. Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fd0365219181..570cf7affa72 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1498,6 +1498,26 @@ static inline void netif_tx_disable(struct net_device *dev) netif_tx_unlock_bh(dev); } +static inline void netif_addr_lock(struct net_device *dev) +{ + spin_lock(&dev->addr_list_lock); +} + +static inline void netif_addr_lock_bh(struct net_device *dev) +{ + spin_lock_bh(&dev->addr_list_lock); +} + +static inline void netif_addr_unlock(struct net_device *dev) +{ + spin_unlock(&dev->addr_list_lock); +} + +static inline void netif_addr_unlock_bh(struct net_device *dev) +{ + spin_unlock_bh(&dev->addr_list_lock); +} + /* These functions live elsewhere (drivers/net/net_init.c, but related) */ extern void ether_setup(struct net_device *dev); -- cgit v1.2.3 From e8a0464cc950972824e2e128028ae3db666ec1ed Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 00:34:19 -0700 Subject: netdev: Allocate multiple queues for TX. alloc_netdev_mq() now allocates an array of netdev_queue structures for TX, based upon the queue_count argument. Furthermore, all accesses to the TX queues are now vectored through the netdev_get_tx_queue() and netdev_for_each_tx_queue() interfaces. This makes it easy to grep the tree for all things that want to get to a TX queue of a net device. Problem spots which are not really multiqueue aware yet, and only work with one queue, can easily be spotted by grepping for all netdev_get_tx_queue() calls that pass in a zero index. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 69 +++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 23 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 570cf7affa72..f25d4f5a31b0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -463,7 +463,7 @@ struct netdev_queue { struct Qdisc *qdisc_sleeping; struct list_head qdisc_list; struct netdev_queue *next_sched; -}; +} ____cacheline_aligned_in_smp; /* * The DEVICE structure. 
@@ -641,7 +641,9 @@ struct net_device unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ struct netdev_queue rx_queue; - struct netdev_queue tx_queue ____cacheline_aligned_in_smp; + + struct netdev_queue *_tx ____cacheline_aligned_in_smp; + unsigned int num_tx_queues; unsigned long tx_queue_len; /* Max frames per queue allowed */ /* @@ -764,6 +766,25 @@ struct net_device #define NETDEV_ALIGN 32 #define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1) +static inline +struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, + unsigned int index) +{ + return &dev->_tx[index]; +} + +static inline void netdev_for_each_tx_queue(struct net_device *dev, + void (*f)(struct net_device *, + struct netdev_queue *, + void *), + void *arg) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) + f(dev, &dev->_tx[i], arg); +} + /* * Net namespace inlines */ @@ -977,7 +998,7 @@ static inline void netif_schedule_queue(struct netdev_queue *txq) static inline void netif_schedule(struct net_device *dev) { - netif_schedule_queue(&dev->tx_queue); + netif_schedule_queue(netdev_get_tx_queue(dev, 0)); } /** @@ -993,7 +1014,7 @@ static inline void netif_tx_start_queue(struct netdev_queue *dev_queue) static inline void netif_start_queue(struct net_device *dev) { - netif_tx_start_queue(&dev->tx_queue); + netif_tx_start_queue(netdev_get_tx_queue(dev, 0)); } /** @@ -1017,7 +1038,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) static inline void netif_wake_queue(struct net_device *dev) { - netif_tx_wake_queue(&dev->tx_queue); + netif_tx_wake_queue(netdev_get_tx_queue(dev, 0)); } /** @@ -1034,7 +1055,7 @@ static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) static inline void netif_stop_queue(struct net_device *dev) { - netif_tx_stop_queue(&dev->tx_queue); + netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); } /** @@ -1050,7 +1071,7 @@ static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue) static inline int netif_queue_stopped(const struct net_device *dev) { - return netif_tx_queue_stopped(&dev->tx_queue); + return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); } /** @@ -1134,7 +1155,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) #endif if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev->egress_subqueue[queue_index].state)) - __netif_schedule(&dev->tx_queue); + __netif_schedule(netdev_get_tx_queue(dev, 0)); } /** @@ -1430,18 +1451,19 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) static inline void netif_tx_lock(struct net_device *dev) { - __netif_tx_lock(&dev->tx_queue, smp_processor_id()); -} + int cpu = smp_processor_id(); + unsigned int i; -static inline void __netif_tx_lock_bh(struct netdev_queue *txq) -{ - spin_lock_bh(&txq->_xmit_lock); - txq->xmit_lock_owner = smp_processor_id(); + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + __netif_tx_lock(txq, cpu); + } } static inline void netif_tx_lock_bh(struct net_device *dev) { - __netif_tx_lock_bh(&dev->tx_queue); + local_bh_disable(); + netif_tx_lock(dev); } static inline int __netif_tx_trylock(struct netdev_queue *txq) @@ -1454,7 +1476,7 @@ static inline int __netif_tx_trylock(struct netdev_queue *txq) static inline int netif_tx_trylock(struct net_device *dev) { - return __netif_tx_trylock(&dev->tx_queue); + return __netif_tx_trylock(netdev_get_tx_queue(dev, 0)); } static inline void __netif_tx_unlock(struct netdev_queue *txq) @@ -1465,18 +1487,19 @@ static 
inline void __netif_tx_unlock(struct netdev_queue *txq) static inline void netif_tx_unlock(struct net_device *dev) { - __netif_tx_unlock(&dev->tx_queue); -} + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + __netif_tx_unlock(txq); + } -static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) -{ - txq->xmit_lock_owner = -1; - spin_unlock_bh(&txq->_xmit_lock); } static inline void netif_tx_unlock_bh(struct net_device *dev) { - __netif_tx_unlock_bh(&dev->tx_queue); + netif_tx_unlock(dev); + local_bh_enable(); } #define HARD_TX_LOCK(dev, txq, cpu) { \ -- cgit v1.2.3 From 09e83b5d7d1878065e2453239b49b684cd0fe4e5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 01:52:12 -0700 Subject: netdev: Kill NETIF_F_MULTI_QUEUE. There is no need for a feature bit for something that can be tested by simply checking the TX queue count. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f25d4f5a31b0..c02227b9dd7b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -533,7 +533,6 @@ struct net_device #define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */ /* do not use LLTX in new drivers */ #define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ -#define NETIF_F_MULTI_QUEUE 16384 /* Has multiple TX/RX queues */ #define NETIF_F_LRO 32768 /* large receive offload */ /* Segmentation offload features */ @@ -1163,11 +1162,10 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) * @dev: network device * * Check if device has multiple transmit queues - * Always falls if NETDEVICE_MULTIQUEUE is not configured */ static inline int netif_is_multiqueue(const struct net_device *dev) { - return (!!(NETIF_F_MULTI_QUEUE & dev->features)); + return (dev->num_tx_queues > 1); } /* Use this variant when it is known for sure that it -- cgit v1.2.3 From fd2ea0a79faad824258af5dcec1927aa24d81c16 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 01:56:23 -0700 Subject: net: Use queue aware tests throughout. This effectively "flips the switch" by making the core networking and multiqueue-aware drivers use the new TX multiqueue structures. Non-multiqueue drivers need no changes. The interfaces they use such as netif_stop_queue() degenerate into an operation on TX queue zero. So everything "just works" for them. Code that really wants to do "X" to all TX queues now invokes a routine that does so, such as netif_tx_wake_all_queues(), netif_tx_stop_all_queues(), etc. pktgen and netpoll required a little bit more surgery than the others. In particular the pktgen changes, whilst functional, could be largely improved. The initial check in pktgen_xmit() will sometimes check the wrong queue, which is mostly harmless. The thing to do is probably to invoke fill_packet() earlier. The bulk of the netpoll changes is to make the code operate solely on the TX queue indicated by by the SKB queue mapping. Setting of the SKB queue mapping is entirely confined inside of net/core/dev.c:dev_pick_tx(). If we end up needing any kind of special semantics (drops, for example) it will be implemented here. Finally, we now have a "real_num_tx_queues" which is where the driver indicates how many TX queues are actually active. With IGB changes from Jeff Kirsher. Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 82 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 9 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c02227b9dd7b..b5c1e7df64fc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -642,7 +642,13 @@ struct net_device struct netdev_queue rx_queue; struct netdev_queue *_tx ____cacheline_aligned_in_smp; + + /* Number of TX queues allocated at alloc_netdev_mq() time */ unsigned int num_tx_queues; + + /* Number of TX queues currently active in device */ + unsigned int real_num_tx_queues; + unsigned long tx_queue_len; /* Max frames per queue allowed */ /* @@ -1000,6 +1006,14 @@ static inline void netif_schedule(struct net_device *dev) netif_schedule_queue(netdev_get_tx_queue(dev, 0)); } +static inline void netif_tx_schedule_all(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) + netif_schedule_queue(netdev_get_tx_queue(dev, i)); +} + /** * netif_start_queue - allow transmit * @dev: network device @@ -1016,6 +1030,16 @@ static inline void netif_start_queue(struct net_device *dev) netif_tx_start_queue(netdev_get_tx_queue(dev, 0)); } +static inline void netif_tx_start_all_queues(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + netif_tx_start_queue(txq); + } +} + /** * netif_wake_queue - restart transmit * @dev: network device @@ -1040,6 +1064,16 @@ static inline void netif_wake_queue(struct net_device *dev) netif_tx_wake_queue(netdev_get_tx_queue(dev, 0)); } +static inline void netif_tx_wake_all_queues(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + netif_tx_wake_queue(txq); + } +} + /** * netif_stop_queue - stop transmitted packets * @dev: network device @@ -1057,6 +1091,16 @@ static inline void netif_stop_queue(struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); } +static inline void netif_tx_stop_all_queues(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + netif_tx_stop_queue(txq); + } +} + /** * netif_queue_stopped - test if transmit queue is flowblocked * @dev: network device @@ -1100,7 +1144,8 @@ static inline int netif_running(const struct net_device *dev) */ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) { - clear_bit(__QUEUE_STATE_XOFF, &dev->egress_subqueue[queue_index].state); + struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); + clear_bit(__QUEUE_STATE_XOFF, &txq->state); } /** @@ -1112,11 +1157,12 @@ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) */ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); #ifdef CONFIG_NETPOLL_TRAP if (netpoll_trap()) return; #endif - set_bit(__QUEUE_STATE_XOFF, &dev->egress_subqueue[queue_index].state); + set_bit(__QUEUE_STATE_XOFF, &txq->state); } /** @@ -1129,8 +1175,8 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) static inline int __netif_subqueue_stopped(const struct net_device *dev, u16 queue_index) { - return test_bit(__QUEUE_STATE_XOFF, - &dev->egress_subqueue[queue_index].state); + struct 
netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); + return test_bit(__QUEUE_STATE_XOFF, &txq->state); } static inline int netif_subqueue_stopped(const struct net_device *dev, @@ -1148,13 +1194,13 @@ static inline int netif_subqueue_stopped(const struct net_device *dev, */ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); #ifdef CONFIG_NETPOLL_TRAP if (netpoll_trap()) return; #endif - if (test_and_clear_bit(__QUEUE_STATE_XOFF, - &dev->egress_subqueue[queue_index].state)) - __netif_schedule(netdev_get_tx_queue(dev, 0)); + if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state)) + __netif_schedule(txq); } /** @@ -1198,7 +1244,8 @@ extern int dev_set_mtu(struct net_device *, int); extern int dev_set_mac_address(struct net_device *, struct sockaddr *); extern int dev_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev); + struct net_device *dev, + struct netdev_queue *txq); extern int netdev_budget; @@ -1447,6 +1494,12 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) txq->xmit_lock_owner = cpu; } +static inline void __netif_tx_lock_bh(struct netdev_queue *txq) +{ + spin_lock_bh(&txq->_xmit_lock); + txq->xmit_lock_owner = smp_processor_id(); +} + static inline void netif_tx_lock(struct net_device *dev) { int cpu = smp_processor_id(); @@ -1483,6 +1536,12 @@ static inline void __netif_tx_unlock(struct netdev_queue *txq) spin_unlock(&txq->_xmit_lock); } +static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) +{ + txq->xmit_lock_owner = -1; + spin_unlock_bh(&txq->_xmit_lock); +} + static inline void netif_tx_unlock(struct net_device *dev) { unsigned int i; @@ -1514,8 +1573,13 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) static inline void netif_tx_disable(struct net_device *dev) { + unsigned int i; + netif_tx_lock_bh(dev); - netif_stop_queue(dev); + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + netif_tx_stop_queue(txq); + } netif_tx_unlock_bh(dev); } -- cgit v1.2.3 From 6b0fb1261a4655613bed5dac0e935e733969e999 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Jul 2008 02:58:10 -0700 Subject: netdev: Kill struct net_device_subqueue and netdev->egress_subqueue* No longer used. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b5c1e7df64fc..a1c2c2204498 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -118,14 +118,6 @@ struct wireless_dev; #endif /* __KERNEL__ */ -struct net_device_subqueue -{ - /* Give a control state for each queue. This struct may contain - * per-queue locks in the future. - */ - unsigned long state; -}; - /* * Network device statistics. Akin to the 2.0 ether stats but * with byte counters. @@ -761,10 +753,6 @@ struct net_device /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; - - /* The TX queue control structures */ - unsigned int egress_subqueue_count; - struct net_device_subqueue egress_subqueue[1]; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From e3c50d5d25ac09efd9acbe2b2a3e365466de84ed Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Jul 2008 02:58:39 -0700 Subject: netdev: netdev_priv() can now be sane again. 
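The netdev_priv() change named in the subject above boils down to one piece of alignment arithmetic: round the device structure size up and put the private area right after it. The standalone sketch below reproduces that rounding; the MOCK_* constants and mock_* names are assumptions standing in for NETDEV_ALIGN and the real structures:

#include <stdio.h>
#include <stdlib.h>

#define MOCK_ALIGN       32            /* assumed stand-in for NETDEV_ALIGN */
#define MOCK_ALIGN_CONST (MOCK_ALIGN - 1)

struct mock_dev {
    char name[16];
    unsigned long flags;
};

/* Round the device structure size up so the private area is aligned. */
static size_t mock_priv_offset(void)
{
    return (sizeof(struct mock_dev) + MOCK_ALIGN_CONST) & ~(size_t)MOCK_ALIGN_CONST;
}

/* Same pointer arithmetic as the netdev_priv() hunk below. */
static void *mock_priv(struct mock_dev *dev)
{
    return (char *)dev + mock_priv_offset();
}

int main(void)
{
    size_t priv_size = 64;  /* driver-private area, chosen arbitrarily */
    struct mock_dev *dev = calloc(1, mock_priv_offset() + priv_size);

    if (!dev)
        return 1;
    printf("struct size %zu, private data at offset %zu\n",
           sizeof(struct mock_dev),
           (size_t)((char *)mock_priv(dev) - (char *)dev));
    free(dev);
    return 0;
}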
The private area of a netdev is now at a fixed offset once more. Unfortunately, some assumptions that netdev_priv() == netdev->priv crept back into the tree. In particular this happened in the loopback driver. Make it use netdev->ml_priv. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a1c2c2204498..fdac1159253e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -808,7 +808,9 @@ void dev_net_set(struct net_device *dev, struct net *net) */ static inline void *netdev_priv(const struct net_device *dev) { - return dev->priv; + return (char *)dev + ((sizeof(struct net_device) + + NETDEV_ALIGN_CONST) + & ~NETDEV_ALIGN_CONST); } /* Set the sysfs physical device reference for the network logical device -- cgit v1.2.3 From eae792b722fef08dcf3aee88266ee7def9710757 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Jul 2008 03:03:33 -0700 Subject: netdev: Add netdev->select_queue() method. Devices or device layers can set this to control the queue selection performed by dev_pick_tx(). This function runs under RCU protection, which allows overriding functions to have some way of synchronizing with things like dynamic ->real_num_tx_queues adjustments. This makes the spinlock prefetch in dev_queue_xmit() a little bit less effective, but that's the price right now for correctness. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fdac1159253e..9464e6452967 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -724,6 +724,9 @@ struct net_device void (*poll_controller)(struct net_device *dev); #endif + u16 (*select_queue)(struct net_device *dev, + struct sk_buff *skb); + #ifdef CONFIG_NET_NS /* Network namespace this network device is inside */ struct net *nd_net; -- cgit v1.2.3 From 92831bc395ac8390bf759775c50cb6f90c6eb03d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Jul 2008 03:48:01 -0700 Subject: netdev: Kill plain netif_schedule() No more users. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9464e6452967..787fbfc5aebb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -994,11 +994,6 @@ static inline void netif_schedule_queue(struct netdev_queue *txq) __netif_schedule(txq); } -static inline void netif_schedule(struct net_device *dev) -{ - netif_schedule_queue(netdev_get_tx_queue(dev, 0)); -} - static inline void netif_tx_schedule_all(struct net_device *dev) { unsigned int i; -- cgit v1.2.3 From d3b753db7c4f1f37a98b51974d484fda5d86dab5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Jul 2008 20:14:35 -0700 Subject: pkt_sched: Move gso_skb into Qdisc. We liberate any dangling gso_skb during qdisc destruction. It really only matters for the root qdisc. But when qdiscs can be shared by multiple netdev_queue objects, we can't have the gso_skb in the netdev_queue any more. Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 787fbfc5aebb..0883fcf2d16a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -449,7 +449,6 @@ struct netdev_queue { struct net_device *dev; struct Qdisc *qdisc; unsigned long state; - struct sk_buff *gso_skb; spinlock_t _xmit_lock; int xmit_lock_owner; struct Qdisc *qdisc_sleeping; -- cgit v1.2.3 From e2627c8c2241bce45e368e150654d076b58a4595 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Jul 2008 00:56:32 -0700 Subject: pkt_sched: Make QDISC_RUNNING a qdisc state. Currently it is associated with a netdev_queue, but when we have qdisc sharing that no longer makes any sense. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0883fcf2d16a..9240a95793be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -441,7 +441,6 @@ static inline void napi_synchronize(const struct napi_struct *n) enum netdev_queue_state_t { __QUEUE_STATE_XOFF, - __QUEUE_STATE_QDISC_RUNNING, }; struct netdev_queue { -- cgit v1.2.3 From 37437bb2e1ae8af470dfcd5b4ff454110894ccaf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Jul 2008 02:15:04 -0700 Subject: pkt_sched: Schedule qdiscs instead of netdev_queue. When we have shared qdiscs, packets come out of the qdiscs for multiple transmit queues. Therefore it doesn't make any sense to schedule the transmit queue when logically we cannot know ahead of time the TX queue of the SKB that the qdisc->dequeue() will give us. Just for sanity I added a BUG check to make sure we never get into a state where the noop_qdisc is scheduled. Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9240a95793be..1e839fa01434 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -275,7 +275,6 @@ enum netdev_state_t { __LINK_STATE_START, __LINK_STATE_PRESENT, - __LINK_STATE_SCHED, __LINK_STATE_NOCARRIER, __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, @@ -452,7 +451,6 @@ struct netdev_queue { int xmit_lock_owner; struct Qdisc *qdisc_sleeping; struct list_head qdisc_list; - struct netdev_queue *next_sched; } ____cacheline_aligned_in_smp; /* @@ -969,7 +967,7 @@ static inline int unregister_gifconf(unsigned int family) */ struct softnet_data { - struct netdev_queue *output_queue; + struct Qdisc *output_queue; struct sk_buff_head input_pkt_queue; struct list_head poll_list; struct sk_buff *completion_queue; @@ -984,12 +982,12 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data); #define HAVE_NETIF_QUEUE -extern void __netif_schedule(struct netdev_queue *txq); +extern void __netif_schedule(struct Qdisc *q); static inline void netif_schedule_queue(struct netdev_queue *txq) { if (!test_bit(__QUEUE_STATE_XOFF, &txq->state)) - __netif_schedule(txq); + __netif_schedule(txq->qdisc); } static inline void netif_tx_schedule_all(struct net_device *dev) @@ -1042,7 +1040,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) } #endif if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state)) - __netif_schedule(dev_queue); + __netif_schedule(dev_queue->qdisc); } static inline void netif_wake_queue(struct net_device *dev) @@ -1186,7 +1184,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) return; #endif if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state)) - __netif_schedule(txq); + __netif_schedule(txq->qdisc); } /** -- cgit v1.2.3 From ead81cc5fc6d996db6afb20f211241612610a07a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 00:50:32 -0700 Subject: netdevice: Move qdisc_list back into net_device proper. And give it it's own lock. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1e839fa01434..3170bcef734b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -450,7 +450,6 @@ struct netdev_queue { spinlock_t _xmit_lock; int xmit_lock_owner; struct Qdisc *qdisc_sleeping; - struct list_head qdisc_list; } ____cacheline_aligned_in_smp; /* @@ -638,6 +637,8 @@ struct net_device unsigned int real_num_tx_queues; unsigned long tx_queue_len; /* Max frames per queue allowed */ + spinlock_t qdisc_list_lock; + struct list_head qdisc_list; /* * One part is mostly used on xmit path (device) -- cgit v1.2.3 From 83874000929ed63aef30b44083a9f713135ff040 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 00:53:03 -0700 Subject: pkt_sched: Kill netdev_queue lock. We can simply use the qdisc->q.lock for all of the qdisc tree synchronization. Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3170bcef734b..9c5a68850114 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -443,7 +443,6 @@ enum netdev_queue_state_t }; struct netdev_queue { - spinlock_t lock; struct net_device *dev; struct Qdisc *qdisc; unsigned long state; -- cgit v1.2.3 From 3072367300aa8c779e3a14ee8e89de079e90f3ad Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 Jul 2008 22:50:15 -0700 Subject: pkt_sched: Manage qdisc list inside of root qdisc. Idea is from Patrick McHardy. Instead of managing the list of qdiscs on the device level, manage it in the root qdisc of a netdev_queue. This solves all kinds of visibility issues during qdisc destruction. The way to iterate over all qdiscs of a netdev_queue is to visit the netdev_queue->qdisc, and then traverse its list. The only special case is to ignore builtin qdiscs at the root when dumping or doing a qdisc_lookup(). That was not needed previously because builtin qdiscs were not added to the device's qdisc_list. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9c5a68850114..812bcd8b4363 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -636,8 +636,6 @@ struct net_device unsigned int real_num_tx_queues; unsigned long tx_queue_len; /* Max frames per queue allowed */ - spinlock_t qdisc_list_lock; - struct list_head qdisc_list; /* * One part is mostly used on xmit path (device) -- cgit v1.2.3
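Taken together, the multiqueue patches above turn "lock the device for transmit" into "walk num_tx_queues and take each queue's _xmit_lock". The userspace sketch below models that iteration with mutexes standing in for the per-queue spinlocks; the mock_* names and the fixed queue count are assumptions made for the example:

#include <pthread.h>
#include <stdio.h>

#define MOCK_NUM_TXQ 4  /* assumed queue count for the example */

/* Stand-in for struct netdev_queue with its _xmit_lock. */
struct mock_txq {
    pthread_mutex_t xmit_lock;
    int xmit_lock_owner;
};

struct mock_dev {
    struct mock_txq txq[MOCK_NUM_TXQ];
    unsigned int num_tx_queues;
};

/* Model of netif_tx_lock(): take every queue's xmit lock for one owner. */
static void mock_tx_lock_all(struct mock_dev *dev, int cpu)
{
    unsigned int i;

    for (i = 0; i < dev->num_tx_queues; i++) {
        pthread_mutex_lock(&dev->txq[i].xmit_lock);
        dev->txq[i].xmit_lock_owner = cpu;
    }
}

/* Model of netif_tx_unlock(): release every queue again. */
static void mock_tx_unlock_all(struct mock_dev *dev)
{
    unsigned int i;

    for (i = 0; i < dev->num_tx_queues; i++) {
        dev->txq[i].xmit_lock_owner = -1;
        pthread_mutex_unlock(&dev->txq[i].xmit_lock);
    }
}

int main(void)
{
    struct mock_dev dev = { .num_tx_queues = MOCK_NUM_TXQ };
    unsigned int i;

    for (i = 0; i < MOCK_NUM_TXQ; i++) {
        pthread_mutex_init(&dev.txq[i].xmit_lock, NULL);
        dev.txq[i].xmit_lock_owner = -1;
    }

    mock_tx_lock_all(&dev, 0);
    printf("queue 0 owner while locked: %d\n", dev.txq[0].xmit_lock_owner);
    mock_tx_unlock_all(&dev);
    printf("queue 0 owner after unlock: %d\n", dev.txq[0].xmit_lock_owner);
    return 0;
}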