From d894ba18d4e449b3a7f6eb491f16c9e02933736e Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 12 Apr 2016 13:11:25 -0400 Subject: soreuseport: fix ordering for mixed v4/v6 sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the SO_REUSEPORT socket option, it is possible to create sockets in the AF_INET and AF_INET6 domains which are bound to the same IPv4 address. This is only possible with SO_REUSEPORT and when not using IPV6_V6ONLY on the AF_INET6 sockets. Prior to the commits referenced below, an incoming IPv4 packet would always be routed to a socket of type AF_INET when this mixed-mode was used. After those changes, the same packet would be routed to the most recently bound socket (if this happened to be an AF_INET6 socket, it would have an IPv4 mapped IPv6 address). The change in behavior occurred because the recent SO_REUSEPORT optimizations short-circuit the socket scoring logic as soon as they find a match. They did not take into account the scoring logic that favors AF_INET sockets over AF_INET6 sockets in the event of a tie. To fix this problem, this patch changes the insertion order of AF_INET and AF_INET6 addresses in the TCP and UDP socket lists when the sockets have SO_REUSEPORT set. AF_INET sockets will be inserted at the head of the list and AF_INET6 sockets with SO_REUSEPORT set will always be inserted at the tail of the list. This will force AF_INET sockets to always be considered first. Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") Fixes: 125e80b88687 ("soreuseport: fast reuseport TCP socket selection") Reported-by: Maciej Żenczykowski Signed-off-by: Craig Gallek Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rculist_nulls.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 1c33dd7da4a7..4ae95f7e8597 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -98,6 +98,45 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, if (!is_a_nulls(first)) first->pprev = &n->next; } + +/** + * hlist_nulls_add_tail_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the end of the specified hlist_nulls, + * while permitting racing traversals. NOTE: tail insertion requires + * list traversal. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, + struct hlist_nulls_head *h) +{ + struct hlist_nulls_node *i, *last = NULL; + + for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i); + i = hlist_nulls_next_rcu(i)) + last = i; + + if (last) { + n->next = last->next; + n->pprev = &last->next; + rcu_assign_pointer(hlist_nulls_next_rcu(last), n); + } else { + hlist_nulls_add_head_rcu(n, h); + } +} + /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. -- cgit v1.2.3 From 4bfd2e6e53435a214888fd35e230157a38ffc6a0 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 20 Apr 2016 16:01:16 +0300 Subject: net/mlx4_core: Avoid repeated calls to pci enable/disable Maintain the PCI status and provide wrappers for enabling and disabling the PCI device. Performing the actions more than once without doing its opposite results in warning logs. This occurred when EEH hotplugged the device causing a warning for disabling an already disabled device. Fixes: 2ba5fbd62b25 ('net/mlx4_core: Handle AER flow properly') Signed-off-by: Daniel Jurgens Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8541a913f6a3..d1f904c8b2cb 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -828,6 +828,11 @@ struct mlx4_vf_dev { u8 n_ports; }; +enum mlx4_pci_status { + MLX4_PCI_STATUS_DISABLED, + MLX4_PCI_STATUS_ENABLED, +}; + struct mlx4_dev_persistent { struct pci_dev *pdev; struct mlx4_dev *dev; @@ -841,6 +846,8 @@ struct mlx4_dev_persistent { u8 state; struct mutex interface_state_mutex; /* protect SW state */ u8 interface_state; + struct mutex pci_status_mutex; /* sync pci state */ + enum mlx4_pci_status pci_status; }; struct mlx4_dev { -- cgit v1.2.3