From f05940e61845951517eda02a28ccc091888aaab9 Mon Sep 17 00:00:00 2001 From: Karl Heiss Date: Thu, 24 Sep 2015 12:15:06 -0400 Subject: sctp: Whitespace fix Fix indentation in sctp_generate_heartbeat_event. Signed-off-by: Karl Heiss Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 35df1266bf07..f554b9a96e07 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -388,8 +388,8 @@ void sctp_generate_heartbeat_event(unsigned long data) asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); - if (error) - asoc->base.sk->sk_err = -error; + if (error) + asoc->base.sk->sk_err = -error; out_unlock: bh_unlock_sock(asoc->base.sk); -- cgit v1.2.3 From 635682a14427d241bab7bbdeebb48a7d7b91638e Mon Sep 17 00:00:00 2001 From: Karl Heiss Date: Thu, 24 Sep 2015 12:15:07 -0400 Subject: sctp: Prevent soft lockup when sctp_accept() is called during a timeout event A case can occur when sctp_accept() is called by the user during a heartbeat timeout event after the 4-way handshake. Since sctp_assoc_migrate() changes both assoc->base.sk and assoc->ep, the bh_sock_lock in sctp_generate_heartbeat_event() will be taken with the listening socket but released with the new association socket. The result is a deadlock on any future attempts to take the listening socket lock. Note that this race can occur with other SCTP timeouts that take the bh_lock_sock() in the event sctp_accept() is called. BUG: soft lockup - CPU#9 stuck for 67s! [swapper:0] ... RIP: 0010:[] [] _spin_lock+0x1e/0x30 RSP: 0018:ffff880028323b20 EFLAGS: 00000206 RAX: 0000000000000002 RBX: ffff880028323b20 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff880028323be0 RDI: ffff8804632c4b48 RBP: ffffffff8100bb93 R08: 0000000000000000 R09: 0000000000000000 R10: ffff880610662280 R11: 0000000000000100 R12: ffff880028323aa0 R13: ffff8804383c3880 R14: ffff880028323a90 R15: ffffffff81534225 FS: 0000000000000000(0000) GS:ffff880028320000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 00000000006df528 CR3: 0000000001a85000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 0, threadinfo ffff880616b70000, task ffff880616b6cab0) Stack: ffff880028323c40 ffffffffa01c2582 ffff880614cfb020 0000000000000000 0100000000000000 00000014383a6c44 ffff8804383c3880 ffff880614e93c00 ffff880614e93c00 0000000000000000 ffff8804632c4b00 ffff8804383c38b8 Call Trace: [] ? sctp_rcv+0x492/0xa10 [sctp] [] ? nf_iterate+0x69/0xb0 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? nf_hook_slow+0x76/0x120 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? ip_local_deliver_finish+0xdd/0x2d0 [] ? ip_local_deliver+0x98/0xa0 [] ? ip_rcv_finish+0x12d/0x440 [] ? ip_rcv+0x275/0x350 [] ? __netif_receive_skb+0x4ab/0x750 ... With lockdep debugging: ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- CslRx/12087 is trying to release lock (slock-AF_INET) at: [] sctp_generate_timeout_event+0x40/0xe0 [sctp] but there are no more locks to release! other info that might help us debug this: 2 locks held by CslRx/12087: #0: (&asoc->timers[i]){+.-...}, at: [] run_timer_softirq+0x16f/0x3e0 #1: (slock-AF_INET){+.-...}, at: [] sctp_generate_timeout_event+0x23/0xe0 [sctp] Ensure the socket taken is also the same one that is released by saving a copy of the socket before entering the timeout event critical section. Signed-off-by: Karl Heiss Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index f554b9a96e07..6098d4c42fa9 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -244,12 +244,13 @@ void sctp_generate_t3_rtx_event(unsigned long peer) int error; struct sctp_transport *transport = (struct sctp_transport *) peer; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); /* Check whether a task is in the sock. */ - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -272,10 +273,10 @@ void sctp_generate_t3_rtx_event(unsigned long peer) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -285,11 +286,12 @@ out_unlock: static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_event_timeout_t timeout_type) { - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); int error = 0; - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy: timer %d\n", __func__, timeout_type); @@ -312,10 +314,10 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, (void *)timeout_type, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_association_put(asoc); } @@ -365,10 +367,11 @@ void sctp_generate_heartbeat_event(unsigned long data) int error = 0; struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -389,10 +392,10 @@ void sctp_generate_heartbeat_event(unsigned long data) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -403,10 +406,11 @@ void sctp_generate_proto_unreach_event(unsigned long data) { struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -427,7 +431,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_association_put(asoc); } -- cgit v1.2.3 From 06a15f51cf3618e32a73871ee6a547ef7fd902b5 Mon Sep 17 00:00:00 2001 From: Alexander Couzens Date: Mon, 28 Sep 2015 11:32:42 +0200 Subject: l2tp: protect tunnel->del_work by ref_count There is a small chance that tunnel_free() is called before tunnel->del_work scheduled resulting in a zero pointer dereference. Signed-off-by: Alexander Couzens Acked-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index f6b090df3930..afca2eb4dfa7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1319,7 +1319,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work) tunnel = container_of(work, struct l2tp_tunnel, del_work); sk = l2tp_tunnel_sock_lookup(tunnel); if (!sk) - return; + goto out; sock = sk->sk_socket; @@ -1341,6 +1341,8 @@ static void l2tp_tunnel_del_work(struct work_struct *work) } l2tp_tunnel_sock_put(sk); +out: + l2tp_tunnel_dec_refcount(tunnel); } /* Create a socket for the tunnel, if one isn't set up by @@ -1636,8 +1638,13 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { + l2tp_tunnel_inc_refcount(tunnel); l2tp_tunnel_closeall(tunnel); - return (false == queue_work(l2tp_wq, &tunnel->del_work)); + if (false == queue_work(l2tp_wq, &tunnel->del_work)) { + l2tp_tunnel_dec_refcount(tunnel); + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); -- cgit v1.2.3 From 2103d6b818fcdae15ffa04cf385f770e6c3892c3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 28 Sep 2015 14:34:04 +0200 Subject: net: sctp: Don't use 64 kilobyte lookup table for four elements Seemingly innocuous sctp_trans_state_to_prio_map[] array is way bigger than it looks, since "[SCTP_UNKNOWN] = 2" expands into "[0xffff] = 2" ! This patch replaces it with switch() statement. Signed-off-by: Denys Vlasenko CC: Vlad Yasevich CC: Neil Horman CC: Marcelo Ricardo Leitner CC: linux-sctp@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-kernel@vger.kernel.org Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/associola.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 197c3f59ecbf..b00f1f9611d6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1208,20 +1208,22 @@ void sctp_assoc_update(struct sctp_association *asoc, * within this document. * * Our basic strategy is to round-robin transports in priorities - * according to sctp_state_prio_map[] e.g., if no such + * according to sctp_trans_score() e.g., if no such * transport with state SCTP_ACTIVE exists, round-robin through * SCTP_UNKNOWN, etc. You get the picture. */ -static const u8 sctp_trans_state_to_prio_map[] = { - [SCTP_ACTIVE] = 3, /* best case */ - [SCTP_UNKNOWN] = 2, - [SCTP_PF] = 1, - [SCTP_INACTIVE] = 0, /* worst case */ -}; - static u8 sctp_trans_score(const struct sctp_transport *trans) { - return sctp_trans_state_to_prio_map[trans->state]; + switch (trans->state) { + case SCTP_ACTIVE: + return 3; /* best case */ + case SCTP_UNKNOWN: + return 2; + case SCTP_PF: + return 1; + default: /* case SCTP_INACTIVE */ + return 0; /* worst case */ + } } static struct sctp_transport *sctp_trans_elect_tie(struct sctp_transport *trans1, -- cgit v1.2.3 From 9f389e35674f5b086edd70ed524ca0f287259725 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Sat, 26 Sep 2015 18:50:43 -0400 Subject: af_unix: return data from multiple SKBs on recv() with MSG_PEEK flag AF_UNIX sockets now return multiple skbs from recv() when MSG_PEEK flag is set. This is referenced in kernel bugzilla #12323 @ https://bugzilla.kernel.org/show_bug.cgi?id=12323 As described both in the BZ and lkml thread @ http://lkml.org/lkml/2008/1/8/444 calling recv() with MSG_PEEK on an AF_UNIX socket only reads a single skb, where the desired effect is to return as much skb data has been queued, until hitting the recv buffer size (whichever comes first). The modified MSG_PEEK path will now move to the next skb in the tree and jump to the again: label, rather than following the natural loop structure. This requires duplicating some of the loop head actions. This was tested using the python socketpair python code attached to the bugzilla issue. Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- net/unix/af_unix.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 03ee4d359f6a..ef31b40ad550 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2179,8 +2179,21 @@ unlock: if (UNIXCB(skb).fp) scm.fp = scm_fp_dup(UNIXCB(skb).fp); - sk_peek_offset_fwd(sk, chunk); + if (skip) { + sk_peek_offset_fwd(sk, chunk); + skip -= chunk; + } + if (UNIXCB(skb).fp) + break; + + last = skb; + last_len = skb->len; + unix_state_lock(sk); + skb = skb_peek_next(skb, &sk->sk_receive_queue); + if (skb) + goto again; + unix_state_unlock(sk); break; } } while (size); -- cgit v1.2.3 From 75c261b51ba19f0791de608f0acfb94956f78c76 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 28 Sep 2015 15:05:33 +0200 Subject: net sysfs: Print link speed as signed integer Otherwise 4294967295 (MBit/s) (-1) will be printed when there is no link. Documentation/ABI/testing/sysfs-class-net does not state if this shall be signed or unsigned. Also remove the now unused variable fmt_udec. Signed-off-by: Alexander Stein Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 805a95a48107..830f8a7c1cb1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -31,7 +31,6 @@ static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; -static const char fmt_udec[] = "%u\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; @@ -202,7 +201,7 @@ static ssize_t speed_show(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; if (!__ethtool_get_settings(netdev, &cmd)) - ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); + ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); } rtnl_unlock(); return ret; -- cgit v1.2.3 From 741a11d9e4103a8e1c590ef1280143fe654e4e33 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 28 Sep 2015 10:12:13 -0700 Subject: net: ipv6: Add RT6_LOOKUP_F_IFACE flag if oif is set Wolfgang reported that IPv6 stack is ignoring oif in output route lookups: With ipv6, ip -6 route get always returns the specific route. $ ip -6 r 2001:db8:e2::1 dev enp2s0 proto kernel metric 256 2001:db8:e2::/64 dev enp2s0 metric 1024 2001:db8:e3::1 dev enp3s0 proto kernel metric 256 2001:db8:e3::/64 dev enp3s0 metric 1024 fe80::/64 dev enp3s0 proto kernel metric 256 default via 2001:db8:e3::255 dev enp3s0 metric 1024 $ ip -6 r get 2001:db8:e2::100 2001:db8:e2::100 from :: dev enp2s0 src 2001:db8:e3::1 metric 0 cache $ ip -6 r get 2001:db8:e2::100 oif enp3s0 2001:db8:e2::100 from :: dev enp2s0 src 2001:db8:e3::1 metric 0 cache The stack does consider the oif but a mismatch in rt6_device_match is not considered fatal because RT6_LOOKUP_F_IFACE is not set in the flags. Cc: Wolfgang Nothdurft Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f204089e854c..cb32ce250db0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1193,7 +1193,8 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, fl6->flowi6_iif = LOOPBACK_IFINDEX; - if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) + if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || + fl6->flowi6_oif) flags |= RT6_LOOKUP_F_IFACE; if (!ipv6_addr_any(&fl6->saddr)) -- cgit v1.2.3 From 31b33dfb0a144469dd805514c9e63f4993729a48 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 28 Sep 2015 17:24:25 -0700 Subject: skbuff: Fix skb checksum partial check. Earlier patch 6ae459bda tried to detect void ckecksum partial skb by comparing pull length to checksum offset. But it does not work for all cases since checksum-offset depends on updates to skb->data. Following patch fixes it by validating checksum start offset after skb-data pointer is updated. Negative value of checksum offset start means there is no need to checksum. Fixes: 6ae459bda ("skbuff: Fix skb checksum flag on skb pull") Reported-by: Andrew Vagin Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/core/skbuff.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dad4dd37e2aa..fab4599ba8b2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2958,11 +2958,12 @@ EXPORT_SYMBOL_GPL(skb_append_pagefrags); */ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { + unsigned char *data = skb->data; + BUG_ON(len > skb->len); - skb->len -= len; - BUG_ON(skb->len < skb->data_len); - skb_postpull_rcsum(skb, skb->data, len); - return skb->data += len; + __skb_pull(skb, len); + skb_postpull_rcsum(skb, data, len); + return skb->data; } EXPORT_SYMBOL_GPL(skb_pull_rcsum); -- cgit v1.2.3 From 57a47532c4312159935c98b7f1cf0e62296b9171 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 29 Sep 2015 14:17:54 -0400 Subject: net: dsa: fix preparation of a port STP update Because of the default 0 value of ret in dsa_slave_port_attr_set, a driver may return -EOPNOTSUPP from the commit phase of a STP state, which triggers a WARN() from switchdev. This happened on a 6185 switch which does not support hardware bridging. Fixes: 3563606258cf ("switchdev: convert STP update to switchdev attr set") Reported-by: Andrew Lunn Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/slave.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index cce97385f743..7d91f4612ac0 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -458,12 +458,17 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state) static int dsa_slave_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) { - int ret = 0; + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret; switch (attr->id) { case SWITCHDEV_ATTR_PORT_STP_STATE: - if (attr->trans == SWITCHDEV_TRANS_COMMIT) - ret = dsa_slave_stp_update(dev, attr->u.stp_state); + if (attr->trans == SWITCHDEV_TRANS_PREPARE) + ret = ds->drv->port_stp_update ? 0 : -EOPNOTSUPP; + else + ret = ds->drv->port_stp_update(ds, p->port, + attr->u.stp_state); break; default: ret = -EOPNOTSUPP; -- cgit v1.2.3 From b84f78782052ee4516903e5d0566a5eee365b771 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 19:07:07 -0700 Subject: net: Initialize flow flags in input path The fib_table_lookup tracepoint found 2 places where the flowi4_flags is not initialized. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 1 + net/ipv4/route.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 6fcbd215cdbc..690bcbc59f26 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -340,6 +340,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_tun_key.tun_id = 0; + fl4.flowi4_flags = 0; no_addr = idev->ifa_list == NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c6ad99ad0ffb..c81deb85acb4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1737,6 +1737,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.flowi4_flags = 0; fl4.daddr = daddr; fl4.saddr = saddr; err = fib_lookup(net, &fl4, &res, 0); -- cgit v1.2.3