From 4cded0a3a9c69d3c08b2ab15a7dd8206775142e4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Mon, 1 Oct 2018 15:02:26 -0700
Subject: tcp/dccp: fix lockdep issue when SYN is backlogged

[ Upstream commit 1ad98e9d1bdf4724c0a8532fabd84bf3c457c2bc ]

In normal SYN processing, packets are handled without listener lock
and in RCU protected ingress path.

But syzkaller is known to be able to trick us and SYN packets might be
processed in process context, after being queued into socket backlog.

In commit 06f877d613be ("tcp/dccp: fix other lockdep splats accessing
ireq_opt") I made a very stupid fix, that happened to work mostly
because of the regular path being RCU protected.

Really the thing protecting ireq->ireq_opt is RCU read lock, and the
pseudo request refcnt is not relevant.

This patch extends what I did in commit 449809a66c1d ("tcp/dccp: block
BH for SYN processing") by adding an extra rcu_read_{lock|unlock} pair
in the paths that might be taken when processing SYN from socket
backlog (thus possibly in process context)

Fixes: 06f877d613be ("tcp/dccp: fix other lockdep splats accessing ireq_opt")
Signed-off-by: Eric Dumazet
Reported-by: syzbot
Signed-off-by: David S. Miller
Signed-off-by: Greg Kroah-Hartman
---
 net/ipv4/tcp_input.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9d0b73aa649f..3f8b5d3c1e59 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5978,11 +5978,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (th->fin)
 			goto discard;
 		/* It is possible that we process SYN packets from backlog,
-		 * so we need to make sure to disable BH right there.
+		 * so we need to make sure to disable BH and RCU right there.
 		 */
+		rcu_read_lock();
 		local_bh_disable();
 		acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
 		local_bh_enable();
+		rcu_read_unlock();

 		if (!acceptable)
 			return 1;
--
cgit v1.2.3
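The rule this patch enforces generalizes beyond tcp_rcv_state_process(): code
that normally runs only in the RCU-protected, BH-disabled softirq ingress path
must recreate both protections when it can also be reached from process
context via the socket backlog, because the RCU read lock (not the request
socket refcnt) is what keeps ireq->ireq_opt stable. A minimal sketch of that
pattern follows; handle_backlogged_syn() is a hypothetical wrapper name, not a
function from the patch or the kernel tree:

	#include <linux/bottom_half.h>
	#include <linux/rcupdate.h>
	#include <linux/skbuff.h>
	#include <net/inet_connection_sock.h>

	/* Hypothetical sketch: take RCU read lock and disable BH before
	 * calling a handler written for the softirq ingress path, so that
	 * running it from process context (socket backlog) provides the
	 * same guarantees it silently relied on before.
	 */
	static int handle_backlogged_syn(struct sock *sk, struct sk_buff *skb)
	{
		const struct inet_connection_sock *icsk = inet_csk(sk);
		int acceptable;

		rcu_read_lock();	/* protects ireq->ireq_opt dereferences */
		local_bh_disable();	/* match the softirq execution environment */
		acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
		local_bh_enable();
		rcu_read_unlock();

		return acceptable;
	}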
From b475cf3bf1e8212b0287c6d15249e2c942693ae5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 10 Oct 2018 12:30:12 -0700
Subject: net: add rb_to_skb() and other rb tree helpers

Generalize private netem_rb_to_skb()

TCP rtx queue will soon be converted to rb-tree,
so we will need skb_rbtree_walk() helpers.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
(cherry picked from commit 18a4c0eab2623cc95be98a1e6af1ad18e7695977)
Signed-off-by: Greg Kroah-Hartman
---
 net/ipv4/tcp_input.c | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3f8b5d3c1e59..dbb153c6b21a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4406,7 +4406,7 @@ static void tcp_ofo_queue(struct sock *sk)

 	p = rb_first(&tp->out_of_order_queue);
 	while (p) {
-		skb = rb_entry(p, struct sk_buff, rbnode);
+		skb = rb_to_skb(p);
 		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
 			break;

@@ -4470,7 +4470,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct rb_node **p, *q, *parent;
+	struct rb_node **p, *parent;
 	struct sk_buff *skb1;
 	u32 seq, end_seq;
 	bool fragstolen;
@@ -4529,7 +4529,7 @@ coalesce_done:
 	parent = NULL;
 	while (*p) {
 		parent = *p;
-		skb1 = rb_entry(parent, struct sk_buff, rbnode);
+		skb1 = rb_to_skb(parent);
 		if (before(seq, TCP_SKB_CB(skb1)->seq)) {
 			p = &parent->rb_left;
 			continue;
@@ -4574,9 +4574,7 @@ insert:

 merge_right:
 	/* Remove other segments covered by skb. */
-	while ((q = rb_next(&skb->rbnode)) != NULL) {
-		skb1 = rb_entry(q, struct sk_buff, rbnode);
-
+	while ((skb1 = skb_rb_next(skb)) != NULL) {
 		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
 			break;
 		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
@@ -4591,7 +4589,7 @@ merge_right:
 		tcp_drop(sk, skb1);
 	}
 	/* If there is no skb after us, we are the last_skb ! */
-	if (!q)
+	if (!skb1)
 		tp->ooo_last_skb = skb;

 add_sack:
@@ -4792,7 +4790,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
 	if (list)
 		return !skb_queue_is_last(list, skb) ? skb->next : NULL;

-	return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+	return skb_rb_next(skb);
 }

 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -4821,7 +4819,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)

 	while (*p) {
 		parent = *p;
-		skb1 = rb_entry(parent, struct sk_buff, rbnode);
+		skb1 = rb_to_skb(parent);
 		if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
 			p = &parent->rb_left;
 		else
@@ -4941,19 +4939,12 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 range_truesize, sum_tiny = 0;
 	struct sk_buff *skb, *head;
-	struct rb_node *p;
 	u32 start, end;

-	p = rb_first(&tp->out_of_order_queue);
-	skb = rb_entry_safe(p, struct sk_buff, rbnode);
+	skb = skb_rb_first(&tp->out_of_order_queue);
 new_range:
 	if (!skb) {
-		p = rb_last(&tp->out_of_order_queue);
-		/* Note: This is possible p is NULL here. We do not
-		 * use rb_entry_safe(), as ooo_last_skb is valid only
-		 * if rbtree is not empty.
-		 */
-		tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+		tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
 		return;
 	}
 	start = TCP_SKB_CB(skb)->seq;
@@ -4961,7 +4952,7 @@ new_range:
 	range_truesize = skb->truesize;

 	for (head = skb;;) {
-		skb = tcp_skb_next(skb, NULL);
+		skb = skb_rb_next(skb);

 		/* Range is terminated when we see a gap or when
 		 * we are at the queue end.
@@ -5017,7 +5008,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
 		goal -= rb_to_skb(node)->truesize;
-		tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+		tcp_drop(sk, rb_to_skb(node));
 		if (!prev || goal <= 0) {
 			sk_mem_reclaim(sk);
 			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
@@ -5027,7 +5018,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 		}
 		node = prev;
 	} while (node);
-	tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+	tp->ooo_last_skb = rb_to_skb(prev);

 	/* Reset SACK state.  A conforming SACK implementation will
 	 * do the same at a timeout based retransmit.  When a connection
--
cgit v1.2.3
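Because the dump above is limited to net/ipv4/tcp_input.c, the helpers the
second patch converts to are not visible in it. The upstream commit defines
them in include/linux/skbuff.h approximately as follows (shown for reference;
consult the tree for the authoritative definitions):

	/* rb-tree helpers for sk_buff, built on rb_entry_safe(), which
	 * yields NULL for a NULL rb_node; skb_rb_next()/skb_rb_prev()
	 * are therefore safe to call at the edges of the tree.
	 */
	#define rb_to_skb(rb)		rb_entry_safe(rb, struct sk_buff, rbnode)
	#define skb_rb_first(root)	rb_to_skb(rb_first(root))
	#define skb_rb_last(root)	rb_to_skb(rb_last(root))
	#define skb_rb_next(skb)	rb_to_skb(rb_next(&(skb)->rbnode))
	#define skb_rb_prev(skb)	rb_to_skb(rb_prev(&(skb)->rbnode))

	#define skb_rbtree_walk(skb, root)				\
		for (skb = skb_rb_first(root); skb != NULL;		\
		     skb = skb_rb_next(skb))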
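To illustrate the effect on iteration code, here is a hypothetical walk (not a
hunk from the patch) over an skb rb-tree such as tp->out_of_order_queue, in
the old rb_node style and in the helper style the patch moves to:

	#include <linux/rbtree.h>
	#include <linux/skbuff.h>

	/* Hypothetical example: walk_ooo_queue() is illustration only. */
	static void walk_ooo_queue(struct rb_root *root)
	{
		struct sk_buff *skb;

		/* Before: step through rb_nodes, converting each by hand. */
		struct rb_node *p = rb_first(root);

		while (p) {
			skb = rb_entry(p, struct sk_buff, rbnode);
			/* ... inspect TCP_SKB_CB(skb)->seq ... */
			p = rb_next(p);
		}

		/* After: the helpers keep the loop at the sk_buff level. */
		skb_rbtree_walk(skb, root) {
			/* ... inspect TCP_SKB_CB(skb)->seq ... */
		}
	}

The same simplification is what lets the patch drop the rb_node locals (q, p)
from tcp_data_queue_ofo() and tcp_collapse_ofo_queue() in the hunks above.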