author    David S. Miller <davem@davemloft.net>  2015-03-17 22:02:53 -0400
committer David S. Miller <davem@davemloft.net>  2015-03-17 22:02:53 -0400
commit    5284143057708af297eea10812a67d18e42e9abe (patch)
tree      798f475f48fbdff0eae359c3f345a1a0eaf83ca2
parent    9f2dbdd9b11d40f5fe0749eb91cd1cfc86fde575 (diff)
parent    0470c8ca1d57927f2cc3e1d5add1fb2834609447 (diff)
Merge branch 'listener_refactor_part_12'
Eric Dumazet says:

====================
inet: tcp listener refactoring, part 12

By adding a pointer back to the listener, we are preparing synack rtx
handling to no longer be governed by the listener keepalive timer, as
this is the most problematic source of contention on the listener
spinlock.

Note that TCP FastOpen had such a pointer anyway, so we make it generic.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
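The core of the series is the ownership pattern sketched below: every request_sock takes a reference on the listener it belongs to when it is allocated, and drops it when it is freed. This is a minimal userspace illustration using C11 atomics in place of the kernel's refcounting primitives; the structures are toy stand-ins, not the kernel definitions.

#include <stdatomic.h>
#include <stdlib.h>

/* Toy stand-ins for the kernel structures; only refcounting is modeled. */
struct sock {
	atomic_int refcnt;
};

struct request_sock {
	struct sock *rsk_listener;	/* the generic back-pointer this series adds */
	atomic_int rsk_refcnt;
};

static void sock_hold(struct sock *sk)
{
	atomic_fetch_add(&sk->refcnt, 1);
}

static void sock_put(struct sock *sk)
{
	if (atomic_fetch_sub(&sk->refcnt, 1) == 1)	/* dropped the last reference */
		free(sk);
}

static struct request_sock *reqsk_alloc(struct sock *sk_listener)
{
	struct request_sock *req = malloc(sizeof(*req));

	if (req) {
		sock_hold(sk_listener);		/* listener cannot go away under us */
		req->rsk_listener = sk_listener;
		atomic_init(&req->rsk_refcnt, 0);	/* published later; see the request_sock.h hunk */
	}
	return req;
}

static void reqsk_free(struct request_sock *req)
{
	if (req->rsk_listener)
		sock_put(req->rsk_listener);	/* the reference travels with the request */
	free(req);
}

int main(void)
{
	struct sock *lsk = malloc(sizeof(*lsk));
	struct request_sock *req;

	if (!lsk)
		return 1;
	atomic_init(&lsk->refcnt, 1);		/* our own reference */
	req = reqsk_alloc(lsk);
	if (req)
		reqsk_free(req);		/* drops the listener reference */
	sock_put(lsk);				/* drops ours; frees lsk */
	return 0;
}

Because the reference now travels with the request itself, the FastOpen paths in net/core/request_sock.c and net/ipv4/tcp_fastopen.c below can drop their private sock_hold()/sock_put() pairs.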
-rw-r--r--  include/linux/tcp.h                  |  2
-rw-r--r--  include/net/inet_connection_sock.h   |  5
-rw-r--r--  include/net/inet_sock.h              | 22
-rw-r--r--  include/net/request_sock.h           | 23
-rw-r--r--  net/core/request_sock.c              | 18
-rw-r--r--  net/dccp/ipv4.c                      |  3
-rw-r--r--  net/dccp/ipv6.c                      |  3
-rw-r--r--  net/ipv4/inet_connection_sock.c      | 13
-rw-r--r--  net/ipv4/syncookies.c                | 16
-rw-r--r--  net/ipv4/tcp_fastopen.c              |  8
-rw-r--r--  net/ipv4/tcp_input.c                 | 25
-rw-r--r--  net/ipv6/syncookies.c                | 12

12 files changed, 77 insertions(+), 73 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 97dbf16f7d9d..f869ae8afbaf 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -111,7 +111,7 @@ struct tcp_request_sock_ops;
struct tcp_request_sock {
struct inet_request_sock req;
const struct tcp_request_sock_ops *af_specific;
- struct sock *listener; /* needed for TFO */
+ bool tfo_listener;
u32 rcv_isn;
u32 snt_isn;
u32 snt_synack; /* synack sent time */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 191feec60205..b9a6b0a94cc6 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -275,11 +275,6 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk,
struct sock *child)
{
reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
- /* before letting lookups find us, make sure all req fields
- * are committed to memory.
- */
- smp_wmb();
- atomic_set(&req->rsk_refcnt, 1);
}
void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index c9ed91891887..b6c3737da4e9 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -81,7 +81,6 @@ struct inet_request_sock {
#define ir_cookie req.__req_common.skc_cookie
#define ireq_net req.__req_common.skc_net
#define ireq_state req.__req_common.skc_state
-#define ireq_refcnt req.__req_common.skc_refcnt
#define ireq_family req.__req_common.skc_family
kmemcheck_bitfield_begin(flags);
@@ -244,25 +243,8 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr,
initval);
}
-static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops)
-{
- struct request_sock *req = reqsk_alloc(ops);
- struct inet_request_sock *ireq = inet_rsk(req);
-
- if (req != NULL) {
- kmemcheck_annotate_bitfield(ireq, flags);
- ireq->opt = NULL;
- atomic64_set(&ireq->ir_cookie, 0);
- ireq->ireq_state = TCP_NEW_SYN_RECV;
-
- /* Following is temporary. It is coupled with debugging
- * helpers in reqsk_put() & reqsk_free()
- */
- atomic_set(&ireq->ireq_refcnt, 0);
- }
-
- return req;
-}
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+ struct sock *sk_listener);
static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
{
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 56dc2faba47e..3fa4f824900a 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -52,6 +52,7 @@ struct request_sock {
#define rsk_refcnt __req_common.skc_refcnt
struct request_sock *dl_next;
+ struct sock *rsk_listener;
u16 mss;
u8 num_retrans; /* number of retransmits */
u8 cookie_ts:1; /* syncookie: encode tcpopts in timestamp */
@@ -67,13 +68,21 @@ struct request_sock {
u32 peer_secid;
};
-static inline struct request_sock *reqsk_alloc(const struct request_sock_ops *ops)
+static inline struct request_sock *
+reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
{
struct request_sock *req = kmem_cache_alloc(ops->slab, GFP_ATOMIC);
- if (req != NULL)
+ if (req) {
req->rsk_ops = ops;
-
+ sock_hold(sk_listener);
+ req->rsk_listener = sk_listener;
+
+ /* Following is temporary. It is coupled with debugging
+ * helpers in reqsk_put() & reqsk_free()
+ */
+ atomic_set(&req->rsk_refcnt, 0);
+ }
return req;
}
@@ -88,6 +97,8 @@ static inline void reqsk_free(struct request_sock *req)
WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0);
req->rsk_ops->destructor(req);
+ if (req->rsk_listener)
+ sock_put(req->rsk_listener);
kmem_cache_free(req->rsk_ops->slab, req);
}
@@ -286,6 +297,12 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
req->sk = NULL;
req->dl_next = lopt->syn_table[hash];
+ /* before letting lookups find us, make sure all req fields
+ * are committed to memory and refcnt initialized.
+ */
+ smp_wmb();
+ atomic_set(&req->rsk_refcnt, 1);
+
write_lock(&queue->syn_wait_lock);
lopt->syn_table[hash] = req;
write_unlock(&queue->syn_wait_lock);
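The last hunk above moves the smp_wmb()/atomic_set() pair from inet_csk_reqsk_queue_add() to the point where the request becomes findable in the syn_table: a request must not be visible to lookups before all of its fields, including the refcount, are committed to memory. Below is a self-contained C11 sketch of the same publish/lookup discipline, with release/acquire ordering standing in for the kernel's smp_wmb() and its read-side counterpart; toy_req and its fields are invented for illustration.

#include <stdatomic.h>

struct toy_req {
	int mss;			/* stands in for "all req fields" */
	atomic_int rsk_refcnt;
};

static void publish_req(struct toy_req *req, int mss)
{
	req->mss = mss;
	/* No store above may be reordered past this one; this plays the
	 * role of smp_wmb() + atomic_set() in the hunk above.
	 */
	atomic_store_explicit(&req->rsk_refcnt, 1, memory_order_release);
}

static int lookup_req(struct toy_req *req, int *mss_out)
{
	if (atomic_load_explicit(&req->rsk_refcnt, memory_order_acquire) == 0)
		return 0;		/* not published yet; skip it */
	*mss_out = req->mss;		/* guaranteed to observe the store in publish_req() */
	return 1;
}

int main(void)
{
	struct toy_req req;
	int mss;

	req.mss = 0;
	atomic_init(&req.rsk_refcnt, 0);
	publish_req(&req, 536);
	return lookup_req(&req, &mss) ? 0 : 1;
}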
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index e910317ef6d9..cc39a2aa663a 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -153,24 +153,22 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
* case might also exist in tcp_v4_hnd_req() that will trigger this locking
* order.
*
- * When a TFO req is created, it needs to sock_hold its listener to prevent
- * the latter data structure from going away.
- *
- * This function also sets "treq->listener" to NULL and unreference listener
- * socket. treq->listener is used by the listener so it is protected by the
+ * This function also sets "treq->tfo_listener" to false.
+ * treq->tfo_listener is used by the listener so it is protected by the
* fastopenq->lock in this function.
*/
void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
bool reset)
{
- struct sock *lsk = tcp_rsk(req)->listener;
- struct fastopen_queue *fastopenq =
- inet_csk(lsk)->icsk_accept_queue.fastopenq;
+ struct sock *lsk = req->rsk_listener;
+ struct fastopen_queue *fastopenq;
+
+ fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq;
tcp_sk(sk)->fastopen_rsk = NULL;
spin_lock_bh(&fastopenq->lock);
fastopenq->qlen--;
- tcp_rsk(req)->listener = NULL;
+ tcp_rsk(req)->tfo_listener = false;
if (req->sk) /* the child socket hasn't been accepted yet */
goto out;
@@ -179,7 +177,6 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
* special RST handling below.
*/
spin_unlock_bh(&fastopenq->lock);
- sock_put(lsk);
reqsk_put(req);
return;
}
@@ -201,5 +198,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
fastopenq->qlen++;
out:
spin_unlock_bh(&fastopenq->lock);
- sock_put(lsk);
}
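With the generic back-pointer keeping the listener alive, reqsk_fastopen_remove() shrinks to queue bookkeeping: per the comment above, tfo_listener is only read and written under fastopenq->lock, which is what makes replacing the old pointer with a plain bool safe. A toy model of that handshake using a pthread mutex for the lock; the types and names here are invented for illustration.

#include <pthread.h>
#include <stdbool.h>

struct toy_fastopenq {
	pthread_mutex_t lock;		/* models fastopenq->lock */
	int qlen;
};

struct toy_treq {
	bool tfo_listener;		/* models tcp_rsk(req)->tfo_listener */
};

static void toy_fastopen_remove(struct toy_fastopenq *q, struct toy_treq *treq)
{
	pthread_mutex_lock(&q->lock);
	q->qlen--;
	treq->tfo_listener = false;	/* readers check this under the same lock */
	pthread_mutex_unlock(&q->lock);
}

int main(void)
{
	struct toy_fastopenq q = { PTHREAD_MUTEX_INITIALIZER, 1 };
	struct toy_treq treq = { true };

	toy_fastopen_remove(&q, &treq);
	return treq.tfo_listener ? 1 : 0;
}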
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 7f6456afbaec..bf897829f4f0 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -624,7 +624,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
- req = inet_reqsk_alloc(&dccp_request_sock_ops);
+ req = inet_reqsk_alloc(&dccp_request_sock_ops, sk);
if (req == NULL)
goto drop;
@@ -641,7 +641,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
ireq = inet_rsk(req);
ireq->ir_loc_addr = ip_hdr(skb)->daddr;
ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
- write_pnet(&ireq->ireq_net, sock_net(sk));
ireq->ireq_family = AF_INET;
ireq->ir_iif = sk->sk_bound_dev_if;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5166b0043f95..d7e7c7b0a3f1 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -386,7 +386,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
- req = inet_reqsk_alloc(&dccp6_request_sock_ops);
+ req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk);
if (req == NULL)
goto drop;
@@ -403,7 +403,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
ireq = inet_rsk(req);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- write_pnet(&ireq->ireq_net, sock_net(sk));
ireq->ireq_family = AF_INET6;
if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 3390ba6f96b2..f0f91858aecf 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -293,8 +293,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
- struct sock *newsk;
struct request_sock *req;
+ struct sock *newsk;
int error;
lock_sock(sk);
@@ -323,9 +323,11 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
newsk = req->sk;
sk_acceptq_removed(sk);
- if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) {
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ tcp_rsk(req)->tfo_listener &&
+ queue->fastopenq) {
spin_lock_bh(&queue->fastopenq->lock);
- if (tcp_rsk(req)->listener) {
+ if (tcp_rsk(req)->tfo_listener) {
/* We are still waiting for the final ACK from 3WHS
* so can't free req now. Instead, we set req->sk to
* NULL to signify that the child socket is taken
@@ -817,9 +819,9 @@ void inet_csk_listen_stop(struct sock *sk)
percpu_counter_inc(sk->sk_prot->orphan_count);
- if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) {
+ if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
BUG_ON(tcp_sk(child)->fastopen_rsk != req);
- BUG_ON(sk != tcp_rsk(req)->listener);
+ BUG_ON(sk != req->rsk_listener);
/* Paranoid, to prevent race condition if
* an inbound pkt destined for child is
@@ -828,7 +830,6 @@ void inet_csk_listen_stop(struct sock *sk)
* tcp_v4_destroy_sock().
*/
tcp_sk(child)->fastopen_rsk = NULL;
- sock_put(sk);
}
inet_csk_destroy_sock(child);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5ae0c49f5e2e..34e755403715 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -227,11 +227,12 @@ static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct sock *child;
child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
- if (child)
+ if (child) {
+ atomic_set(&req->rsk_refcnt, 1);
inet_csk_reqsk_queue_add(sk, req, child);
- else
+ } else {
reqsk_free(req);
-
+ }
return child;
}
@@ -325,7 +326,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
+ req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */
if (!req)
goto out;
@@ -345,8 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
- treq->listener = NULL;
- write_pnet(&ireq->ireq_net, sock_net(sk));
+ treq->tfo_listener = false;
ireq->ireq_family = AF_INET;
ireq->ir_iif = sk->sk_bound_dev_if;
@@ -357,7 +357,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->opt = tcp_v4_save_options(skb);
if (security_inet_conn_request(sk, skb, req)) {
- reqsk_put(req);
+ reqsk_free(req);
goto out;
}
@@ -378,7 +378,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
security_req_classify_flow(req, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(sock_net(sk), &fl4);
if (IS_ERR(rt)) {
- reqsk_put(req);
+ reqsk_free(req);
goto out;
}
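Note the two error paths above switching from reqsk_put() to reqsk_free(). A syncookie request is never hashed into the listener's syn_table, so its refcount is still zero when the security hook or route lookup fails; reqsk_free() expects exactly that (it WARNs otherwise, per the request_sock.h hunk), while reqsk_put()'s decrement would leave the count unbalanced. A sketch of the distinction with deliberately simplified bodies; the toy_* names are invented for illustration.

#include <assert.h>
#include <stdatomic.h>
#include <stdlib.h>

struct toy_req {
	atomic_int rsk_refcnt;
};

/* For requests that were never published: the refcount must still be zero. */
static void toy_reqsk_free(struct toy_req *req)
{
	assert(atomic_load(&req->rsk_refcnt) == 0);	/* mirrors the WARN_ON_ONCE() */
	free(req);
}

/* For published requests: drop one of the references that were handed out. */
static void toy_reqsk_put(struct toy_req *req)
{
	if (atomic_fetch_sub(&req->rsk_refcnt, 1) == 1)
		free(req);
}

int main(void)
{
	struct toy_req *req = malloc(sizeof(*req));
	struct toy_req *pub = malloc(sizeof(*pub));

	if (!req || !pub)
		return 1;
	atomic_init(&req->rsk_refcnt, 0);	/* never published: the cookie error path */
	toy_reqsk_free(req);

	atomic_init(&pub->rsk_refcnt, 1);	/* published with one reference */
	toy_reqsk_put(pub);			/* the last put frees it */
	return 0;
}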
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 84381319e1bc..82e375a0cbcf 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -155,12 +155,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
tp = tcp_sk(child);
tp->fastopen_rsk = req;
- /* Do a hold on the listner sk so that if the listener is being
- * closed, the child that has been accepted can live on and still
- * access listen_lock.
- */
- sock_hold(sk);
- tcp_rsk(req)->listener = sk;
+ tcp_rsk(req)->tfo_listener = true;
/* RFC1323: The window in SYN & SYN/ACK segments is never
* scaled. So correct it appropriately.
@@ -174,6 +169,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+ atomic_set(&req->rsk_refcnt, 1);
/* Add the child socket directly into the accept queue */
inet_csk_reqsk_queue_add(sk, req, child);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7257eb206c07..1dfbaee3554e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5967,6 +5967,26 @@ static void tcp_openreq_init(struct request_sock *req,
ireq->ir_mark = inet_request_mark(sk, skb);
}
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+ struct sock *sk_listener)
+{
+ struct request_sock *req = reqsk_alloc(ops, sk_listener);
+
+ if (req) {
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ kmemcheck_annotate_bitfield(ireq, flags);
+ ireq->opt = NULL;
+ atomic64_set(&ireq->ir_cookie, 0);
+ ireq->ireq_state = TCP_NEW_SYN_RECV;
+ write_pnet(&ireq->ireq_net, sock_net(sk_listener));
+ }
+
+ return req;
+}
+EXPORT_SYMBOL(inet_reqsk_alloc);
+
int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb)
@@ -6004,7 +6024,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop;
}
- req = inet_reqsk_alloc(rsk_ops);
+ req = inet_reqsk_alloc(rsk_ops, sk);
if (!req)
goto drop;
@@ -6020,7 +6040,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb, sk);
- write_pnet(&inet_rsk(req)->ireq_net, sock_net(sk));
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = sk->sk_bound_dev_if;
@@ -6097,7 +6116,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (err || want_cookie)
goto drop_and_free;
- tcp_rsk(req)->listener = NULL;
+ tcp_rsk(req)->tfo_listener = false;
af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
}
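Moving inet_reqsk_alloc() out of line and handing it the listener is also what lets the write_pnet() calls disappear from dccp_v4_conn_request(), dccp_v6_conn_request(), the two cookie paths, and tcp_conn_request(): the allocator now derives the net namespace from the listener itself, so no caller can forget the step. The shape of that consolidation in toy form; all types here are invented for illustration.

#include <stdio.h>
#include <stdlib.h>

struct net { int id; };
struct toy_sock { struct net *net; };		/* models sock_net(sk) */
struct toy_ireq { struct net *ireq_net; };

static struct toy_ireq *toy_inet_reqsk_alloc(const struct toy_sock *sk_listener)
{
	struct toy_ireq *ireq = malloc(sizeof(*ireq));

	if (ireq)
		ireq->ireq_net = sk_listener->net;	/* formerly write_pnet() in every caller */
	return ireq;
}

int main(void)
{
	struct net init_net = { 1 };
	struct toy_sock listener = { &init_net };
	struct toy_ireq *ireq = toy_inet_reqsk_alloc(&listener);

	if (!ireq)
		return 1;
	printf("request inherits net id %d from its listener\n", ireq->ireq_net->id);
	free(ireq);
	return 0;
}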
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 58875ce8e178..da5823e5e5a7 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -49,11 +49,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct sock *child;
child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
- if (child)
+ if (child) {
+ atomic_set(&req->rsk_refcnt, 1);
inet_csk_reqsk_queue_add(sk, req, child);
- else
+ } else {
reqsk_free(req);
-
+ }
return child;
}
@@ -189,14 +190,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet_reqsk_alloc(&tcp6_request_sock_ops);
+ req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk);
if (!req)
goto out;
ireq = inet_rsk(req);
treq = tcp_rsk(req);
- treq->listener = NULL;
- write_pnet(&ireq->ireq_net, sock_net(sk));
+ treq->tfo_listener = false;
ireq->ireq_family = AF_INET6;
if (security_inet_conn_request(sk, skb, req))