summaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/options.c54
-rw-r--r--net/mptcp/pm.c20
-rw-r--r--net/mptcp/pm_kernel.c2
-rw-r--r--net/mptcp/protocol.c78
-rw-r--r--net/mptcp/protocol.h3
5 files changed, 122 insertions, 35 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 1103b3341a70..f24ae7d40e88 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -838,8 +838,11 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
opts->suboptions = 0;
+ /* Force later mptcp_write_options(), but do not use any actual
+ * option space.
+ */
if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb)))
- return false;
+ return true;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
if (mptcp_established_options_fastclose(sk, &opt_size, remaining, opts) ||
@@ -1041,6 +1044,31 @@ static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
WRITE_ONCE(msk->snd_una, new_snd_una);
}
+static void rwin_update(struct mptcp_sock *msk, struct sock *ssk,
+ struct sk_buff *skb)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct tcp_sock *tp = tcp_sk(ssk);
+ u64 mptcp_rcv_wnd;
+
+ /* Avoid touching extra cachelines if TCP is going to accept this
+ * skb without filling the TCP-level window even with a possibly
+ * outdated mptcp-level rwin.
+ */
+ if (!skb->len || skb->len < tcp_receive_window(tp))
+ return;
+
+ mptcp_rcv_wnd = atomic64_read(&msk->rcv_wnd_sent);
+ if (!after64(mptcp_rcv_wnd, subflow->rcv_wnd_sent))
+ return;
+
+ /* Some other subflow grew the mptcp-level rwin since rcv_wup,
+ * resync.
+ */
+ tp->rcv_wnd += mptcp_rcv_wnd - subflow->rcv_wnd_sent;
+ subflow->rcv_wnd_sent = mptcp_rcv_wnd;
+}
+
static void ack_update_msk(struct mptcp_sock *msk,
struct sock *ssk,
struct mptcp_options_received *mp_opt)
@@ -1208,6 +1236,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
*/
if (mp_opt.use_ack)
ack_update_msk(msk, sk, &mp_opt);
+ rwin_update(msk, sk, skb);
/* Zero-data-length packets are dropped by the caller and not
* propagated to the MPTCP layer, so the skb extension does not
@@ -1294,6 +1323,10 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
if (rcv_wnd_new != rcv_wnd_old) {
raise_win:
+ /* The msk-level rcv wnd is after the tcp level one,
+ * sync the latter.
+ */
+ rcv_wnd_new = rcv_wnd_old;
win = rcv_wnd_old - ack_seq;
tp->rcv_wnd = min_t(u64, win, U32_MAX);
new_win = tp->rcv_wnd;
@@ -1317,6 +1350,21 @@ raise_win:
update_wspace:
WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
+ subflow->rcv_wnd_sent = rcv_wnd_new;
+}
+
+static void mptcp_track_rwin(struct tcp_sock *tp)
+{
+ const struct sock *ssk = (const struct sock *)tp;
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk;
+
+ if (!ssk)
+ return;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ msk = mptcp_sk(subflow->conn);
+ WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
}
__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
@@ -1611,6 +1659,10 @@ mp_rst:
opts->reset_transient,
opts->reset_reason);
return;
+ } else if (unlikely(!opts->suboptions)) {
+ /* Fallback to TCP */
+ mptcp_track_rwin(tp);
+ return;
}
if (OPTION_MPTCP_PRIO & opts->suboptions) {
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 2ff1b9499568..9604b91902b8 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -18,6 +18,7 @@ struct mptcp_pm_add_entry {
u8 retrans_times;
struct timer_list add_timer;
struct mptcp_sock *sock;
+ struct rcu_head rcu;
};
static DEFINE_SPINLOCK(mptcp_pm_list_lock);
@@ -155,7 +156,7 @@ bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk,
entry = mptcp_pm_del_add_timer(msk, addr, false);
ret = entry;
- kfree(entry);
+ kfree_rcu(entry, rcu);
return ret;
}
@@ -345,22 +346,27 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
{
struct mptcp_pm_add_entry *entry;
struct sock *sk = (struct sock *)msk;
- struct timer_list *add_timer = NULL;
+ bool stop_timer = false;
+
+ rcu_read_lock();
spin_lock_bh(&msk->pm.lock);
entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
if (entry && (!check_id || entry->addr.id == addr->id)) {
entry->retrans_times = ADD_ADDR_RETRANS_MAX;
- add_timer = &entry->add_timer;
+ stop_timer = true;
}
if (!check_id && entry)
list_del(&entry->list);
spin_unlock_bh(&msk->pm.lock);
- /* no lock, because sk_stop_timer_sync() is calling timer_delete_sync() */
- if (add_timer)
- sk_stop_timer_sync(sk, add_timer);
+ /* Note: entry might have been removed by another thread.
+ * We hold rcu_read_lock() to ensure it is not freed under us.
+ */
+ if (stop_timer)
+ sk_stop_timer_sync(sk, &entry->add_timer);
+ rcu_read_unlock();
return entry;
}
@@ -415,7 +421,7 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
list_for_each_entry_safe(entry, tmp, &free_list, list) {
sk_stop_timer_sync(sk, &entry->add_timer);
- kfree(entry);
+ kfree_rcu(entry, rcu);
}
}
diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c
index 2ae95476dba3..0a50fd5edc06 100644
--- a/net/mptcp/pm_kernel.c
+++ b/net/mptcp/pm_kernel.c
@@ -672,7 +672,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id)
{
- if (rm_id && WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) {
+ if (rm_id && !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) {
u8 limit_add_addr_accepted =
mptcp_pm_get_limit_add_addr_accepted(msk);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 90b4aeca2596..33b5dce431c2 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -78,6 +78,13 @@ bool __mptcp_try_fallback(struct mptcp_sock *msk, int fb_mib)
if (__mptcp_check_fallback(msk))
return true;
+ /* The caller possibly is not holding the msk socket lock, but
+ * in the fallback case only the current subflow is touching
+ * the OoO queue.
+ */
+ if (!RB_EMPTY_ROOT(&msk->out_of_order_queue))
+ return false;
+
spin_lock_bh(&msk->fallback_lock);
if (!msk->allow_infinite_fallback) {
spin_unlock_bh(&msk->fallback_lock);
@@ -937,14 +944,19 @@ static void mptcp_reset_rtx_timer(struct sock *sk)
bool mptcp_schedule_work(struct sock *sk)
{
- if (inet_sk_state_load(sk) != TCP_CLOSE &&
- schedule_work(&mptcp_sk(sk)->work)) {
- /* each subflow already holds a reference to the sk, and the
- * workqueue is invoked by a subflow, so sk can't go away here.
- */
- sock_hold(sk);
+ if (inet_sk_state_load(sk) == TCP_CLOSE)
+ return false;
+
+ /* Get a reference on this socket, mptcp_worker() will release it.
+ * As mptcp_worker() might complete before us, we can not avoid
+ * a sock_hold()/sock_put() if schedule_work() returns false.
+ */
+ sock_hold(sk);
+
+ if (schedule_work(&mptcp_sk(sk)->work))
return true;
- }
+
+ sock_put(sk);
return false;
}
@@ -2399,7 +2411,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
/* flags for __mptcp_close_ssk() */
#define MPTCP_CF_PUSH BIT(1)
-#define MPTCP_CF_FASTCLOSE BIT(2)
/* be sure to send a reset only if the caller asked for it, also
* clean completely the subflow status when the subflow reaches
@@ -2410,7 +2421,7 @@ static void __mptcp_subflow_disconnect(struct sock *ssk,
unsigned int flags)
{
if (((1 << ssk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
- (flags & MPTCP_CF_FASTCLOSE)) {
+ subflow->send_fastclose) {
/* The MPTCP code never wait on the subflow sockets, TCP-level
* disconnect should never fail
*/
@@ -2457,14 +2468,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
- if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
- /* be sure to force the tcp_close path
- * to generate the egress reset
- */
- ssk->sk_lingertime = 0;
- sock_set_flag(ssk, SOCK_LINGER);
- subflow->send_fastclose = 1;
- }
+ if (subflow->send_fastclose && ssk->sk_state != TCP_CLOSE)
+ tcp_set_state(ssk, TCP_CLOSE);
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
@@ -2560,7 +2565,8 @@ static void __mptcp_close_subflow(struct sock *sk)
if (ssk_state != TCP_CLOSE &&
(ssk_state != TCP_CLOSE_WAIT ||
- inet_sk_state_load(sk) != TCP_ESTABLISHED))
+ inet_sk_state_load(sk) != TCP_ESTABLISHED ||
+ __mptcp_check_fallback(msk)))
continue;
/* 'subflow_data_ready' will re-sched once rx queue is empty */
@@ -2768,9 +2774,26 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
mptcp_set_state(sk, TCP_CLOSE);
- mptcp_for_each_subflow_safe(msk, subflow, tmp)
- __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
- subflow, MPTCP_CF_FASTCLOSE);
+
+ /* Explicitly send the fastclose reset as need */
+ if (__mptcp_check_fallback(msk))
+ return;
+
+ mptcp_for_each_subflow_safe(msk, subflow, tmp) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ lock_sock(ssk);
+
+ /* Some subflow socket states don't allow/need a reset.*/
+ if ((1 << ssk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ goto unlock;
+
+ subflow->send_fastclose = 1;
+ tcp_send_active_reset(ssk, ssk->sk_allocation,
+ SK_RST_REASON_TCP_ABORT_ON_CLOSE);
+unlock:
+ release_sock(ssk);
+ }
}
static void mptcp_worker(struct work_struct *work)
@@ -2797,7 +2820,11 @@ static void mptcp_worker(struct work_struct *work)
__mptcp_close_subflow(sk);
if (mptcp_close_tout_expired(sk)) {
+ struct mptcp_subflow_context *subflow, *tmp;
+
mptcp_do_fastclose(sk);
+ mptcp_for_each_subflow_safe(msk, subflow, tmp)
+ __mptcp_close_ssk(sk, subflow->tcp_sock, subflow, 0);
mptcp_close_wake_up(sk);
}
@@ -3222,7 +3249,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
/* msk->subflow is still intact, the following will not free the first
* subflow
*/
- mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
+ mptcp_do_fastclose(sk);
+ mptcp_destroy_common(msk);
/* The first subflow is already in TCP_CLOSE status, the following
* can't overlap with a fallback anymore
@@ -3401,7 +3429,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
}
-void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
+void mptcp_destroy_common(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow, *tmp;
struct sock *sk = (struct sock *)msk;
@@ -3410,7 +3438,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
/* join list will be eventually flushed (with rst) at sock lock release time */
mptcp_for_each_subflow_safe(msk, subflow, tmp)
- __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags);
+ __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, 0);
__skb_queue_purge(&sk->sk_receive_queue);
skb_rbtree_purge(&msk->out_of_order_queue);
@@ -3428,7 +3456,7 @@ static void mptcp_destroy(struct sock *sk)
/* allow the following to close even the initial subflow */
msk->free_first = 1;
- mptcp_destroy_common(msk, 0);
+ mptcp_destroy_common(msk);
sk_sockets_allocated_dec(sk);
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 379a88e14e8d..6ca97096607c 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -509,6 +509,7 @@ struct mptcp_subflow_context {
u64 remote_key;
u64 idsn;
u64 map_seq;
+ u64 rcv_wnd_sent;
u32 snd_isn;
u32 token;
u32 rel_write_seq;
@@ -976,7 +977,7 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
local_bh_enable();
}
-void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
+void mptcp_destroy_common(struct mptcp_sock *msk);
#define MPTCP_TOKEN_MAX_RETRIES 4