summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2024-04-26 15:34:04 +0200
committerPaolo Abeni <pabeni@redhat.com>2024-04-26 15:34:05 +0200
commitd5115a55ffb5253743346ddf628a890417e2935e (patch)
tree3441c70fb71b9bb73af14f90af4c1ece881d4d23 /include
parent15fd021bc4270273d8f4b7f58fdda8a16214a377 (diff)
parentb533fb9cf4f7c6ca2aa255a5a1fdcde49fff2b24 (diff)
Merge branch 'implement-reset-reason-mechanism-to-detect'
Jason Xing says: ==================== Implement reset reason mechanism to detect From: Jason Xing <kernelxing@tencent.com> In production, there are so many cases about why the RST skb is sent but we don't have a very convenient/fast method to detect the exact underlying reasons. RST is implemented in two kinds: passive kind (like tcp_v4_send_reset()) and active kind (like tcp_send_active_reset()). The former can be traced carefully 1) in TCP, with the help of drop reasons, which is based on Eric's idea[1], 2) in MPTCP, with the help of reset options defined in RFC 8684. The latter is relatively independent, which should be implemented on our own, such as active reset reasons which can not be replace by skb drop reason or something like this. In this series, I focus on the fundamental implement mostly about how the rstreason mechanism works and give the detailed passive part as an example, not including the active reset part. In future, we can go further and refine those NOT_SPECIFIED reasons. Here are some examples when tracing: <idle>-0 [002] ..s1. 1830.262425: tcp_send_reset: skbaddr=x skaddr=x src=x dest=x state=x reason=NOT_SPECIFIED <idle>-0 [002] ..s1. 1830.262425: tcp_send_reset: skbaddr=x skaddr=x src=x dest=x state=x reason=NO_SOCKET [1] Link: https://lore.kernel.org/all/CANn89iJw8x-LqgsWOeJQQvgVg6DnL5aBRLi10QN2WBdr+X4k=w@mail.gmail.com/ ==================== Link: https://lore.kernel.org/r/20240425031340.46946-1-kerneljasonxing@gmail.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Diffstat (limited to 'include')
-rw-r--r--include/net/request_sock.h4
-rw-r--r--include/net/rstreason.h121
-rw-r--r--include/net/tcp.h3
-rw-r--r--include/trace/events/tcp.h26
4 files changed, 148 insertions, 6 deletions
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 004e651e6067..bdc737832da6 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -18,6 +18,7 @@
#include <linux/refcount.h>
#include <net/sock.h>
+#include <net/rstreason.h>
struct request_sock;
struct sk_buff;
@@ -34,7 +35,8 @@ struct request_sock_ops {
void (*send_ack)(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
void (*send_reset)(const struct sock *sk,
- struct sk_buff *skb);
+ struct sk_buff *skb,
+ enum sk_rst_reason reason);
void (*destructor)(struct request_sock *req);
void (*syn_ack_timeout)(const struct request_sock *req);
};
diff --git a/include/net/rstreason.h b/include/net/rstreason.h
new file mode 100644
index 000000000000..df3b6ac0c9b3
--- /dev/null
+++ b/include/net/rstreason.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _LINUX_RSTREASON_H
+#define _LINUX_RSTREASON_H
+#include <net/dropreason-core.h>
+#include <uapi/linux/mptcp.h>
+
+#define DEFINE_RST_REASON(FN, FNe) \
+ FN(NOT_SPECIFIED) \
+ FN(NO_SOCKET) \
+ FN(MPTCP_RST_EUNSPEC) \
+ FN(MPTCP_RST_EMPTCP) \
+ FN(MPTCP_RST_ERESOURCE) \
+ FN(MPTCP_RST_EPROHIBIT) \
+ FN(MPTCP_RST_EWQ2BIG) \
+ FN(MPTCP_RST_EBADPERF) \
+ FN(MPTCP_RST_EMIDDLEBOX) \
+ FN(ERROR) \
+ FNe(MAX)
+
+/**
+ * enum sk_rst_reason - the reasons of socket reset
+ *
+ * The reasons of sk reset, which are used in DCCP/TCP/MPTCP protocols.
+ *
+ * There are three parts in order:
+ * 1) skb drop reasons: relying on drop reasons for such as passive reset
+ * 2) independent reset reasons: such as active reset reasons
+ * 3) reset reasons in MPTCP: only for MPTCP use
+ */
+enum sk_rst_reason {
+ /* Refer to include/net/dropreason-core.h
+ * Rely on skb drop reasons because it indicates exactly why RST
+ * could happen.
+ */
+ /** @SK_RST_REASON_NOT_SPECIFIED: reset reason is not specified */
+ SK_RST_REASON_NOT_SPECIFIED,
+ /** @SK_RST_REASON_NO_SOCKET: no valid socket that can be used */
+ SK_RST_REASON_NO_SOCKET,
+
+ /* Copy from include/uapi/linux/mptcp.h.
+ * These reset fields will not be changed since they adhere to
+ * RFC 8684. So do not touch them. I'm going to list each definition
+ * of them respectively.
+ */
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EUNSPEC: Unspecified error.
+ * This is the default error; it implies that the subflow is no
+ * longer available. The presence of this option shows that the
+ * RST was generated by an MPTCP-aware device.
+ */
+ SK_RST_REASON_MPTCP_RST_EUNSPEC,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EMPTCP: MPTCP-specific error.
+ * An error has been detected in the processing of MPTCP options.
+ * This is the usual reason code to return in the cases where a RST
+ * is being sent to close a subflow because of an invalid response.
+ */
+ SK_RST_REASON_MPTCP_RST_EMPTCP,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_ERESOURCE: Lack of resources.
+ * This code indicates that the sending host does not have enough
+ * resources to support the terminated subflow.
+ */
+ SK_RST_REASON_MPTCP_RST_ERESOURCE,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EPROHIBIT: Administratively prohibited.
+ * This code indicates that the requested subflow is prohibited by
+ * the policies of the sending host.
+ */
+ SK_RST_REASON_MPTCP_RST_EPROHIBIT,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EWQ2BIG: Too much outstanding data.
+ * This code indicates that there is an excessive amount of data
+ * that needs to be transmitted over the terminated subflow while
+ * having already been acknowledged over one or more other subflows.
+ * This may occur if a path has been unavailable for a short period
+ * and it is more efficient to reset and start again than it is to
+ * retransmit the queued data.
+ */
+ SK_RST_REASON_MPTCP_RST_EWQ2BIG,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EBADPERF: Unacceptable performance.
+ * This code indicates that the performance of this subflow was
+ * too low compared to the other subflows of this Multipath TCP
+ * connection.
+ */
+ SK_RST_REASON_MPTCP_RST_EBADPERF,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EMIDDLEBOX: Middlebox interference.
+ * Middlebox interference has been detected over this subflow,
+ * making MPTCP signaling invalid. For example, this may be sent
+ * if the checksum does not validate.
+ */
+ SK_RST_REASON_MPTCP_RST_EMIDDLEBOX,
+
+ /** @SK_RST_REASON_ERROR: unexpected error happens */
+ SK_RST_REASON_ERROR,
+
+ /**
+ * @SK_RST_REASON_MAX: Maximum of socket reset reasons.
+ * It shouldn't be used as a real 'reason'.
+ */
+ SK_RST_REASON_MAX,
+};
+
+/* Convert skb drop reasons to enum sk_rst_reason type */
+static inline enum sk_rst_reason
+sk_rst_convert_drop_reason(enum skb_drop_reason reason)
+{
+ switch (reason) {
+ case SKB_DROP_REASON_NOT_SPECIFIED:
+ return SK_RST_REASON_NOT_SPECIFIED;
+ case SKB_DROP_REASON_NO_SOCKET:
+ return SK_RST_REASON_NO_SOCKET;
+ default:
+ /* If we don't have our own corresponding reason */
+ return SK_RST_REASON_NOT_SPECIFIED;
+ }
+}
+#endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ffc9371fe9de..a9eb21251195 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -670,7 +670,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
void tcp_send_probe0(struct sock *);
int tcp_write_wakeup(struct sock *, int mib);
void tcp_send_fin(struct sock *sk);
-void tcp_send_active_reset(struct sock *sk, gfp_t priority);
+void tcp_send_active_reset(struct sock *sk, gfp_t priority,
+ enum sk_rst_reason reason);
int tcp_send_synack(struct sock *);
void tcp_push_one(struct sock *, unsigned int mss_now);
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 5c04a61a11c2..49b5ee091cf6 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -11,6 +11,7 @@
#include <net/ipv6.h>
#include <net/tcp.h>
#include <linux/sock_diag.h>
+#include <net/rstreason.h>
/*
* tcp event with arguments sk and skb
@@ -74,20 +75,32 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
TP_ARGS(sk, skb)
);
+#undef FN
+#define FN(reason) TRACE_DEFINE_ENUM(SK_RST_REASON_##reason);
+DEFINE_RST_REASON(FN, FN)
+
+#undef FN
+#undef FNe
+#define FN(reason) { SK_RST_REASON_##reason, #reason },
+#define FNe(reason) { SK_RST_REASON_##reason, #reason }
+
/*
* skb of trace_tcp_send_reset is the skb that caused RST. In case of
* active reset, skb should be NULL
*/
TRACE_EVENT(tcp_send_reset,
- TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+ TP_PROTO(const struct sock *sk,
+ const struct sk_buff *skb,
+ const enum sk_rst_reason reason),
- TP_ARGS(sk, skb),
+ TP_ARGS(sk, skb, reason),
TP_STRUCT__entry(
__field(const void *, skbaddr)
__field(const void *, skaddr)
__field(int, state)
+ __field(enum sk_rst_reason, reason)
__array(__u8, saddr, sizeof(struct sockaddr_in6))
__array(__u8, daddr, sizeof(struct sockaddr_in6))
),
@@ -113,14 +126,19 @@ TRACE_EVENT(tcp_send_reset,
*/
TP_STORE_ADDR_PORTS_SKB(skb, th, entry->daddr, entry->saddr);
}
+ __entry->reason = reason;
),
- TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s",
+ TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s reason=%s",
__entry->skbaddr, __entry->skaddr,
__entry->saddr, __entry->daddr,
- __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN")
+ __entry->state ? show_tcp_state_name(__entry->state) : "UNKNOWN",
+ __print_symbolic(__entry->reason, DEFINE_RST_REASON(FN, FNe)))
);
+#undef FN
+#undef FNe
+
/*
* tcp event with arguments sk
*