From de525be2ca2734865d29c4b67ddd29913b214906 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:09 -0800 Subject: bpf: Support passing args to sock_ops bpf function Adds support for passing up to 4 arguments to sock_ops bpf functions. It reusues the reply union, so the bpf_sock_ops structures are not increased in size. Signed-off-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 406c19d6016b..8d5874c2c4ff 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -952,8 +952,9 @@ struct bpf_map_info { struct bpf_sock_ops { __u32 op; union { - __u32 reply; - __u32 replylong[4]; + __u32 args[4]; /* Optionally passed to bpf program */ + __u32 reply; /* Returned by bpf program */ + __u32 replylong[4]; /* Optionally returned by bpf prog */ }; __u32 family; __u32 remote_ip4; /* Stored in network byte order */ -- cgit v1.2.3 From b13d880721729384757f235166068c315326f4a1 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:10 -0800 Subject: bpf: Adds field bpf_sock_ops_cb_flags to tcp_sock Adds field bpf_sock_ops_cb_flags to tcp_sock and bpf_sock_ops. Its primary use is to determine if there should be calls to sock_ops bpf program at various points in the TCP code. The field is initialized to zero, disabling the calls. A sock_ops BPF program can set it, per connection and as necessary, when the connection is established. It also adds support for reading and writting the field within a sock_ops BPF program. Reading is done by accessing the field directly. However, writing is done through the helper function bpf_sock_ops_cb_flags_set, in order to return an error if a BPF program is trying to set a callback that is not supported in the current kernel (i.e. running an older kernel). The helper function returns 0 if it was able to set all of the bits set in the argument, a positive number containing the bits that could not be set, or -EINVAL if the socket is not a full TCP socket. Examples of where one could call the bpf program: 1) When RTO fires 2) When a packet is retransmitted 3) When the connection terminates 4) When a packet is sent 5) When a packet is received Signed-off-by: Lawrence Brakmo Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8d5874c2c4ff..aa128407c44d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -642,6 +642,14 @@ union bpf_attr { * @optlen: length of optval in bytes * Return: 0 or negative error * + * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags) + * Set callback flags for sock_ops + * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct + * @flags: flags value + * Return: 0 for no error + * -EINVAL if there is no full tcp socket + * bits in flags that are not supported by current kernel + * * int bpf_skb_adjust_room(skb, len_diff, mode, flags) * Grow or shrink room in sk_buff. * @skb: pointer to skb @@ -748,7 +756,8 @@ union bpf_attr { FN(perf_event_read_value), \ FN(perf_prog_read_value), \ FN(getsockopt), \ - FN(override_return), + FN(override_return), \ + FN(sock_ops_cb_flags_set), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -969,8 +978,14 @@ struct bpf_sock_ops { */ __u32 snd_cwnd; __u32 srtt_us; /* Averaged RTT << 3 in usecs */ + __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ }; +/* Definitions for bpf_sock_ops_cb_flags */ +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0 /* Mask of all currently + * supported cb flags + */ + /* List of known BPF sock_ops operators. * New entries can only be added at the end */ -- cgit v1.2.3 From f89013f66d0f1a0dad44c513318efb706399a36b Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:11 -0800 Subject: bpf: Add sock_ops RTO callback Adds an optional call to sock_ops BPF program based on whether the BPF_SOCK_OPS_RTO_CB_FLAG is set in bpf_sock_ops_flags. The BPF program is passed 2 arguments: icsk_retransmits and whether the RTO has expired. Signed-off-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index aa128407c44d..c8cecf9cf5bd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -982,7 +982,8 @@ struct bpf_sock_ops { }; /* Definitions for bpf_sock_ops_cb_flags */ -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0 /* Mask of all currently +#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x1 /* Mask of all currently * supported cb flags */ @@ -1019,6 +1020,11 @@ enum { * a congestion threshold. RTTs above * this indicate congestion */ + BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. + * Arg1: value of icsk_retransmits + * Arg2: value of icsk_rto + * Arg3: whether RTO has expired + */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -- cgit v1.2.3 From 44f0e43037d3a17b043843ba67610ac7c7e37db6 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:12 -0800 Subject: bpf: Add support for reading sk_state and more Add support for reading many more tcp_sock fields state, same as sk->sk_state rtt_min same as sk->rtt_min.s[0].v (current rtt_min) snd_ssthresh rcv_nxt snd_nxt snd_una mss_cache ecn_flags rate_delivered rate_interval_us packets_out retrans_out total_retrans segs_in data_segs_in segs_out data_segs_out lost_out sacked_out sk_txhash bytes_received (__u64) bytes_acked (__u64) Signed-off-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c8cecf9cf5bd..46520eae37fa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -979,6 +979,28 @@ struct bpf_sock_ops { __u32 snd_cwnd; __u32 srtt_us; /* Averaged RTT << 3 in usecs */ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ + __u32 state; + __u32 rtt_min; + __u32 snd_ssthresh; + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u32 mss_cache; + __u32 ecn_flags; + __u32 rate_delivered; + __u32 rate_interval_us; + __u32 packets_out; + __u32 retrans_out; + __u32 total_retrans; + __u32 segs_in; + __u32 data_segs_in; + __u32 segs_out; + __u32 data_segs_out; + __u32 lost_out; + __u32 sacked_out; + __u32 sk_txhash; + __u64 bytes_received; + __u64 bytes_acked; }; /* Definitions for bpf_sock_ops_cb_flags */ -- cgit v1.2.3 From a31ad29e6a30cb0b9084a9425b819cdcd97273ce Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:14 -0800 Subject: bpf: Add BPF_SOCK_OPS_RETRANS_CB Adds support for calling sock_ops BPF program when there is a retransmission. Three arguments are used; one for the sequence number, another for the number of segments retransmitted, and the last one for the return value of tcp_transmit_skb (0 => success). Does not include syn-ack retransmissions. New op: BPF_SOCK_OPS_RETRANS_CB. Signed-off-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 46520eae37fa..31c93a0bdbc2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1005,7 +1005,8 @@ struct bpf_sock_ops { /* Definitions for bpf_sock_ops_cb_flags */ #define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x1 /* Mask of all currently +#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x3 /* Mask of all currently * supported cb flags */ @@ -1047,6 +1048,12 @@ enum { * Arg2: value of icsk_rto * Arg3: whether RTO has expired */ + BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. + * Arg1: sequence number of 1st byte + * Arg2: # segments + * Arg3: return value of + * tcp_transmit_skb (0 => success) + */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -- cgit v1.2.3 From d44874910a26f3a8f81edf873a2473363f07f660 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:15 -0800 Subject: bpf: Add BPF_SOCK_OPS_STATE_CB Adds support for calling sock_ops BPF program when there is a TCP state change. Two arguments are used; one for the old state and another for the new state. There is a new enum in include/uapi/linux/bpf.h that exports the TCP states that prepends BPF_ to the current TCP state names. If it is ever necessary to change the internal TCP state values (other than adding more to the end), then it will become necessary to convert from the internal TCP state value to the BPF value before calling the BPF sock_ops function. There are a set of compile checks added in tcp.c to detect if the internal and BPF values differ so we can make the necessary fixes. New op: BPF_SOCK_OPS_STATE_CB. Signed-off-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 31c93a0bdbc2..db6bdc375126 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1006,7 +1006,8 @@ struct bpf_sock_ops { /* Definitions for bpf_sock_ops_cb_flags */ #define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) #define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x3 /* Mask of all currently +#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently * supported cb flags */ @@ -1054,6 +1055,32 @@ enum { * Arg3: return value of * tcp_transmit_skb (0 => success) */ + BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. + * Arg1: old_state + * Arg2: new_state + */ +}; + +/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect + * changes between the TCP and BPF versions. Ideally this should never happen. + * If it does, we need to add code to convert them before calling + * the BPF sock_ops function. + */ +enum { + BPF_TCP_ESTABLISHED = 1, + BPF_TCP_SYN_SENT, + BPF_TCP_SYN_RECV, + BPF_TCP_FIN_WAIT1, + BPF_TCP_FIN_WAIT2, + BPF_TCP_TIME_WAIT, + BPF_TCP_CLOSE, + BPF_TCP_CLOSE_WAIT, + BPF_TCP_LAST_ACK, + BPF_TCP_LISTEN, + BPF_TCP_CLOSING, /* Now a valid state */ + BPF_TCP_NEW_SYN_RECV, + + BPF_TCP_MAX_STATES /* Leave at the end! */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -- cgit v1.2.3