From 5bc1701881e395cec51811d07ec6961f3d1b2612 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 17 May 2017 11:08:01 +0200 Subject: net: sched: introduce multichain support for filters Instead of having only one filter per block, introduce a list of chains for every block. Create chain 0 by default. UAPI is extended so the user can specify which chain he wants to change. If the new attribute is not specified, chain 0 is used. That allows to maintain backward compatibility. If chain does not exist and user wants to manipulate with it, new chain is created with specified index. Also, when last filter is removed from the chain, the chain is destroyed. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index cce061382e40..6487b21b2c1e 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -549,6 +549,7 @@ enum { TCA_STAB, TCA_PAD, TCA_DUMP_INVISIBLE, + TCA_CHAIN, __TCA_MAX }; -- cgit v1.2.3 From db50514f9a9c7ef1f17e9921b1cc0902746872f3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 17 May 2017 11:08:03 +0200 Subject: net: sched: add termination action to allow goto chain Introduce new type of termination action called "goto_chain". This allows user to specify a chain to be processed. This action type is then processed as a return value in tcf_classify loop in similar way as "reclassify" is, only it does not reset to the first filter in chain but rather reset to the first filter of the desired chain. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index d613be3b3239..1b9aa9e6b4fd 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -51,6 +51,7 @@ enum { (((combined) & (~TC_ACT_EXT_VAL_MASK)) == opcode) #define TC_ACT_JUMP __TC_ACT_EXT(1) +#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2) /* Action type identifiers*/ enum { -- cgit v1.2.3 From b8210a9e4bea6354eccc5d8a50ecc21ea7486dc9 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 19 May 2017 17:52:35 +0200 Subject: net: define receive timestamp filter for NTP Add HWTSTAMP_FILTER_NTP_ALL to the hwtstamp_rx_filters enum for timestamping of NTP packets. There is currently only one driver (phyter) that could support it directly. CC: Richard Cochran CC: Willem de Bruijn Signed-off-by: Miroslav Lichvar Signed-off-by: David S. Miller --- include/uapi/linux/net_tstamp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 464dcca5ed68..0749fb13e517 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -125,6 +125,9 @@ enum hwtstamp_rx_filters { HWTSTAMP_FILTER_PTP_V2_SYNC, /* PTP v2/802.AS1, any layer, Delay_req packet */ HWTSTAMP_FILTER_PTP_V2_DELAY_REQ, + + /* NTP, UDP, all versions and packet modes */ + HWTSTAMP_FILTER_NTP_ALL, }; #endif /* _NET_TIMESTAMPING_H */ -- cgit v1.2.3 From aad9c8c470f2a8321a99eb053630ce0e199558d6 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 19 May 2017 17:52:38 +0200 Subject: net: add new control message for incoming HW-timestamped packets Add SOF_TIMESTAMPING_OPT_PKTINFO option to request a new control message for incoming packets with hardware timestamps. It contains the index of the real interface which received the packet and the length of the packet at layer 2. The index is useful with bonding, bridges and other interfaces, where IP_PKTINFO doesn't allow applications to determine which PHC made the timestamp. With the L2 length (and link speed) it is possible to transpose preamble timestamps to trailer timestamps, which are used in the NTP protocol. While this information could be provided by two new socket options independently from timestamping, it doesn't look like they would be very useful. With this option any performance impact is limited to hardware timestamping. Use dev_get_by_napi_id() to get the device and its index. On kernels with disabled CONFIG_NET_RX_BUSY_POLL or drivers not using NAPI, a zero index will be returned in the control message. CC: Richard Cochran Acked-by: Willem de Bruijn Signed-off-by: Miroslav Lichvar Signed-off-by: David S. Miller --- include/uapi/linux/net_tstamp.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 0749fb13e517..dee74d39da94 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -9,6 +9,7 @@ #ifndef _NET_TIMESTAMPING_H #define _NET_TIMESTAMPING_H +#include #include /* for SO_TIMESTAMPING */ /* SO_TIMESTAMPING gets an integer bit field comprised of these values */ @@ -26,8 +27,9 @@ enum { SOF_TIMESTAMPING_OPT_CMSG = (1<<10), SOF_TIMESTAMPING_OPT_TSONLY = (1<<11), SOF_TIMESTAMPING_OPT_STATS = (1<<12), + SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; @@ -130,4 +132,11 @@ enum hwtstamp_rx_filters { HWTSTAMP_FILTER_NTP_ALL, }; +/* SCM_TIMESTAMPING_PKTINFO control message */ +struct scm_ts_pktinfo { + __u32 if_index; + __u32 pkt_length; + __u32 reserved[2]; +}; + #endif /* _NET_TIMESTAMPING_H */ -- cgit v1.2.3 From b50a5c70ffa4fd6b6da324ab54c84adf48fb17d9 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 19 May 2017 17:52:40 +0200 Subject: net: allow simultaneous SW and HW transmit timestamping Add SOF_TIMESTAMPING_OPT_TX_SWHW option to allow an outgoing packet to be looped to the socket's error queue with a software timestamp even when a hardware transmit timestamp is expected to be provided by the driver. Applications using this option will receive two separate messages from the error queue, one with a software timestamp and the other with a hardware timestamp. As the hardware timestamp is saved to the shared skb info, which may happen before the first message with software timestamp is received by the application, the hardware timestamp is copied to the SCM_TIMESTAMPING control message only when the skb has no software timestamp or it is an incoming packet. While changing sw_tx_timestamp(), inline it in skb_tx_timestamp() as there are no other users. CC: Richard Cochran CC: Willem de Bruijn Signed-off-by: Miroslav Lichvar Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/net_tstamp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index dee74d39da94..3d421d912193 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -28,8 +28,9 @@ enum { SOF_TIMESTAMPING_OPT_TSONLY = (1<<11), SOF_TIMESTAMPING_OPT_STATS = (1<<12), SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13), + SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_PKTINFO, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; -- cgit v1.2.3 From fdfc7dd6ca39b117c709dceee8d32ac4447294d6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 23 May 2017 18:40:45 +0200 Subject: net/sched: flower: add support for matching on tcp flags Benefit from the support of tcp flags dissection and allow user to insert rules matching on tcp flags. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 1b9aa9e6b4fd..c6e8cf5e9c40 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -451,6 +451,9 @@ enum { TCA_FLOWER_KEY_MPLS_TC, /* u8 - 3 bits */ TCA_FLOWER_KEY_MPLS_LABEL, /* be32 - 20 bits */ + TCA_FLOWER_KEY_TCP_FLAGS, /* be16 */ + TCA_FLOWER_KEY_TCP_FLAGS_MASK, /* be16 */ + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 0be1b305d9b808e5b28e74f4ef807851c14c39f2 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 25 May 2017 10:42:38 -0700 Subject: net: ipv4: add new RTM_F_FIB_MATCH flag for use with RTM_GETROUTE This flag when specified will return matched fib result in response to a RTM_GETROUTE query. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 6487b21b2c1e..564790e854f7 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -278,6 +278,7 @@ enum rt_scope_t { #define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */ #define RTM_F_PREFIX 0x800 /* Prefix addresses */ #define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */ +#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */ /* Reserved table identifiers */ -- cgit v1.2.3 From 3d3ea5af5c0b382bc9d9aed378fd814fb5d4a011 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 27 May 2017 10:14:34 -0400 Subject: rtnl: Add support for netdev event to link messages When netdev events happen, a rtnetlink_event() handler will send messages for every event in it's white list. These messages contain current information about a particular device, but they do not include the iformation about which event just happened. So, it is impossible to tell what just happend for these events. This patch adds a new extension to RTM_NEWLINK message called IFLA_EVENT that would have an encoding of event that triggered this message. This would allow the the message consumer to easily determine if it needs to perform certain actions. Signed-off-by: Vladislav Yasevich Acked-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 15ac20382aba..8ed679fe603f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -157,6 +157,7 @@ enum { IFLA_GSO_MAX_SIZE, IFLA_PAD, IFLA_XDP, + IFLA_EVENT, __IFLA_MAX }; @@ -911,4 +912,14 @@ enum { #define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) +enum { + IFLA_EVENT_NONE, + IFLA_EVENT_REBOOT, /* internal reset / reboot */ + IFLA_EVENT_FEATURES, /* change in offload features */ + IFLA_EVENT_BONDING_FAILOVER, /* change in active slave */ + IFLA_EVENT_NOTIFY_PEERS, /* re-sent grat. arp/ndisc */ + IFLA_EVENT_IGMP_RESEND, /* re-sent IGMP JOIN */ + IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ +}; + #endif /* _UAPI_LINUX_IF_LINK_H */ -- cgit v1.2.3 From 4d80cc0aaaab9efac14c9d3d702b69961800de20 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 1 Jun 2017 21:37:38 +0300 Subject: net/sched: cls_flower: add support for matching on ip tos and ttl Benefit from the support of ip header fields dissection and allow users to set rules matching on ipv4 tos and ttl or ipv6 traffic-class and hoplimit. Signed-off-by: Or Gerlitz Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index c6e8cf5e9c40..edf43ddf47b0 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -454,6 +454,11 @@ enum { TCA_FLOWER_KEY_TCP_FLAGS, /* be16 */ TCA_FLOWER_KEY_TCP_FLAGS_MASK, /* be16 */ + TCA_FLOWER_KEY_IP_TOS, /* u8 */ + TCA_FLOWER_KEY_IP_TOS_MASK, /* u8 */ + TCA_FLOWER_KEY_IP_TTL, /* u8 */ + TCA_FLOWER_KEY_IP_TTL_MASK, /* u8 */ + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From b7d3ed5be9bd7e0689eee0f0f36702937cd8f7c8 Mon Sep 17 00:00:00 2001 From: Teng Qin Date: Fri, 2 Jun 2017 21:03:54 -0700 Subject: bpf: update perf event helper functions documentation This commit updates documentation of the bpf_perf_event_output and bpf_perf_event_read helpers to match their implementation. Signed-off-by: Teng Qin Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 94dfa9def355..e78aece03628 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -313,8 +313,11 @@ union bpf_attr { * @flags: room for future extensions * Return: 0 on success or negative error * - * u64 bpf_perf_event_read(&map, index) - * Return: Number events read or error code + * u64 bpf_perf_event_read(map, flags) + * read perf event counter value + * @map: pointer to perf_event_array map + * @flags: index of event in the map or bitmask flags + * Return: value of perf event counter read or error code * * int bpf_redirect(ifindex, flags) * redirect to another netdev @@ -328,11 +331,11 @@ union bpf_attr { * @skb: pointer to skb * Return: realm if != 0 * - * int bpf_perf_event_output(ctx, map, index, data, size) + * int bpf_perf_event_output(ctx, map, flags, data, size) * output perf raw sample * @ctx: struct pt_regs* * @map: pointer to perf_event_array map - * @index: index of event in the map + * @flags: index of event in the map or bitmask flags * @data: data on stack to be output as raw data * @size: size of data * Return: 0 on success or negative error -- cgit v1.2.3 From e25ea21ffa66a029acfa89d2611c0e7ef23e7d8c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 6 Jun 2017 14:12:02 +0200 Subject: net: sched: introduce a TRAP control action There is need to instruct the HW offloaded path to push certain matched packets to cpu/kernel for further analysis. So this patch introduces a new TRAP control action to TC. For kernel datapath, this action does not make much sense. So with the same logic as in HW, new TRAP behaves similar to STOLEN. The skb is just dropped in the datapath (and virtually ejected to an upper level, which does not exist in case of kernel). Signed-off-by: Jiri Pirko Reviewed-by: Yotam Gigi Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index edf43ddf47b0..2055783e6ee9 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -37,6 +37,13 @@ enum { #define TC_ACT_QUEUED 5 #define TC_ACT_REPEAT 6 #define TC_ACT_REDIRECT 7 +#define TC_ACT_TRAP 8 /* For hw path, this means "trap to cpu" + * and don't further process the frame + * in hardware. For sw path, this is + * equivalent of TC_ACT_STOLEN - drop + * the skb and act like everything + * is alright. + */ /* There is a special kind of actions called "extended actions", * which need a value parameter. These have a local opcode located in -- cgit v1.2.3 From 34ad5580f8f9c86cb273ebea25c149613cd1667e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 5 Jun 2017 12:15:48 -0700 Subject: bpf: Add BPF_(PROG|MAP)_GET_NEXT_ID command This patch adds BPF_PROG_GET_NEXT_ID and BPF_MAP_GET_NEXT_ID to allow userspace to iterate all bpf_prog IDs and bpf_map IDs. The API is trying to be consistent with the existing BPF_MAP_GET_NEXT_KEY. It is currently limited to CAP_SYS_ADMIN which we can consider to lift it in followup patches. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e78aece03628..629747a3f273 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -82,6 +82,8 @@ enum bpf_cmd { BPF_PROG_ATTACH, BPF_PROG_DETACH, BPF_PROG_TEST_RUN, + BPF_PROG_GET_NEXT_ID, + BPF_MAP_GET_NEXT_ID, }; enum bpf_map_type { @@ -209,6 +211,11 @@ union bpf_attr { __u32 repeat; __u32 duration; } test; + + struct { /* anonymous struct used by BPF_*_GET_NEXT_ID */ + __u32 start_id; + __u32 next_id; + }; } __attribute__((aligned(8))); /* BPF helper function descriptions: -- cgit v1.2.3 From b16d9aa4c2b90af8d2c3201e245150f8c430c3bc Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 5 Jun 2017 12:15:49 -0700 Subject: bpf: Add BPF_PROG_GET_FD_BY_ID Add BPF_PROG_GET_FD_BY_ID command to allow user to get a fd from a bpf_prog's ID. bpf_prog_inc_not_zero() is added and is called with prog_idr_lock held. __bpf_prog_put() is also added which has the 'bool do_idr_lock' param to decide if the prog_idr_lock should be acquired when freeing the prog->id. In the error path of bpf_prog_inc_not_zero(), it may have to call __bpf_prog_put(map, false) which does not need to take the prog_idr_lock when freeing the prog->id. It is currently limited to CAP_SYS_ADMIN which we can consider to lift it in followup patches. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 629747a3f273..d70cfed19d5e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -84,6 +84,7 @@ enum bpf_cmd { BPF_PROG_TEST_RUN, BPF_PROG_GET_NEXT_ID, BPF_MAP_GET_NEXT_ID, + BPF_PROG_GET_FD_BY_ID, }; enum bpf_map_type { @@ -212,8 +213,11 @@ union bpf_attr { __u32 duration; } test; - struct { /* anonymous struct used by BPF_*_GET_NEXT_ID */ - __u32 start_id; + struct { /* anonymous struct used by BPF_*_GET_*_ID */ + union { + __u32 start_id; + __u32 prog_id; + }; __u32 next_id; }; } __attribute__((aligned(8))); -- cgit v1.2.3 From bd5f5f4ecb78e2698dad655645b6d6a2f7012a8c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 5 Jun 2017 12:15:50 -0700 Subject: bpf: Add BPF_MAP_GET_FD_BY_ID Add BPF_MAP_GET_FD_BY_ID command to allow user to get a fd from a bpf_map's ID. bpf_map_inc_not_zero() is added and is called with map_idr_lock held. __bpf_map_put() is also added which has the 'bool do_idr_lock' param to decide if the map_idr_lock should be acquired when freeing the map->id. In the error path of bpf_map_inc_not_zero(), it may have to call __bpf_map_put(map, false) which does not need to take the map_idr_lock when freeing the map->id. It is currently limited to CAP_SYS_ADMIN which we can consider to lift it in followup patches. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d70cfed19d5e..dd23f47ff00c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -85,6 +85,7 @@ enum bpf_cmd { BPF_PROG_GET_NEXT_ID, BPF_MAP_GET_NEXT_ID, BPF_PROG_GET_FD_BY_ID, + BPF_MAP_GET_FD_BY_ID, }; enum bpf_map_type { @@ -217,6 +218,7 @@ union bpf_attr { union { __u32 start_id; __u32 prog_id; + __u32 map_id; }; __u32 next_id; }; -- cgit v1.2.3 From 1e270976908686ec25fb91b8a34145be54137976 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 5 Jun 2017 12:15:52 -0700 Subject: bpf: Add BPF_OBJ_GET_INFO_BY_FD A single BPF_OBJ_GET_INFO_BY_FD cmd is used to obtain the info for both bpf_prog and bpf_map. The kernel can figure out the fd is associated with a bpf_prog or bpf_map. The suggested struct bpf_prog_info and struct bpf_map_info are not meant to be a complete list and it is not the goal of this patch. New fields can be added in the future patch. The focus of this patch is to create the interface, BPF_OBJ_GET_INFO_BY_FD cmd for exposing the bpf_prog's and bpf_map's info. The obj's info, which will be extended (and get bigger) over time, is separated from the bpf_attr to avoid bloating the bpf_attr. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index dd23f47ff00c..9b2c10b45733 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -86,6 +86,7 @@ enum bpf_cmd { BPF_MAP_GET_NEXT_ID, BPF_PROG_GET_FD_BY_ID, BPF_MAP_GET_FD_BY_ID, + BPF_OBJ_GET_INFO_BY_FD, }; enum bpf_map_type { @@ -222,6 +223,12 @@ union bpf_attr { }; __u32 next_id; }; + + struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ + __u32 bpf_fd; + __u32 info_len; + __aligned_u64 info; + } info; } __attribute__((aligned(8))); /* BPF helper function descriptions: @@ -686,4 +693,25 @@ struct xdp_md { __u32 data_end; }; +#define BPF_TAG_SIZE 8 + +struct bpf_prog_info { + __u32 type; + __u32 id; + __u8 tag[BPF_TAG_SIZE]; + __u32 jited_prog_len; + __u32 xlated_prog_len; + __aligned_u64 jited_prog_insns; + __aligned_u64 xlated_prog_insns; +} __attribute__((aligned(8))); + +struct bpf_map_info { + __u32 type; + __u32 id; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 map_flags; +} __attribute__((aligned(8))); + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 0604475119de5f80dc051a5db055c6a2a75bd542 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Jun 2017 13:29:12 -0700 Subject: tcp: add TCPMemoryPressuresChrono counter DRAM supply shortage and poor memory pressure tracking in TCP stack makes any change in SO_SNDBUF/SO_RCVBUF (or equivalent autotuning limits) and tcp_mem[] quite hazardous. TCPMemoryPressures SNMP counter is an indication of tcp_mem sysctl limits being hit, but only tracking number of transitions. If TCP stack behavior under stress was perfect : 1) It would maintain memory usage close to the limit. 2) Memory pressure state would be entered for short times. We certainly prefer 100 events lasting 10ms compared to one event lasting 200 seconds. This patch adds a new SNMP counter tracking cumulative duration of memory pressure events, given in ms units. $ cat /proc/sys/net/ipv4/tcp_mem 3088 4117 6176 $ grep TCP /proc/net/sockstat TCP: inuse 180 orphan 0 tw 2 alloc 234 mem 4140 $ nstat -n ; sleep 10 ; nstat |grep Pressure TcpExtTCPMemoryPressures 1700 TcpExtTCPMemoryPressuresChrono 5209 v2: Used EXPORT_SYMBOL_GPL() instead of EXPORT_SYMBOL() as David instructed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 95cffcb21dfd..d85693295798 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -228,6 +228,7 @@ enum LINUX_MIB_TCPABORTONLINGER, /* TCPAbortOnLinger */ LINUX_MIB_TCPABORTFAILED, /* TCPAbortFailed */ LINUX_MIB_TCPMEMORYPRESSURES, /* TCPMemoryPressures */ + LINUX_MIB_TCPMEMORYPRESSURESCHRONO, /* TCPMemoryPressuresChrono */ LINUX_MIB_TCPSACKDISCARD, /* TCPSACKDiscard */ LINUX_MIB_TCPDSACKIGNOREDOLD, /* TCPSACKIgnoredOld */ LINUX_MIB_TCPDSACKIGNOREDNOUNDO, /* TCPSACKIgnoredNoUndo */ -- cgit v1.2.3 From 9fe8bcec0dbc19604acc3a2cd469febf96f0d59a Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 8 Jun 2017 08:44:15 +0200 Subject: net: bridge: Receive notification about successful FDB offload When a new static FDB is added to the bridge a notification is sent to the driver for offload. In case of successful offload the driver should notify the bridge back, which in turn should mark the FDB as offloaded. Currently, externally learned is equivalent for being offloaded which is not correct due to the fact that FDBs which are added from user-space are also marked as externally learned. In order to specify if an FDB was successfully offloaded a new flag is introduced. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index f3d16dbe09d6..3199d28980b3 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -41,6 +41,7 @@ enum { #define NTF_MASTER 0x04 #define NTF_PROXY 0x08 /* == ATF_PUBL */ #define NTF_EXT_LEARNED 0x10 +#define NTF_OFFLOADED 0x20 #define NTF_ROUTER 0x80 /* -- cgit v1.2.3 From 772c344dbb23b2ce4568ac30afae92a842fa6d8f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 7 Jun 2017 18:02:32 +0300 Subject: net: ipmr: add getlink support Currently there's no way to dump the VIF table for an ipmr table other than the default (via proc). This is a major issue when debugging ipmr issues and in general it is good to know which interfaces are configured. This patch adds support for RTM_GETLINK for the ipmr family so we can dump the VIF table and the ipmr table's current config for each table. We're protected by rtnl so no need to acquire RCU or mrt_lock. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/mroute.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index 1fe4c1e7d66e..f904367c0cee 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -110,6 +110,48 @@ struct igmpmsg { struct in_addr im_src,im_dst; }; +/* ipmr netlink table attributes */ +enum { + IPMRA_TABLE_UNSPEC, + IPMRA_TABLE_ID, + IPMRA_TABLE_CACHE_RES_QUEUE_LEN, + IPMRA_TABLE_MROUTE_REG_VIF_NUM, + IPMRA_TABLE_MROUTE_DO_ASSERT, + IPMRA_TABLE_MROUTE_DO_PIM, + IPMRA_TABLE_VIFS, + __IPMRA_TABLE_MAX +}; +#define IPMRA_TABLE_MAX (__IPMRA_TABLE_MAX - 1) + +/* ipmr netlink vif attribute format + * [ IPMRA_TABLE_VIFS ] - nested attribute + * [ IPMRA_VIF ] - nested attribute + * [ IPMRA_VIFA_xxx ] + */ +enum { + IPMRA_VIF_UNSPEC, + IPMRA_VIF, + __IPMRA_VIF_MAX +}; +#define IPMRA_VIF_MAX (__IPMRA_VIF_MAX - 1) + +/* vif-specific attributes */ +enum { + IPMRA_VIFA_UNSPEC, + IPMRA_VIFA_IFINDEX, + IPMRA_VIFA_VIF_ID, + IPMRA_VIFA_FLAGS, + IPMRA_VIFA_BYTES_IN, + IPMRA_VIFA_BYTES_OUT, + IPMRA_VIFA_PACKETS_IN, + IPMRA_VIFA_PACKETS_OUT, + IPMRA_VIFA_LOCAL_ADDR, + IPMRA_VIFA_REMOTE_ADDR, + IPMRA_VIFA_PAD, + __IPMRA_VIFA_MAX +}; +#define IPMRA_VIFA_MAX (__IPMRA_VIFA_MAX - 1) + /* That's all usermode folks */ #define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */ -- cgit v1.2.3 From ded092cd73c2c56a394b936f86897f29b2e131c0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Jun 2017 00:50:47 +0200 Subject: bpf: add bpf_set_hash helper for tc progs Allow for tc BPF programs to set a skb->hash, apart from clearing and triggering a recalc that we have right now. It allows for BPF to implement a custom hashing routine for skb_get_hash(). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9b2c10b45733..f94b48b168dc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -513,6 +513,11 @@ union bpf_attr { * Get the owner uid of the socket stored inside sk_buff. * @skb: pointer to skb * Return: uid of the socket owner on success or overflowuid if failed. + * + * u32 bpf_set_hash(skb, hash) + * Set full skb->hash. + * @skb: pointer to skb + * @hash: hash to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -562,7 +567,8 @@ union bpf_attr { FN(xdp_adjust_head), \ FN(probe_read_str), \ FN(get_socket_cookie), \ - FN(get_socket_uid), + FN(get_socket_uid), \ + FN(set_hash), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 91b5ab628929d97357108594610e7c07be93e2fd Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Fri, 9 Jun 2017 13:08:42 +0100 Subject: cfg80211: support 4-way handshake offloading for WPA/WPA2-PSK Let drivers advertise support for station-mode 4-way handshake offloading with a new NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK flag. Extend use of NL80211_ATTR_PMK attribute indicating it might be passed as part of NL80211_CMD_CONNECT command, and contain the PSK (which is the PMK, hence the name.) The driver/device is assumed to handle the 4-way handshake by itself in this case (including key derivations, etc.), instead of relying on the supplicant. This patch is somewhat based on this one (by Vladimir Kondratiev): https://patchwork.kernel.org/patch/1309561/. Signed-off-by: Vladimir Kondratiev Signed-off-by: Eliad Peller Signed-off-by: Luca Coelho [arend.vanspriel@broadcom.com rebase dealing with existing ATTR_PMK] Signed-off-by: Arend van Spriel [reword NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK docs to indicate that this offload might be required] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b8c44b98f12d..f1f7da25bca4 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -172,6 +172,18 @@ * Multiple such rules can be created. */ +/** + * DOC: WPA/WPA2 EAPOL handshake offload + * + * By setting @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK flag drivers + * can indicate they support offloading EAPOL handshakes for WPA/WPA2 + * preshared key authentication. In %NL80211_CMD_CONNECT the preshared + * key should be specified using %NL80211_ATTR_PMK. Drivers supporting + * this offload may reject the %NL80211_CMD_CONNECT when no preshared + * key material is provided, for example when that driver does not + * support setting the temporal keys through %CMD_NEW_KEY. + */ + /** * DOC: FILS shared key authentication offload * @@ -2080,8 +2092,10 @@ enum nl80211_commands { * identifying the scope of PMKSAs. This is used with * @NL80211_CMD_SET_PMKSA and @NL80211_CMD_DEL_PMKSA. * - * @NL80211_ATTR_PMK: PMK for the PMKSA identified by %NL80211_ATTR_PMKID. - * This is used with @NL80211_CMD_SET_PMKSA. + * @NL80211_ATTR_PMK: attribute for passing PMK key material. Used with + * %NL80211_CMD_SET_PMKSA for the PMKSA identified by %NL80211_ATTR_PMKID. + * For %NL80211_CMD_CONNECT it is used to provide PSK for offloading 4-way + * handshake for WPA/WPA2-PSK networks. * * @NL80211_ATTR_SCHED_SCAN_MULTI: flag attribute which user-space shall use to * indicate that it supports multiple active scheduled scan requests. @@ -4852,6 +4866,9 @@ enum nl80211_feature_flags { * RSSI threshold values to monitor rather than exactly one threshold. * @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD: Driver SME supports FILS shared key * authentication with %NL80211_CMD_CONNECT. + * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK: Device wants to do 4-way + * handshake with PSK in station mode (PSK is passed as part of the connect + * and associate commands), doing it in the host might not be supported. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4872,6 +4889,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI, NL80211_EXT_FEATURE_CQM_RSSI_LIST, NL80211_EXT_FEATURE_FILS_SK_OFFLOAD, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 3a00df5707b6af715e78c26569800e0c2eb615fe Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 9 Jun 2017 13:08:43 +0100 Subject: cfg80211: support 4-way handshake offloading for 802.1X Add API for setting the PMK to the driver. For FT support, allow setting also the PMK-R0 Name. This can be used by drivers that support 4-Way handshake offload while IEEE802.1X authentication is managed by upper layers. Signed-off-by: Avraham Stern Signed-off-by: Johannes Berg [arend.vanspriel@broadcom.com: add WANT_1X_4WAY_HS attribute] Signed-off-by: Arend van Spriel [reword NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X docs a bit to say that the device may require it] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f1f7da25bca4..073e26850195 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -182,6 +182,17 @@ * this offload may reject the %NL80211_CMD_CONNECT when no preshared * key material is provided, for example when that driver does not * support setting the temporal keys through %CMD_NEW_KEY. + * + * Similarly @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X flag can be + * set by drivers indicating offload support of the PTK/GTK EAPOL + * handshakes during 802.1X authentication. In order to use the offload + * the %NL80211_CMD_CONNECT should have %NL80211_ATTR_WANT_1X_4WAY_HS + * attribute flag. Drivers supporting this offload may reject the + * %NL80211_CMD_CONNECT when the attribute flag is not present. + * + * For 802.1X the PMK or PMK-R0 are set by providing %NL80211_ATTR_PMK + * using %NL80211_CMD_SET_PMK. For offloaded FT support also + * %NL80211_ATTR_PMKR0_NAME must be provided. */ /** @@ -959,6 +970,14 @@ * does not result in a change for the current association. Currently, * only the %NL80211_ATTR_IE data is used and updated with this command. * + * @NL80211_CMD_SET_PMK: For offloaded 4-Way handshake, set the PMK or PMK-R0 + * for the given authenticator address (specified with &NL80211_ATTR_MAC). + * When &NL80211_ATTR_PMKR0_NAME is set, &NL80211_ATTR_PMK specifies the + * PMK-R0, otherwise it specifies the PMK. + * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously + * configured PMK for the authenticator address identified by + * &NL80211_ATTR_MAC. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1158,6 +1177,9 @@ enum nl80211_commands { NL80211_CMD_UPDATE_CONNECT_PARAMS, + NL80211_CMD_SET_PMK, + NL80211_CMD_DEL_PMK, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2095,13 +2117,20 @@ enum nl80211_commands { * @NL80211_ATTR_PMK: attribute for passing PMK key material. Used with * %NL80211_CMD_SET_PMKSA for the PMKSA identified by %NL80211_ATTR_PMKID. * For %NL80211_CMD_CONNECT it is used to provide PSK for offloading 4-way - * handshake for WPA/WPA2-PSK networks. + * handshake for WPA/WPA2-PSK networks. For 802.1X authentication it is + * used with %NL80211_CMD_SET_PMK. For offloaded FT support this attribute + * specifies the PMK-R0 if NL80211_ATTR_PMKR0_NAME is included as well. * * @NL80211_ATTR_SCHED_SCAN_MULTI: flag attribute which user-space shall use to * indicate that it supports multiple active scheduled scan requests. * @NL80211_ATTR_SCHED_SCAN_MAX_REQS: indicates maximum number of scheduled * scan request that may be active for the device (u32). * + * @NL80211_ATTR_WANT_1X_4WAY_HS: flag attribute which user-space can include + * in %NL80211_CMD_CONNECT to indicate that for 802.1X authentication it + * wants to use the supported offload of the 4-way handshake. + * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2524,6 +2553,9 @@ enum nl80211_attrs { NL80211_ATTR_SCHED_SCAN_MULTI, NL80211_ATTR_SCHED_SCAN_MAX_REQS, + NL80211_ATTR_WANT_1X_4WAY_HS, + NL80211_ATTR_PMKR0_NAME, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -4869,6 +4901,10 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK: Device wants to do 4-way * handshake with PSK in station mode (PSK is passed as part of the connect * and associate commands), doing it in the host might not be supported. + * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X: Device wants to do doing 4-way + * handshake with 802.1X in station mode (will pass EAP frames to the host + * and accept the set_pmk/del_pmk commands), doing it in the host might not + * be supported. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4890,6 +4926,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_CQM_RSSI_LIST, NL80211_EXT_FEATURE_FILS_SK_OFFLOAD, NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From f45cbe6e691fcdeda480ecc9c66533a8277f0ca4 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 9 Jun 2017 13:08:45 +0100 Subject: nl80211: add authorized flag to ROAM event Drivers that initiate roaming while being connected to a network that uses 802.1X authentication need to inform user space if 802.1X authentication is further required after roaming. For example, when using the Fast transition protocol, roaming within the mobility domain does not require new 802.1X authentication, but roaming to another mobility domain does. In addition, some drivers may not support 802.1X authentication (so it has to be done in user space), while other drivers do. Add a flag to the roaming notification to indicate if user space is required to do 802.1X authentication after the roaming or not. This flag will only be used for networks that use 802.1X authentication. For networks that do not use 802.1X authentication it is assumed that no further action is required from user space after the roaming notification. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho [arend.vanspriel@broadcom.com reuse NL80211_ATTR_PORT_AUTHORIZED] Signed-off-by: Arend van Spriel [rebase to apply w/o the flag in CONNECT] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 073e26850195..72f15c3fc5a6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -571,6 +571,12 @@ * well to remain backwards compatible. * @NL80211_CMD_ROAM: request that the card roam (currently not implemented), * sent as an event when the card/driver roamed by itself. + * When used as an event, and the driver roamed in a network that requires + * 802.1X authentication, %NL80211_ATTR_PORT_AUTHORIZED should be set + * if the 802.1X authentication was done by the driver or if roaming was + * done using Fast Transition protocol (in which case 802.1X authentication + * is not needed). If %NL80211_ATTR_PORT_AUTHORIZED is not set, user space + * is responsible for the 802.1X authentication. * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify * userspace that a connection was dropped by the AP or due to other * reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and @@ -2130,6 +2136,10 @@ enum nl80211_commands { * in %NL80211_CMD_CONNECT to indicate that for 802.1X authentication it * wants to use the supported offload of the 4-way handshake. * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT. + * @NL80211_ATTR_PORT_AUTHORIZED: flag attribute used in %NL80211_CMD_ROAMED + * notification indicating that that 802.1X authentication was done by + * the driver or is not needed (because roaming used the Fast Transition + * protocol). * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2555,6 +2565,7 @@ enum nl80211_attrs { NL80211_ATTR_WANT_1X_4WAY_HS, NL80211_ATTR_PMKR0_NAME, + NL80211_ATTR_PORT_AUTHORIZED, /* add attributes here, update the policy in nl80211.c */ -- cgit v1.2.3 From ea304a99b06e6c05a61c85f05c75aac6ff545806 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Fri, 9 Jun 2017 13:08:46 +0100 Subject: nl80211: remove desciption about request from NL80211_CMD_ROAM The description of NL80211_CMD_ROAM indicated possibility for a request to roam issued by user-space. However, it also states that as not being implemented right now. This has been so since commit b23aa676ab9d ("cfg80211: connect/disconnect API") added in 2009. So it seems safe to assume it will not be added any time soon and thus remove it. Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 72f15c3fc5a6..828aa4703e22 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -569,14 +569,13 @@ * authentication/association or not receiving a response from the AP. * Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as * well to remain backwards compatible. - * @NL80211_CMD_ROAM: request that the card roam (currently not implemented), - * sent as an event when the card/driver roamed by itself. - * When used as an event, and the driver roamed in a network that requires - * 802.1X authentication, %NL80211_ATTR_PORT_AUTHORIZED should be set - * if the 802.1X authentication was done by the driver or if roaming was - * done using Fast Transition protocol (in which case 802.1X authentication - * is not needed). If %NL80211_ATTR_PORT_AUTHORIZED is not set, user space - * is responsible for the 802.1X authentication. + * @NL80211_CMD_ROAM: notifcation indicating the card/driver roamed by itself. + * When the driver roamed in a network that requires 802.1X authentication, + * %NL80211_ATTR_PORT_AUTHORIZED should be set if the 802.1X authentication + * was done by the driver or if roaming was done using Fast Transition + * protocol (in which case 802.1X authentication is not needed). If + * %NL80211_ATTR_PORT_AUTHORIZED is not set, user space is responsible for + * the 802.1X authentication. * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify * userspace that a connection was dropped by the AP or due to other * reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and -- cgit v1.2.3 From 734942cc4ea6478eed125af258da1bdbb4afe578 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Jun 2017 11:37:14 -0700 Subject: tcp: ULP infrastructure Add the infrustructure for attaching Upper Layer Protocols (ULPs) over TCP sockets. Based on a similar infrastructure in tcp_cong. The idea is that any ULP can add its own logic by changing the TCP proto_ops structure to its own methods. Example usage: setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls")); modules will call: tcp_register_ulp(&tcp_tls_ulp_ops); to register/unregister their ulp, with an init function and name. A list of registered ulps will be returned by tcp_get_available_ulp, which is hooked up to /proc. Example: $ cat /proc/sys/net/ipv4/tcp_available_ulp tls There is currently no functionality to remove or chain ULPs, but it should be possible to add these in the future if needed. Signed-off-by: Boris Pismenny Signed-off-by: Dave Watson Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 38a2b07afdff..8204dcebc6f3 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -117,6 +117,7 @@ enum { #define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ #define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ #define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ +#define TCP_ULP 31 /* Attach a ULP to a TCP connection */ struct tcp_repair_opt { __u32 opt_code; -- cgit v1.2.3 From 3c4d7559159bfe1e3b94df3a657b2cda3a34e218 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Jun 2017 11:37:39 -0700 Subject: tls: kernel TLS support Software implementation of transport layer security, implemented using ULP infrastructure. tcp proto_ops are replaced with tls equivalents of sendmsg and sendpage. Only symmetric crypto is done in the kernel, keys are passed by setsockopt after the handshake is complete. All control messages are supported via CMSG data - the actual symmetric encryption is the same, just the message type needs to be passed separately. For user API, please see Documentation patch. Pieces that can be shared between hw and sw implementation are in tls_main.c Signed-off-by: Boris Pismenny Signed-off-by: Ilya Lesokhin Signed-off-by: Aviad Yehezkel Signed-off-by: Dave Watson Signed-off-by: David S. Miller --- include/uapi/linux/tls.h | 79 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 include/uapi/linux/tls.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h new file mode 100644 index 000000000000..cc1d21db35d8 --- /dev/null +++ b/include/uapi/linux/tls.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UAPI_LINUX_TLS_H +#define _UAPI_LINUX_TLS_H + +#include +#include +#include +#include +#include + +/* TLS socket options */ +#define TLS_TX 1 /* Set transmit parameters */ + +/* Supported versions */ +#define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) +#define TLS_VERSION_MAJOR(ver) (((ver) >> 8) & 0xFF) + +#define TLS_VERSION_NUMBER(id) ((((id##_VERSION_MAJOR) & 0xFF) << 8) | \ + ((id##_VERSION_MINOR) & 0xFF)) + +#define TLS_1_2_VERSION_MAJOR 0x3 +#define TLS_1_2_VERSION_MINOR 0x3 +#define TLS_1_2_VERSION TLS_VERSION_NUMBER(TLS_1_2) + +/* Supported ciphers */ +#define TLS_CIPHER_AES_GCM_128 51 +#define TLS_CIPHER_AES_GCM_128_IV_SIZE 8 +#define TLS_CIPHER_AES_GCM_128_KEY_SIZE 16 +#define TLS_CIPHER_AES_GCM_128_SALT_SIZE 4 +#define TLS_CIPHER_AES_GCM_128_TAG_SIZE 16 +#define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE 8 + +#define TLS_SET_RECORD_TYPE 1 + +struct tls_crypto_info { + __u16 version; + __u16 cipher_type; +}; + +struct tls12_crypto_info_aes_gcm_128 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_AES_GCM_128_IV_SIZE]; + unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_AES_GCM_128_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE]; +}; + +#endif /* _UAPI_LINUX_TLS_H */ -- cgit v1.2.3 From 86087e170cd1f19e9b25e5d944d9f52fad9470f4 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 14 Jun 2017 21:19:31 +0200 Subject: net: sched: act_tunnel_key: make UDP checksum configurable Allow requesting of zero UDP checksum for encapsulated packets. The name and meaning of the attribute is "NO_CSUM" in order to have the same meaning of the attribute missing and being 0. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_tunnel_key.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h index 84ea55e1076b..afcd4be953e2 100644 --- a/include/uapi/linux/tc_act/tc_tunnel_key.h +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -34,6 +34,7 @@ enum { TCA_TUNNEL_KEY_ENC_KEY_ID, /* be64 */ TCA_TUNNEL_KEY_PAD, TCA_TUNNEL_KEY_ENC_DST_PORT, /* be16 */ + TCA_TUNNEL_KEY_NO_CSUM, /* u8 */ __TCA_TUNNEL_KEY_MAX, }; -- cgit v1.2.3 From 58038695e62b4473e4d70e1503933579c640cd52 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 15 Jun 2017 17:29:09 -0700 Subject: net: Add IFLA_XDP_PROG_ID Expose prog_id through IFLA_XDP_PROG_ID. This patch makes modification to generic_xdp. The later patches will modify other xdp-supported drivers. prog_id is added to struct net_dev_xdp. iproute2 patch will be followed. Here is how the 'ip link' will look like: > ip link show eth0 3: eth0: mtu 1500 xdp(prog_id:1) qdisc fq_codel state UP mode DEFAULT group default qlen 1000 Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8ed679fe603f..dd88375a6580 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -907,6 +907,7 @@ enum { IFLA_XDP_FD, IFLA_XDP_ATTACHED, IFLA_XDP_FLAGS, + IFLA_XDP_PROG_ID, __IFLA_XDP_MAX, }; -- cgit v1.2.3 From 8917a777be3ba566377be05117f71b93a5fd909d Mon Sep 17 00:00:00 2001 From: Ivan Delalande Date: Thu, 15 Jun 2017 18:07:07 -0700 Subject: tcp: md5: add TCP_MD5SIG_EXT socket option to set a key address prefix Replace first padding in the tcp_md5sig structure with a new flag field and address prefix length so it can be specified when configuring a new key for TCP MD5 signature. The tcpm_flags field will only be used if the socket option is TCP_MD5SIG_EXT to avoid breaking existing programs, and tcpm_prefixlen only when the TCP_MD5SIG_FLAG_PREFIX flag is set. Signed-off-by: Bob Gilligan Signed-off-by: Eric Mowat Signed-off-by: Ivan Delalande Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 8204dcebc6f3..a5507c977497 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -117,7 +117,8 @@ enum { #define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ #define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ #define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ -#define TCP_ULP 31 /* Attach a ULP to a TCP connection */ +#define TCP_ULP 31 /* Attach a ULP to a TCP connection */ +#define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */ struct tcp_repair_opt { __u32 opt_code; @@ -235,11 +236,15 @@ enum { /* for TCP_MD5SIG socket option */ #define TCP_MD5SIG_MAXKEYLEN 80 +/* tcp_md5sig extension flags for TCP_MD5SIG_EXT */ +#define TCP_MD5SIG_FLAG_PREFIX 1 /* address prefix length */ + struct tcp_md5sig { struct __kernel_sockaddr_storage tcpm_addr; /* address associated */ - __u16 __tcpm_pad1; /* zero */ + __u8 tcpm_flags; /* extension flags */ + __u8 tcpm_prefixlen; /* address prefix */ __u16 tcpm_keylen; /* key length */ - __u32 __tcpm_pad2; /* zero */ + __u32 __tcpm_pad; /* zero */ __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ }; -- cgit v1.2.3 From a520b49ec15576784774f77c914d7020fa7aef13 Mon Sep 17 00:00:00 2001 From: Maya Erez Date: Fri, 16 Jun 2017 10:38:05 +0300 Subject: wil6210: remove ioctl interface Wireless drivers should not be using ioctl interface, hence remove this interface for wil6210 driver. Signed-off-by: Maya Erez Signed-off-by: Kalle Valo --- include/uapi/linux/wil6210_uapi.h | 87 --------------------------------------- 1 file changed, 87 deletions(-) delete mode 100644 include/uapi/linux/wil6210_uapi.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/wil6210_uapi.h b/include/uapi/linux/wil6210_uapi.h deleted file mode 100644 index 6a3cddd156c4..000000000000 --- a/include/uapi/linux/wil6210_uapi.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2014 Qualcomm Atheros, Inc. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef __WIL6210_UAPI_H__ -#define __WIL6210_UAPI_H__ - -#if !defined(__KERNEL__) -#define __user -#endif - -#include - -/* Numbers SIOCDEVPRIVATE and SIOCDEVPRIVATE + 1 - * are used by Android devices to implement PNO (preferred network offload). - * Albeit it is temporary solution, use different numbers to avoid conflicts - */ - -/** - * Perform 32-bit I/O operation to the card memory - * - * User code should arrange data in memory like this: - * - * struct wil_memio io; - * struct ifreq ifr = { - * .ifr_data = &io, - * }; - */ -#define WIL_IOCTL_MEMIO (SIOCDEVPRIVATE + 2) - -/** - * Perform block I/O operation to the card memory - * - * User code should arrange data in memory like this: - * - * void *buf; - * struct wil_memio_block io = { - * .block = buf, - * }; - * struct ifreq ifr = { - * .ifr_data = &io, - * }; - */ -#define WIL_IOCTL_MEMIO_BLOCK (SIOCDEVPRIVATE + 3) - -/** - * operation to perform - * - * @wil_mmio_op_mask - bits defining operation, - * @wil_mmio_addr_mask - bits defining addressing mode - */ -enum wil_memio_op { - wil_mmio_read = 0, - wil_mmio_write = 1, - wil_mmio_op_mask = 0xff, - wil_mmio_addr_linker = 0 << 8, - wil_mmio_addr_ahb = 1 << 8, - wil_mmio_addr_bar = 2 << 8, - wil_mmio_addr_mask = 0xff00, -}; - -struct wil_memio { - uint32_t op; /* enum wil_memio_op */ - uint32_t addr; /* should be 32-bit aligned */ - uint32_t val; -}; - -struct wil_memio_block { - uint32_t op; /* enum wil_memio_op */ - uint32_t addr; /* should be 32-bit aligned */ - uint32_t size; /* should be multiple of 4 */ - void __user *block; /* block address */ -}; - -#endif /* __WIL6210_UAPI_H__ */ -- cgit v1.2.3 From 94df30a6521becea7fda16f2c12ff9a01cac1da7 Mon Sep 17 00:00:00 2001 From: Julien Gomes Date: Tue, 20 Jun 2017 13:54:15 -0700 Subject: rtnetlink: add NEWCACHEREPORT message type New NEWCACHEREPORT message type to be used for cache reports sent via Netlink, effectively allowing splitting cache report reception from mroute programming. Suggested-by: Ryan Halbrook Signed-off-by: Julien Gomes Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 564790e854f7..cd1afb900929 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -146,6 +146,9 @@ enum { RTM_GETSTATS = 94, #define RTM_GETSTATS RTM_GETSTATS + RTM_NEWCACHEREPORT = 96, +#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; -- cgit v1.2.3 From 5f729eaabef9308cfaa4b27c9b3f120253eff79b Mon Sep 17 00:00:00 2001 From: Julien Gomes Date: Tue, 20 Jun 2017 13:54:16 -0700 Subject: rtnetlink: add restricted rtnl groups for ipv4 and ipv6 mroute Add RTNLGRP_{IPV4,IPV6}_MROUTE_R as two new restricted groups for the NETLINK_ROUTE family. Binding to these groups specifically requires CAP_NET_ADMIN to allow multicast of sensitive messages (e.g. mroute cache reports). Suggested-by: Nikolay Aleksandrov Signed-off-by: Julien Gomes Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index cd1afb900929..d148505010a7 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -669,6 +669,10 @@ enum rtnetlink_groups { #define RTNLGRP_NSID RTNLGRP_NSID RTNLGRP_MPLS_NETCONF, #define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF + RTNLGRP_IPV4_MROUTE_R, +#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R + RTNLGRP_IPV6_MROUTE_R, +#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) -- cgit v1.2.3 From 5a645dd86c1be64728578bcb1bdfb96e21815acb Mon Sep 17 00:00:00 2001 From: Julien Gomes Date: Tue, 20 Jun 2017 13:54:17 -0700 Subject: ipmr: add netlink notifications on igmpmsg cache reports Add Netlink notifications on cache reports in ipmr, in addition to the existing igmpmsg sent to mroute_sk. Send RTM_NEWCACHEREPORT notifications to RTNLGRP_IPV4_MROUTE_R. MSGTYPE, VIF_ID, SRC_ADDR and DST_ADDR Netlink attributes contain the same data as their equivalent fields in the igmpmsg header. PKT attribute is the packet sent to mroute_sk, without the added igmpmsg header. Suggested-by: Ryan Halbrook Signed-off-by: Julien Gomes Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/mroute.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index f904367c0cee..e8e5041dea8e 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -152,6 +152,18 @@ enum { }; #define IPMRA_VIFA_MAX (__IPMRA_VIFA_MAX - 1) +/* ipmr netlink cache report attributes */ +enum { + IPMRA_CREPORT_UNSPEC, + IPMRA_CREPORT_MSGTYPE, + IPMRA_CREPORT_VIF_ID, + IPMRA_CREPORT_SRC_ADDR, + IPMRA_CREPORT_DST_ADDR, + IPMRA_CREPORT_PKT, + __IPMRA_CREPORT_MAX +}; +#define IPMRA_CREPORT_MAX (__IPMRA_CREPORT_MAX - 1) + /* That's all usermode folks */ #define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */ -- cgit v1.2.3 From dd12d15c9a5b422331426980ddf70522c57c3392 Mon Sep 17 00:00:00 2001 From: Julien Gomes Date: Tue, 20 Jun 2017 13:54:18 -0700 Subject: ip6mr: add netlink notifications on mrt6msg cache reports Add Netlink notifications on cache reports in ip6mr, in addition to the existing mrt6msg sent to mroute6_sk. Send RTM_NEWCACHEREPORT notifications to RTNLGRP_IPV6_MROUTE_R. MSGTYPE, MIF_ID, SRC_ADDR and DST_ADDR Netlink attributes contain the same data as their equivalent fields in the mrt6msg header. PKT attribute is the packet sent to mroute6_sk, without the added mrt6msg header. Suggested-by: Ryan Halbrook Signed-off-by: Julien Gomes Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/mroute6.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index ed5721148768..e4746816c855 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -133,4 +133,16 @@ struct mrt6msg { struct in6_addr im6_src, im6_dst; }; +/* ip6mr netlink cache report attributes */ +enum { + IP6MRA_CREPORT_UNSPEC, + IP6MRA_CREPORT_MSGTYPE, + IP6MRA_CREPORT_MIF_ID, + IP6MRA_CREPORT_SRC_ADDR, + IP6MRA_CREPORT_DST_ADDR, + IP6MRA_CREPORT_PKT, + __IP6MRA_CREPORT_MAX +}; +#define IP6MRA_CREPORT_MAX (__IP6MRA_CREPORT_MAX - 1) + #endif /* _UAPI__LINUX_MROUTE6_H */ -- cgit v1.2.3 From e86283071fb0eed28136adb52997888f4beb202b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 21 Jun 2017 20:16:11 +0200 Subject: bpf: expose prog id for cls_bpf and act_bpf In order to be able to retrieve the attached programs from cls_bpf and act_bpf, we need to expose the prog ids via netlink so that an application can later on get an fd based on the id through the BPF_PROG_GET_FD_BY_ID command, and dump related prog info via BPF_OBJ_GET_INFO_BY_FD command for bpf(2). Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 1 + include/uapi/linux/tc_act/tc_bpf.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 2055783e6ee9..d5e2bf68d0d4 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -369,6 +369,7 @@ enum { TCA_BPF_FLAGS, TCA_BPF_FLAGS_GEN, TCA_BPF_TAG, + TCA_BPF_ID, __TCA_BPF_MAX, }; diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index 975b50dc8d1d..8dc2ac05eecf 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -28,6 +28,7 @@ enum { TCA_ACT_BPF_NAME, TCA_ACT_BPF_PAD, TCA_ACT_BPF_TAG, + TCA_ACT_BPF_ID, __TCA_ACT_BPF_MAX, }; #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) -- cgit v1.2.3 From ee5d032f7d032e2cea354522a46b211de84c4e8c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Jun 2017 18:25:04 -0700 Subject: xdp: add HW offload mode flag for installing programs Add an installation-time flag for requesting that the program be installed only if it can be offloaded to HW. Internally new command for ndo_xdp is added, this way we avoid putting checks into drivers since they all return -EINVAL on an unknown command. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index dd88375a6580..ce777ec88e1e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -891,9 +891,12 @@ enum { #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) #define XDP_FLAGS_SKB_MODE (1U << 1) #define XDP_FLAGS_DRV_MODE (1U << 2) +#define XDP_FLAGS_HW_MODE (1U << 3) +#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \ + XDP_FLAGS_DRV_MODE | \ + XDP_FLAGS_HW_MODE) #define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ - XDP_FLAGS_SKB_MODE | \ - XDP_FLAGS_DRV_MODE) + XDP_FLAGS_MODES) /* These are stored into IFLA_XDP_ATTACHED on dump. */ enum { -- cgit v1.2.3 From ce158e580a5bdc93286a3b630638bdd47d4ec663 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Jun 2017 18:25:09 -0700 Subject: xdp: add reporting of offload mode Extend the XDP_ATTACHED_* values to include offloaded mode. Let drivers report whether program is installed in the driver or the HW by changing the prog_attached field from bool to u8 (type of the netlink attribute). Exploit the fact that the value of XDP_ATTACHED_DRV is 1, therefore since all drivers currently assign the mode with double negation: mode = !!xdp_prog; no drivers have to be modified. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ce777ec88e1e..8d062c58d5cb 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -903,6 +903,7 @@ enum { XDP_ATTACHED_NONE = 0, XDP_ATTACHED_DRV, XDP_ATTACHED_SKB, + XDP_ATTACHED_HW, }; enum { -- cgit v1.2.3 From 36a554cec119bbd20c4ec0cb96bd4712d124bfea Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Mon, 26 Jun 2017 19:52:30 +0300 Subject: nl80211: Don't verify owner_nlportid on NAN commands If NAN interface is created with NL80211_ATTR_SOCKET_OWNER, the socket that is used to create the interface is used for all NAN operations and reporting NAN events. However, it turns out that sending commands and receiving events on the same socket is not possible in a completely race-free way: If the socket buffer is overflowed by the events, the command response will not be sent. In that case the caller will block forever on recv. Using non-blocking socket for commands is more complicated and still the command response or ack may not be received. So, keep unicasting NAN events to the interface creator, but allow using a different socket for commands. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Reviewed-by: Johannes Berg Signed-off-by: Kalle Valo --- include/uapi/linux/nl80211.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 828aa4703e22..51626b4175c0 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1909,11 +1909,10 @@ enum nl80211_commands { * that configured the indoor setting, and the indoor operation would be * cleared when the socket is closed. * If set during NAN interface creation, the interface will be destroyed - * if the socket is closed just like any other interface. Moreover, only - * the netlink socket that created the interface will be allowed to add - * and remove functions. NAN notifications will be sent in unicast to that - * socket. Without this attribute, any socket can add functions and the - * notifications will be sent to the %NL80211_MCGRP_NAN multicast group. + * if the socket is closed just like any other interface. Moreover, NAN + * notifications will be sent in unicast to that socket. Without this + * attribute, the notifications will be sent to the %NL80211_MCGRP_NAN + * multicast group. * If set during %NL80211_CMD_ASSOCIATE or %NL80211_CMD_CONNECT the * station will deauthenticate when the socket is closed. * -- cgit v1.2.3 From 2cb5c8e378d10a57aa1c9eaee36bea46c27dd2b9 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 30 Jun 2017 13:32:57 -0400 Subject: sctp: Add peeloff-flags socket option Based on a request raised on the sctp devel list, there is a need to augment the sctp_peeloff operation while specifying the O_CLOEXEC and O_NONBLOCK flags (simmilar to the socket syscall). Since modifying the SCTP_SOCKOPT_PEELOFF socket option would break user space ABI for existing programs, this patch creates a new socket option SCTP_SOCKOPT_PEELOFF_FLAGS, which accepts a third flags parameter to allow atomic assignment of the socket descriptor flags. Tested successfully by myself and the requestor Signed-off-by: Neil Horman CC: Vlad Yasevich CC: "David S. Miller" CC: Andreas Steinmetz CC: Marcelo Ricardo Leitner Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index ced9d8b97426..6217ff8500a1 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -121,6 +121,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_RESET_STREAMS 119 #define SCTP_RESET_ASSOC 120 #define SCTP_ADD_STREAMS 121 +#define SCTP_SOCKOPT_PEELOFF_FLAGS 122 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -978,6 +979,11 @@ typedef struct { int sd; } sctp_peeloff_arg_t; +typedef struct { + sctp_peeloff_arg_t p_arg; + unsigned flags; +} sctp_peeloff_flags_arg_t; + /* * Peer Address Thresholds socket option */ -- cgit v1.2.3 From 40304b2a1567fecc321f640ee4239556dd0f3ee0 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 30 Jun 2017 20:02:40 -0700 Subject: bpf: BPF support for sock_ops Created a new BPF program type, BPF_PROG_TYPE_SOCK_OPS, and a corresponding struct that allows BPF programs of this type to access some of the socket's fields (such as IP addresses, ports, etc.). It uses the existing bpf cgroups infrastructure so the programs can be attached per cgroup with full inheritance support. The program will be called at appropriate times to set relevant connections parameters such as buffer sizes, SYN and SYN-ACK RTOs, etc., based on connection information such as IP addresses, port numbers, etc. Alghough there are already 3 mechanisms to set parameters (sysctls, route metrics and setsockopts), this new mechanism provides some distinct advantages. Unlike sysctls, it can set parameters per connection. In contrast to route metrics, it can also use port numbers and information provided by a user level program. In addition, it could set parameters probabilistically for evaluation purposes (i.e. do something different on 10% of the flows and compare results with the other 90% of the flows). Also, in cases where IPv6 addresses contain geographic information, the rules to make changes based on the distance (or RTT) between the hosts are much easier than route metric rules and can be global. Finally, unlike setsockopt, it oes not require application changes and it can be updated easily at any time. Although the bpf cgroup framework already contains a sock related program type (BPF_PROG_TYPE_CGROUP_SOCK), I created the new type (BPF_PROG_TYPE_SOCK_OPS) beccause the existing type expects to be called only once during the connections's lifetime. In contrast, the new program type will be called multiple times from different places in the network stack code. For example, before sending SYN and SYN-ACKs to set an appropriate timeout, when the connection is established to set congestion control, etc. As a result it has "op" field to specify the type of operation requested. The purpose of this new program type is to simplify setting connection parameters, such as buffer sizes, TCP's SYN RTO, etc. For example, it is easy to use facebook's internal IPv6 addresses to determine if both hosts of a connection are in the same datacenter. Therefore, it is easy to write a BPF program to choose a small SYN RTO value when both hosts are in the same datacenter. This patch only contains the framework to support the new BPF program type, following patches add the functionality to set various connection parameters. This patch defines a new BPF program type: BPF_PROG_TYPE_SOCKET_OPS and a new bpf syscall command to load a new program of this type: BPF_PROG_LOAD_SOCKET_OPS. Two new corresponding structs (one for the kernel one for the user/BPF program): /* kernel version */ struct bpf_sock_ops_kern { struct sock *sk; __u32 op; union { __u32 reply; __u32 replylong[4]; }; }; /* user version * Some fields are in network byte order reflecting the sock struct * Use the bpf_ntohl helper macro in samples/bpf/bpf_endian.h to * convert them to host byte order. */ struct bpf_sock_ops { __u32 op; union { __u32 reply; __u32 replylong[4]; }; __u32 family; __u32 remote_ip4; /* In network byte order */ __u32 local_ip4; /* In network byte order */ __u32 remote_ip6[4]; /* In network byte order */ __u32 local_ip6[4]; /* In network byte order */ __u32 remote_port; /* In network byte order */ __u32 local_port; /* In host byte horder */ }; Currently there are two types of ops. The first type expects the BPF program to return a value which is then used by the caller (or a negative value to indicate the operation is not supported). The second type expects state changes to be done by the BPF program, for example through a setsockopt BPF helper function, and they ignore the return value. The reply fields of the bpf_sockt_ops struct are there in case a bpf program needs to return a value larger than an integer. Signed-off-by: Lawrence Brakmo Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f94b48b168dc..01cd485ccd4f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -120,12 +120,14 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_IN, BPF_PROG_TYPE_LWT_OUT, BPF_PROG_TYPE_LWT_XMIT, + BPF_PROG_TYPE_SOCK_OPS, }; enum bpf_attach_type { BPF_CGROUP_INET_INGRESS, BPF_CGROUP_INET_EGRESS, BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_SOCK_OPS, __MAX_BPF_ATTACH_TYPE }; @@ -720,4 +722,32 @@ struct bpf_map_info { __u32 map_flags; } __attribute__((aligned(8))); +/* User bpf_sock_ops struct to access socket values and specify request ops + * and their replies. + * Some of this fields are in network (bigendian) byte order and may need + * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h). + * New fields can only be added at the end of this structure + */ +struct bpf_sock_ops { + __u32 op; + union { + __u32 reply; + __u32 replylong[4]; + }; + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ +}; + +/* List of known BPF sock_ops operators. + * New entries can only be added at the end + */ +enum { + BPF_SOCK_OPS_VOID, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 8550f328f45db6d37981eb2041bc465810245c03 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 30 Jun 2017 20:02:42 -0700 Subject: bpf: Support for per connection SYN/SYN-ACK RTOs This patch adds support for setting a per connection SYN and SYN_ACK RTOs from within a BPF_SOCK_OPS program. For example, to set small RTOs when it is known both hosts are within a datacenter. Signed-off-by: Lawrence Brakmo Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 01cd485ccd4f..00702b294447 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -748,6 +748,9 @@ struct bpf_sock_ops { */ enum { BPF_SOCK_OPS_VOID, + BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or + * -1 if default value should be used + */ }; #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 13d3b1ebe28762c79e981931a41914fae5d04386 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 30 Jun 2017 20:02:44 -0700 Subject: bpf: Support for setting initial receive window This patch adds suppport for setting the initial advertized window from within a BPF_SOCK_OPS program. This can be used to support larger initial cwnd values in environments where it is known to be safe. Signed-off-by: Lawrence Brakmo Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 00702b294447..94d7ded1a6cf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -751,6 +751,10 @@ enum { BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or * -1 if default value should be used */ + BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized + * window (in packets) or -1 if default + * value should be used + */ }; #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 8c4b4c7e9ff0447995750d9329949fa082520269 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 30 Jun 2017 20:02:46 -0700 Subject: bpf: Add setsockopt helper function to bpf Added support for calling a subset of socket setsockopts from BPF_PROG_TYPE_SOCK_OPS programs. The code was duplicated rather than making the changes to call the socket setsockopt function because the changes required would have been larger. The ops supported are: SO_RCVBUF SO_SNDBUF SO_MAX_PACING_RATE SO_PRIORITY SO_RCVLOWAT SO_MARK Signed-off-by: Lawrence Brakmo Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 94d7ded1a6cf..dd43b22758d6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -520,6 +520,17 @@ union bpf_attr { * Set full skb->hash. * @skb: pointer to skb * @hash: hash to set + * + * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) + * Calls setsockopt. Not all opts are available, only those with + * integer optvals plus TCP_CONGESTION. + * Supported levels: SOL_SOCKET and IPROTO_TCP + * @bpf_socket: pointer to bpf_socket + * @level: SOL_SOCKET or IPROTO_TCP + * @optname: option name + * @optval: pointer to option value + * @optlen: length of optval in byes + * Return: 0 or negative error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -570,7 +581,8 @@ union bpf_attr { FN(probe_read_str), \ FN(get_socket_cookie), \ FN(get_socket_uid), \ - FN(set_hash), + FN(set_hash), \ + FN(setsockopt), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 9872a4bde31b0b055448e9ac1f4c9ee62d978766 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 30 Jun 2017 20:02:47 -0700 Subject: bpf: Add TCP connection BPF callbacks Added callbacks to BPF SOCK_OPS type program before an active connection is intialized and after a passive or active connection is established. The following patch demostrates how they can be used to set send and receive buffer sizes. Signed-off-by: Lawrence Brakmo Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index dd43b22758d6..2405fe304c98 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -767,6 +767,17 @@ enum { * window (in packets) or -1 if default * value should be used */ + BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an + * active connection is initialized + */ + BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an + * active connection is + * established + */ + BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a + * passive connection is + * established + */ }; #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 91b5b21c7c16899abb37f4a9e4388b4e9aae0b9d Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 30 Jun 2017 20:02:49 -0700 Subject: bpf: Add support for changing congestion control Added support for changing congestion control for SOCK_OPS bpf programs through the setsockopt bpf helper function. It also adds a new SOCK_OPS op, BPF_SOCK_OPS_NEEDS_ECN, that is needed for congestion controls, like dctcp, that need to enable ECN in the SYN packets. Signed-off-by: Lawrence Brakmo Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2405fe304c98..cc4725982bd8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -778,6 +778,9 @@ enum { * passive connection is * established */ + BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control + * needs ECN + */ }; #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From fc7478103c84af437ca3bfae71a82631f770bf7e Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 30 Jun 2017 20:02:51 -0700 Subject: bpf: Adds support for setting initial cwnd Adds a new bpf_setsockopt for TCP sockets, TCP_BPF_IW, which sets the initial congestion window. This can be used when the hosts are far apart (large RTTs) and it is safe to start with a large inital cwnd. Signed-off-by: Lawrence Brakmo Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cc4725982bd8..32755b538652 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -783,4 +783,6 @@ enum { */ }; +#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 13bf96411ad2bd162a4f9470d58c6bb579c96e21 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 30 Jun 2017 20:02:53 -0700 Subject: bpf: Adds support for setting sndcwnd clamp Adds a new bpf_setsockopt for TCP sockets, TCP_BPF_SNDCWND_CLAMP, which sets the initial congestion window. It is useful to limit the sndcwnd when the host are close to each other (small RTT). Signed-off-by: Lawrence Brakmo Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 32755b538652..a6a91e5e96fc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -784,5 +784,6 @@ enum { }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ +#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 2be7e212d5419a400d051c84ca9fdd083e5aacac Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 2 Jul 2017 02:13:26 +0200 Subject: bpf: add bpf_skb_adjust_room helper This work adds a helper that can be used to adjust net room of an skb. The helper is generic and can be further extended in future. Main use case is for having a programmatic way to add/remove room to v4/v6 header options along with cls_bpf on egress and ingress hook of the data path. It reuses most of the infrastructure that we added for the bpf_skb_change_type() helper which can be used in nat64 translations. Similarly, the helper only takes care of adjusting the room so that related data is populated and csum adapted out of the BPF program using it. Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a6a91e5e96fc..e99e3e6f8b37 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -531,6 +531,14 @@ union bpf_attr { * @optval: pointer to option value * @optlen: length of optval in byes * Return: 0 or negative error + * + * int bpf_skb_adjust_room(skb, len_diff, mode, flags) + * Grow or shrink room in sk_buff. + * @skb: pointer to skb + * @len_diff: (signed) amount of room to grow/shrink + * @mode: operation mode (enum bpf_adj_room_mode) + * @flags: reserved for future use + * Return: 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -582,7 +590,8 @@ union bpf_attr { FN(get_socket_cookie), \ FN(get_socket_uid), \ FN(set_hash), \ - FN(setsockopt), + FN(setsockopt), \ + FN(skb_adjust_room), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -632,6 +641,11 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Mode for BPF_FUNC_skb_adjust_room helper. */ +enum bpf_adj_room_mode { + BPF_ADJ_ROOM_NET, +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ -- cgit v1.2.3