From 40e44a1e669d078946f46853808a60d29e6f0885 Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Thu, 30 Nov 2017 11:35:59 -0800 Subject: net: ethtool: add support for reset of AP inside NIC interface. Add ETH_RESET_AP to reset the application processor(s) inside the NIC interface. Current ETH_RESET_MGMT supports a management processor inside this NIC. This is typically used for remote NIC management purposes. Application processors exist inside some SmartNICs to run various applications inside the NIC processor - be it a simple algorithm without an OS to as complex as hosting multiple VMs. Signed-off-by: Scott Branden Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index ac71559314e7..44a0b675a6bc 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1686,6 +1686,7 @@ enum ethtool_reset_flags { ETH_RESET_PHY = 1 << 6, /* Transceiver/PHY */ ETH_RESET_RAM = 1 << 7, /* RAM shared between * multiple components */ + ETH_RESET_AP = 1 << 8, /* Application processor */ ETH_RESET_DEDICATED = 0x0000ffff, /* All components dedicated to * this interface */ -- cgit v1.2.3 From f19397a5c65665d66e3866b42056f1f58b7a366b Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 1 Dec 2017 10:15:04 -0800 Subject: bpf: Add access to snd_cwnd and others in sock_ops Adds read access to snd_cwnd and srtt_us fields of tcp_sock. Since these fields are only valid if the socket associated with the sock_ops program call is a full socket, the field is_fullsock is also added to the bpf_sock_ops struct. If the socket is not a full socket, reading these fields returns 0. Note that in most cases it will not be necessary to check is_fullsock to know if there is a full socket. The context of the call, as specified by the 'op' field, can sometimes determine whether there is a full socket. The struct bpf_sock_ops has the following fields added: __u32 is_fullsock; /* Some TCP fields are only valid if * there is a full socket. If not, the * fields read as zero. */ __u32 snd_cwnd; __u32 srtt_us; /* Averaged RTT << 3 in usecs */ There is a new macro, SOCK_OPS_GET_TCP32(NAME), to make it easier to add read access to more 32 bit tcp_sock fields. Signed-off-by: Lawrence Brakmo Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4c223ab30293..80d62e88590c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -941,6 +941,12 @@ struct bpf_sock_ops { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 is_fullsock; /* Some TCP fields are only valid if + * there is a full socket. If not, the + * fields read as zero. + */ + __u32 snd_cwnd; + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ }; /* List of known BPF sock_ops operators. -- cgit v1.2.3 From 96f84061620c6325a2ca9a9a05b410e6461d03c3 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 4 Dec 2017 17:31:23 +0800 Subject: tun: add eBPF based queue selection method This patch introduces an eBPF based queue selection method. With this, the policy could be offloaded to userspace completely through a new ioctl TUNSETSTEERINGEBPF. Signed-off-by: Jason Wang Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/if_tun.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 030d3e6d6029..fb38c1797131 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -57,6 +57,7 @@ */ #define TUNSETVNETBE _IOW('T', 222, int) #define TUNGETVNETBE _IOR('T', 223, int) +#define TUNSETSTEERINGEBPF _IOR('T', 224, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 -- cgit v1.2.3 From 772a58693fc3116d05b7969223a80a6376e639eb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 8 Dec 2017 21:03:58 +0800 Subject: sctp: add stream interleave enable members and sockopt This patch adds intl_enable in asoc and netns, and strm_interleave in sctp_sock to indicate if stream interleave is enabled and supported. netns intl_enable would be set via procfs, but that is not added yet until all stream interleave codes are completely implemented; asoc intl_enable will be set when doing 4-shakehands. sp strm_interleave can be set by sockopt SCTP_INTERLEAVING_SUPPORTED which is also added in this patch. This socket option is defined in section 4.3.1 of RFC8260. Note that strm_interleave can only be set by sockopt when both netns intl_enable and sp frag_interleave are set. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index d9adab32dbee..6ed934c65a5f 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -125,6 +125,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_SOCKOPT_PEELOFF_FLAGS 122 #define SCTP_STREAM_SCHEDULER 123 #define SCTP_STREAM_SCHEDULER_VALUE 124 +#define SCTP_INTERLEAVING_SUPPORTED 125 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 -- cgit v1.2.3 From 65f5e357839e40817aead853d7a7f61ff828b52b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 8 Dec 2017 21:04:08 +0800 Subject: sctp: implement abort_pd for sctp_stream_interleave abort_pd is added as a member of sctp_stream_interleave, used to abort partial delivery for data or idata, called in sctp_cmd_assoc_failed. Since stream interleave allows to do partial delivery for each stream at the same time, sctp_intl_abort_pd for idata would be very different from the old function sctp_ulpq_abort_pd for data. Note that sctp_ulpevent_make_pdapi will support per stream in this patch by adding pdapi_stream and pdapi_seq in sctp_pdapi_event, as described in section 6.1.7 of RFC6458. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 6ed934c65a5f..4c4db14786bd 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -460,6 +460,8 @@ struct sctp_pdapi_event { __u32 pdapi_length; __u32 pdapi_indication; sctp_assoc_t pdapi_assoc_id; + __u32 pdapi_stream; + __u32 pdapi_seq; }; enum { SCTP_PARTIAL_DELIVERY_ABORTED=0, }; -- cgit v1.2.3 From f371b304f12e31fe30207c41ca7754564e0ea4dc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 11 Dec 2017 11:39:02 -0800 Subject: bpf/tracing: allow user space to query prog array on the same tp Commit e87c6bc3852b ("bpf: permit multiple bpf attachments for a single perf event") added support to attach multiple bpf programs to a single perf event. Although this provides flexibility, users may want to know what other bpf programs attached to the same tp interface. Besides getting visibility for the underlying bpf system, such information may also help consolidate multiple bpf programs, understand potential performance issues due to a large array, and debug (e.g., one bpf program which overwrites return code may impact subsequent program results). Commit 2541517c32be ("tracing, perf: Implement BPF programs attached to kprobes") utilized the existing perf ioctl interface and added the command PERF_EVENT_IOC_SET_BPF to attach a bpf program to a tracepoint. This patch adds a new ioctl command, given a perf event fd, to query the bpf program array attached to the same perf tracepoint event. The new uapi ioctl command: PERF_EVENT_IOC_QUERY_BPF The new uapi/linux/perf_event.h structure: struct perf_event_query_bpf { __u32 ids_len; __u32 prog_cnt; __u32 ids[0]; }; User space provides buffer "ids" for kernel to copy to. When returning from the kernel, the number of available programs in the array is set in "prog_cnt". The usage: struct perf_event_query_bpf *query = malloc(sizeof(*query) + sizeof(u32) * ids_len); query.ids_len = ids_len; err = ioctl(pmu_efd, PERF_EVENT_IOC_QUERY_BPF, query); if (err == 0) { /* query.prog_cnt is the number of available progs, * number of progs in ids: (ids_len == 0) ? 0 : query.prog_cnt */ } else if (errno == ENOSPC) { /* query.ids_len number of progs copied, * query.prog_cnt is the number of available progs */ } else { /* other errors */ } Signed-off-by: Yonghong Song Acked-by: Peter Zijlstra (Intel) Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- include/uapi/linux/perf_event.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b9a4953018ed..769533696483 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -418,6 +418,27 @@ struct perf_event_attr { __u16 __reserved_2; /* align to __u64 */ }; +/* + * Structure used by below PERF_EVENT_IOC_QUERY_BPF command + * to query bpf programs attached to the same perf tracepoint + * as the given perf event. + */ +struct perf_event_query_bpf { + /* + * The below ids array length + */ + __u32 ids_len; + /* + * Set by the kernel to indicate the number of + * available programs + */ + __u32 prog_cnt; + /* + * User provided buffer to store program ids + */ + __u32 ids[0]; +}; + #define perf_flags(attr) (*(&(attr)->read_format + 1)) /* @@ -433,6 +454,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) #define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, -- cgit v1.2.3 From 9802d86585db91655c7d1929a4f6bbe0952ea88e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Dec 2017 11:36:48 -0500 Subject: bpf: add a bpf_override_function helper Error injection is sloppy and very ad-hoc. BPF could fill this niche perfectly with it's kprobe functionality. We could make sure errors are only triggered in specific call chains that we care about with very specific situations. Accomplish this with the bpf_override_funciton helper. This will modify the probe'd callers return value to the specified value and set the PC to an override function that simply returns, bypassing the originally probed function. This gives us a nice clean way to implement systematic error injection for all of our code paths. Acked-by: Alexei Starovoitov Acked-by: Ingo Molnar Signed-off-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 80d62e88590c..595bda120cfb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -677,6 +677,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +740,8 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 7db7d9f369a47e1a46f93c320b45cb89e81723e7 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 19 Nov 2017 15:05:11 +0100 Subject: batman-adv: Add SPDX license identifier above copyright header The "Linux kernel licensing rules" require that each file has a SPDX license identifier as first line (and sometimes as second line). The FSFE REUSE practices [1] would also require the same tags but have no restrictions on the placement in the source file. Using the "Linux kernel licensing rules" is therefore also fulfilling the FSFE REUSE practices requirements at the same time. [1] https://reuse.software/practices/ Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index efd641c8a5d6..fb4533826163 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: ISC */ /* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors: * * Matthias Schiffer -- cgit v1.2.3 From a010579273bdfbee6ee79422dbebba3dcf18ebf7 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 19 Nov 2017 15:05:16 +0100 Subject: batman-adv: Change batman_adv.h license to MIT The ISC license is considered as not recommended in "Linux kernel licensing rules". It should only be used for existing code or for importing code from a different project with that license. But the kernel still has the similar sounding MIT/Expat license under the preferred licenses. Switching to this license for this relatively new file should therefore allow batman-adv to better follow the new licensing rules. Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Acked-by: Matthias Schiffer Acked-by: Andrew Lunn Signed-off-by: Simon Wunderlich --- include/uapi/linux/batman_adv.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index fb4533826163..ae00c99cbed0 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -1,19 +1,25 @@ -/* SPDX-License-Identifier: ISC */ +/* SPDX-License-Identifier: MIT */ /* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors: * * Matthias Schiffer * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. */ #ifndef _UAPI_LINUX_BATMAN_ADV_H_ -- cgit v1.2.3 From f551c91de262ba36b20c3ac19538afb4f4507441 Mon Sep 17 00:00:00 2001 From: William Tu Date: Wed, 13 Dec 2017 16:38:56 -0800 Subject: net: erspan: introduce erspan v2 for ip_gre The patch adds support for erspan version 2. Not all features are supported in this patch. The SGT (security group tag), GRA (timestamp granularity), FT (frame type) are set to fixed value. Only hardware ID and direction are configurable. Optional subheader is also not supported. Signed-off-by: William Tu Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + include/uapi/linux/if_tunnel.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 3ee3bf7c8526..87b7529fcdfe 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -47,6 +47,7 @@ #define ETH_P_PUP 0x0200 /* Xerox PUP packet */ #define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */ #define ETH_P_TSN 0x22F0 /* TSN (IEEE 1722) packet */ +#define ETH_P_ERSPAN2 0x22EB /* ERSPAN version 2 (type III) */ #define ETH_P_IP 0x0800 /* Internet Protocol packet */ #define ETH_P_X25 0x0805 /* CCITT X.25 */ #define ETH_P_ARP 0x0806 /* Address Resolution packet */ diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index e68dadbd6d45..1b3d148c4560 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -137,6 +137,9 @@ enum { IFLA_GRE_IGNORE_DF, IFLA_GRE_FWMARK, IFLA_GRE_ERSPAN_INDEX, + IFLA_GRE_ERSPAN_VER, + IFLA_GRE_ERSPAN_DIR, + IFLA_GRE_ERSPAN_HWID, __IFLA_GRE_MAX, }; -- cgit v1.2.3 From cc8b0b92a1699bc32f7fec71daa2bfc90de43a4d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Dec 2017 17:55:05 -0800 Subject: bpf: introduce function calls (function boundaries) Allow arbitrary function calls from bpf function to another bpf function. Since the beginning of bpf all bpf programs were represented as a single function and program authors were forced to use always_inline for all functions in their C code. That was causing llvm to unnecessary inflate the code size and forcing developers to move code to header files with little code reuse. With a bit of additional complexity teach verifier to recognize arbitrary function calls from one bpf function to another as long as all of functions are presented to the verifier as a single bpf program. New program layout: r6 = r1 // some code .. r1 = .. // arg1 r2 = .. // arg2 call pc+1 // function call pc-relative exit .. = r1 // access arg1 .. = r2 // access arg2 .. call pc+20 // second level of function call ... It allows for better optimized code and finally allows to introduce the core bpf libraries that can be reused in different projects, since programs are no longer limited by single elf file. With function calls bpf can be compiled into multiple .o files. This patch is the first step. It detects programs that contain multiple functions and checks that calls between them are valid. It splits the sequence of bpf instructions (one program) into a set of bpf functions that call each other. Calls to only known functions are allowed. In the future the verifier may allow calls to unresolved functions and will do dynamic linking. This logic supports statically linked bpf functions only. Such function boundary detection could have been done as part of control flow graph building in check_cfg(), but it's cleaner to separate function boundary detection vs control flow checks within a subprogram (function) into logically indepedent steps. Follow up patches may split check_cfg() further, but not check_subprogs(). Only allow bpf-to-bpf calls for root only and for non-hw-offloaded programs. These restrictions can be relaxed in the future. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 595bda120cfb..d01f1cb3cfc0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -197,8 +197,14 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 +/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative + * offset to another bpf function + */ +#define BPF_PSEUDO_CALL 1 + /* flags for BPF_MAP_UPDATE_ELEM command */ #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ -- cgit v1.2.3 From 06ef0ccb5a36e1feba9b413ff59a04ecc4407c1c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 18 Dec 2017 10:13:44 -0800 Subject: bpf/cgroup: fix a verification error for a CGROUP_DEVICE type prog The tools/testing/selftests/bpf test program test_dev_cgroup fails with the following error when compiled with llvm 6.0. (I did not try with earlier versions.) libbpf: load bpf program failed: Permission denied libbpf: -- BEGIN DUMP LOG --- libbpf: 0: (61) r2 = *(u32 *)(r1 +4) 1: (b7) r0 = 0 2: (55) if r2 != 0x1 goto pc+8 R0=inv0 R1=ctx(id=0,off=0,imm=0) R2=inv1 R10=fp0 3: (69) r2 = *(u16 *)(r1 +0) invalid bpf_context access off=0 size=2 ... The culprit is the following statement in dev_cgroup.c: short type = ctx->access_type & 0xFFFF; This code is typical as the ctx->access_type is assigned as below in kernel/bpf/cgroup.c: struct bpf_cgroup_dev_ctx ctx = { .access_type = (access << 16) | dev_type, .major = major, .minor = minor, }; The compiler converts it to u16 access while the verifier cgroup_dev_is_valid_access rejects any non u32 access. This patch permits the field access_type to be accessible with type u16 and u8 as well. Signed-off-by: Yonghong Song Tested-by: Roman Gushchin Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d01f1cb3cfc0..69eabfcb9bdb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1012,7 +1012,8 @@ struct bpf_perf_event_value { #define BPF_DEVCG_DEV_CHAR (1ULL << 1) struct bpf_cgroup_dev_ctx { - __u32 access_type; /* (access << 16) | type */ + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ + __u32 access_type; __u32 major; __u32 minor; }; -- cgit v1.2.3 From 983dafaab799511e092ffd006f3a064b37ccbccf Mon Sep 17 00:00:00 2001 From: Sunil Dutt Date: Wed, 13 Dec 2017 19:51:36 +0200 Subject: cfg80211: Scan results to also report the per chain signal strength This commit enhances the scan results to report the per chain signal strength based on the latest BSS update. This provides similar information to what is already available through STA information. Signed-off-by: Sunil Dutt Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f882fe1f9709..c587a61c32bf 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3862,6 +3862,9 @@ enum nl80211_bss_scan_width { * @NL80211_BSS_PARENT_BSSID. (u64). * @NL80211_BSS_PARENT_BSSID: the BSS according to which @NL80211_BSS_PARENT_TSF * is set. + * @NL80211_BSS_CHAIN_SIGNAL: per-chain signal strength of last BSS update. + * Contains a nested array of signal strength attributes (u8, dBm), + * using the nesting index as the antenna number. * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -3885,6 +3888,7 @@ enum nl80211_bss { NL80211_BSS_PAD, NL80211_BSS_PARENT_TSF, NL80211_BSS_PARENT_BSSID, + NL80211_BSS_CHAIN_SIGNAL, /* keep last */ __NL80211_BSS_AFTER_LAST, -- cgit v1.2.3 From fec149f5d3234c037ec761d1db4cc8c0550e9964 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 21 Dec 2017 10:17:41 +0100 Subject: batman-adv: Convert packet.h to uapi header The header file is used by different userspace programs to inject packets or to decode sniffed packets. It should therefore be available to them as userspace header. Also other components in the kernel (like the flow dissector) require access to the packet definitions to be able to decode ETH_P_BATMAN ethernet packets. Signed-off-by: Sven Eckelmann Signed-off-by: David S. Miller --- include/uapi/linux/batadv_packet.h | 644 +++++++++++++++++++++++++++++++++++++ 1 file changed, 644 insertions(+) create mode 100644 include/uapi/linux/batadv_packet.h (limited to 'include/uapi') diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h new file mode 100644 index 000000000000..5cb360be2a11 --- /dev/null +++ b/include/uapi/linux/batadv_packet.h @@ -0,0 +1,644 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */ +/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _UAPI_LINUX_BATADV_PACKET_H_ +#define _UAPI_LINUX_BATADV_PACKET_H_ + +#include +#include +#include + +/** + * batadv_tp_is_error() - Check throughput meter return code for error + * @n: throughput meter return code + * + * Return: 0 when not error was detected, != 0 otherwise + */ +#define batadv_tp_is_error(n) ((__u8)(n) > 127 ? 1 : 0) + +/** + * enum batadv_packettype - types for batman-adv encapsulated packets + * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV + * @BATADV_BCAST: broadcast packets carrying broadcast payload + * @BATADV_CODED: network coded packets + * @BATADV_ELP: echo location packets for B.A.T.M.A.N. V + * @BATADV_OGM2: originator messages for B.A.T.M.A.N. V + * + * @BATADV_UNICAST: unicast packets carrying unicast payload traffic + * @BATADV_UNICAST_FRAG: unicast packets carrying a fragment of the original + * payload packet + * @BATADV_UNICAST_4ADDR: unicast packet including the originator address of + * the sender + * @BATADV_ICMP: unicast packet like IP ICMP used for ping or traceroute + * @BATADV_UNICAST_TVLV: unicast packet carrying TVLV containers + */ +enum batadv_packettype { + /* 0x00 - 0x3f: local packets or special rules for handling */ + BATADV_IV_OGM = 0x00, + BATADV_BCAST = 0x01, + BATADV_CODED = 0x02, + BATADV_ELP = 0x03, + BATADV_OGM2 = 0x04, + /* 0x40 - 0x7f: unicast */ +#define BATADV_UNICAST_MIN 0x40 + BATADV_UNICAST = 0x40, + BATADV_UNICAST_FRAG = 0x41, + BATADV_UNICAST_4ADDR = 0x42, + BATADV_ICMP = 0x43, + BATADV_UNICAST_TVLV = 0x44, +#define BATADV_UNICAST_MAX 0x7f + /* 0x80 - 0xff: reserved */ +}; + +/** + * enum batadv_subtype - packet subtype for unicast4addr + * @BATADV_P_DATA: user payload + * @BATADV_P_DAT_DHT_GET: DHT request message + * @BATADV_P_DAT_DHT_PUT: DHT store message + * @BATADV_P_DAT_CACHE_REPLY: ARP reply generated by DAT + */ +enum batadv_subtype { + BATADV_P_DATA = 0x01, + BATADV_P_DAT_DHT_GET = 0x02, + BATADV_P_DAT_DHT_PUT = 0x03, + BATADV_P_DAT_CACHE_REPLY = 0x04, +}; + +/* this file is included by batctl which needs these defines */ +#define BATADV_COMPAT_VERSION 15 + +/** + * enum batadv_iv_flags - flags used in B.A.T.M.A.N. IV OGM packets + * @BATADV_NOT_BEST_NEXT_HOP: flag is set when ogm packet is forwarded and was + * previously received from someone else than the best neighbor. + * @BATADV_PRIMARIES_FIRST_HOP: flag unused. + * @BATADV_DIRECTLINK: flag is for the first hop or if rebroadcasted from a + * one hop neighbor on the interface where it was originally received. + */ +enum batadv_iv_flags { + BATADV_NOT_BEST_NEXT_HOP = 1UL << 0, + BATADV_PRIMARIES_FIRST_HOP = 1UL << 1, + BATADV_DIRECTLINK = 1UL << 2, +}; + +/** + * enum batadv_icmp_packettype - ICMP message types + * @BATADV_ECHO_REPLY: success reply to BATADV_ECHO_REQUEST + * @BATADV_DESTINATION_UNREACHABLE: failure when route to destination not found + * @BATADV_ECHO_REQUEST: request BATADV_ECHO_REPLY from destination + * @BATADV_TTL_EXCEEDED: error after BATADV_ECHO_REQUEST traversed too many hops + * @BATADV_PARAMETER_PROBLEM: return code for malformed messages + * @BATADV_TP: throughput meter packet + */ +enum batadv_icmp_packettype { + BATADV_ECHO_REPLY = 0, + BATADV_DESTINATION_UNREACHABLE = 3, + BATADV_ECHO_REQUEST = 8, + BATADV_TTL_EXCEEDED = 11, + BATADV_PARAMETER_PROBLEM = 12, + BATADV_TP = 15, +}; + +/** + * enum batadv_mcast_flags - flags for multicast capabilities and settings + * @BATADV_MCAST_WANT_ALL_UNSNOOPABLES: we want all packets destined for + * 224.0.0.0/24 or ff02::1 + * @BATADV_MCAST_WANT_ALL_IPV4: we want all IPv4 multicast packets + * @BATADV_MCAST_WANT_ALL_IPV6: we want all IPv6 multicast packets + */ +enum batadv_mcast_flags { + BATADV_MCAST_WANT_ALL_UNSNOOPABLES = 1UL << 0, + BATADV_MCAST_WANT_ALL_IPV4 = 1UL << 1, + BATADV_MCAST_WANT_ALL_IPV6 = 1UL << 2, +}; + +/* tt data subtypes */ +#define BATADV_TT_DATA_TYPE_MASK 0x0F + +/** + * enum batadv_tt_data_flags - flags for tt data tvlv + * @BATADV_TT_OGM_DIFF: TT diff propagated through OGM + * @BATADV_TT_REQUEST: TT request message + * @BATADV_TT_RESPONSE: TT response message + * @BATADV_TT_FULL_TABLE: contains full table to replace existing table + */ +enum batadv_tt_data_flags { + BATADV_TT_OGM_DIFF = 1UL << 0, + BATADV_TT_REQUEST = 1UL << 1, + BATADV_TT_RESPONSE = 1UL << 2, + BATADV_TT_FULL_TABLE = 1UL << 4, +}; + +/** + * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field + * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not + */ +enum batadv_vlan_flags { + BATADV_VLAN_HAS_TAG = 1UL << 15, +}; + +/** + * enum batadv_bla_claimframe - claim frame types for the bridge loop avoidance + * @BATADV_CLAIM_TYPE_CLAIM: claim of a client mac address + * @BATADV_CLAIM_TYPE_UNCLAIM: unclaim of a client mac address + * @BATADV_CLAIM_TYPE_ANNOUNCE: announcement of backbone with current crc + * @BATADV_CLAIM_TYPE_REQUEST: request of full claim table + * @BATADV_CLAIM_TYPE_LOOPDETECT: mesh-traversing loop detect packet + */ +enum batadv_bla_claimframe { + BATADV_CLAIM_TYPE_CLAIM = 0x00, + BATADV_CLAIM_TYPE_UNCLAIM = 0x01, + BATADV_CLAIM_TYPE_ANNOUNCE = 0x02, + BATADV_CLAIM_TYPE_REQUEST = 0x03, + BATADV_CLAIM_TYPE_LOOPDETECT = 0x04, +}; + +/** + * enum batadv_tvlv_type - tvlv type definitions + * @BATADV_TVLV_GW: gateway tvlv + * @BATADV_TVLV_DAT: distributed arp table tvlv + * @BATADV_TVLV_NC: network coding tvlv + * @BATADV_TVLV_TT: translation table tvlv + * @BATADV_TVLV_ROAM: roaming advertisement tvlv + * @BATADV_TVLV_MCAST: multicast capability tvlv + */ +enum batadv_tvlv_type { + BATADV_TVLV_GW = 0x01, + BATADV_TVLV_DAT = 0x02, + BATADV_TVLV_NC = 0x03, + BATADV_TVLV_TT = 0x04, + BATADV_TVLV_ROAM = 0x05, + BATADV_TVLV_MCAST = 0x06, +}; + +#pragma pack(2) +/* the destination hardware field in the ARP frame is used to + * transport the claim type and the group id + */ +struct batadv_bla_claim_dst { + __u8 magic[3]; /* FF:43:05 */ + __u8 type; /* bla_claimframe */ + __be16 group; /* group id */ +}; + +#pragma pack() + +/** + * struct batadv_ogm_packet - ogm (routing protocol) packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @flags: contains routing relevant flags - see enum batadv_iv_flags + * @seqno: sequence identification + * @orig: address of the source node + * @prev_sender: address of the previous sender + * @reserved: reserved byte for alignment + * @tq: transmission quality + * @tvlv_len: length of tvlv data following the ogm header + */ +struct batadv_ogm_packet { + __u8 packet_type; + __u8 version; + __u8 ttl; + __u8 flags; + __be32 seqno; + __u8 orig[ETH_ALEN]; + __u8 prev_sender[ETH_ALEN]; + __u8 reserved; + __u8 tq; + __be16 tvlv_len; + /* __packed is not needed as the struct size is divisible by 4, + * and the largest data type in this struct has a size of 4. + */ +}; + +#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) + +/** + * struct batadv_ogm2_packet - ogm2 (routing protocol) packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header + * @flags: reseved for routing relevant flags - currently always 0 + * @seqno: sequence number + * @orig: originator mac address + * @tvlv_len: length of the appended tvlv buffer (in bytes) + * @throughput: the currently flooded path throughput + */ +struct batadv_ogm2_packet { + __u8 packet_type; + __u8 version; + __u8 ttl; + __u8 flags; + __be32 seqno; + __u8 orig[ETH_ALEN]; + __be16 tvlv_len; + __be32 throughput; + /* __packed is not needed as the struct size is divisible by 4, + * and the largest data type in this struct has a size of 4. + */ +}; + +#define BATADV_OGM2_HLEN sizeof(struct batadv_ogm2_packet) + +/** + * struct batadv_elp_packet - elp (neighbor discovery) packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @orig: originator mac address + * @seqno: sequence number + * @elp_interval: currently used ELP sending interval in ms + */ +struct batadv_elp_packet { + __u8 packet_type; + __u8 version; + __u8 orig[ETH_ALEN]; + __be32 seqno; + __be32 elp_interval; +}; + +#define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet) + +/** + * struct batadv_icmp_header - common members among all the ICMP packets + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier + * @align: not used - useful for alignment purposes only + * + * This structure is used for ICMP packets parsing only and it is never sent + * over the wire. The alignment field at the end is there to ensure that + * members are padded the same way as they are in real packets. + */ +struct batadv_icmp_header { + __u8 packet_type; + __u8 version; + __u8 ttl; + __u8 msg_type; /* see ICMP message types above */ + __u8 dst[ETH_ALEN]; + __u8 orig[ETH_ALEN]; + __u8 uid; + __u8 align[3]; +}; + +/** + * struct batadv_icmp_packet - ICMP packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier + * @reserved: not used - useful for alignment + * @seqno: ICMP sequence number + */ +struct batadv_icmp_packet { + __u8 packet_type; + __u8 version; + __u8 ttl; + __u8 msg_type; /* see ICMP message types above */ + __u8 dst[ETH_ALEN]; + __u8 orig[ETH_ALEN]; + __u8 uid; + __u8 reserved; + __be16 seqno; +}; + +/** + * struct batadv_icmp_tp_packet - ICMP TP Meter packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier + * @subtype: TP packet subtype (see batadv_icmp_tp_subtype) + * @session: TP session identifier + * @seqno: the TP sequence number + * @timestamp: time when the packet has been sent. This value is filled in a + * TP_MSG and echoed back in the next TP_ACK so that the sender can compute the + * RTT. Since it is read only by the host which wrote it, there is no need to + * store it using network order + */ +struct batadv_icmp_tp_packet { + __u8 packet_type; + __u8 version; + __u8 ttl; + __u8 msg_type; /* see ICMP message types above */ + __u8 dst[ETH_ALEN]; + __u8 orig[ETH_ALEN]; + __u8 uid; + __u8 subtype; + __u8 session[2]; + __be32 seqno; + __be32 timestamp; +}; + +/** + * enum batadv_icmp_tp_subtype - ICMP TP Meter packet subtypes + * @BATADV_TP_MSG: Msg from sender to receiver + * @BATADV_TP_ACK: acknowledgment from receiver to sender + */ +enum batadv_icmp_tp_subtype { + BATADV_TP_MSG = 0, + BATADV_TP_ACK, +}; + +#define BATADV_RR_LEN 16 + +/** + * struct batadv_icmp_packet_rr - ICMP RouteRecord packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier + * @rr_cur: number of entries the rr array + * @seqno: ICMP sequence number + * @rr: route record array + */ +struct batadv_icmp_packet_rr { + __u8 packet_type; + __u8 version; + __u8 ttl; + __u8 msg_type; /* see ICMP message types above */ + __u8 dst[ETH_ALEN]; + __u8 orig[ETH_ALEN]; + __u8 uid; + __u8 rr_cur; + __be16 seqno; + __u8 rr[BATADV_RR_LEN][ETH_ALEN]; +}; + +#define BATADV_ICMP_MAX_PACKET_SIZE sizeof(struct batadv_icmp_packet_rr) + +/* All packet headers in front of an ethernet header have to be completely + * divisible by 2 but not by 4 to make the payload after the ethernet + * header again 4 bytes boundary aligned. + * + * A packing of 2 is necessary to avoid extra padding at the end of the struct + * caused by a structure member which is larger than two bytes. Otherwise + * the structure would not fulfill the previously mentioned rule to avoid the + * misalignment of the payload after the ethernet header. It may also lead to + * leakage of information when the padding it not initialized before sending. + */ +#pragma pack(2) + +/** + * struct batadv_unicast_packet - unicast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @ttvn: translation table version number + * @dest: originator destination of the unicast packet + */ +struct batadv_unicast_packet { + __u8 packet_type; + __u8 version; + __u8 ttl; + __u8 ttvn; /* destination translation table version number */ + __u8 dest[ETH_ALEN]; + /* "4 bytes boundary + 2 bytes" long to make the payload after the + * following ethernet header again 4 bytes boundary aligned + */ +}; + +/** + * struct batadv_unicast_4addr_packet - extended unicast packet + * @u: common unicast packet header + * @src: address of the source + * @subtype: packet subtype + * @reserved: reserved byte for alignment + */ +struct batadv_unicast_4addr_packet { + struct batadv_unicast_packet u; + __u8 src[ETH_ALEN]; + __u8 subtype; + __u8 reserved; + /* "4 bytes boundary + 2 bytes" long to make the payload after the + * following ethernet header again 4 bytes boundary aligned + */ +}; + +/** + * struct batadv_frag_packet - fragmented packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @dest: final destination used when routing fragments + * @orig: originator of the fragment used when merging the packet + * @no: fragment number within this sequence + * @priority: priority of frame, from ToS IP precedence or 802.1p + * @reserved: reserved byte for alignment + * @seqno: sequence identification + * @total_size: size of the merged packet + */ +struct batadv_frag_packet { + __u8 packet_type; + __u8 version; /* batman version field */ + __u8 ttl; +#if defined(__BIG_ENDIAN_BITFIELD) + __u8 no:4; + __u8 priority:3; + __u8 reserved:1; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + __u8 reserved:1; + __u8 priority:3; + __u8 no:4; +#else +#error "unknown bitfield endianness" +#endif + __u8 dest[ETH_ALEN]; + __u8 orig[ETH_ALEN]; + __be16 seqno; + __be16 total_size; +}; + +/** + * struct batadv_bcast_packet - broadcast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @reserved: reserved byte for alignment + * @seqno: sequence identification + * @orig: originator of the broadcast packet + */ +struct batadv_bcast_packet { + __u8 packet_type; + __u8 version; /* batman version field */ + __u8 ttl; + __u8 reserved; + __be32 seqno; + __u8 orig[ETH_ALEN]; + /* "4 bytes boundary + 2 bytes" long to make the payload after the + * following ethernet header again 4 bytes boundary aligned + */ +}; + +/** + * struct batadv_coded_packet - network coded packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @first_source: original source of first included packet + * @first_orig_dest: original destinal of first included packet + * @first_crc: checksum of first included packet + * @first_ttvn: tt-version number of first included packet + * @second_ttl: ttl of second packet + * @second_dest: second receiver of this coded packet + * @second_source: original source of second included packet + * @second_orig_dest: original destination of second included packet + * @second_crc: checksum of second included packet + * @second_ttvn: tt version number of second included packet + * @coded_len: length of network coded part of the payload + */ +struct batadv_coded_packet { + __u8 packet_type; + __u8 version; /* batman version field */ + __u8 ttl; + __u8 first_ttvn; + /* __u8 first_dest[ETH_ALEN]; - saved in mac header destination */ + __u8 first_source[ETH_ALEN]; + __u8 first_orig_dest[ETH_ALEN]; + __be32 first_crc; + __u8 second_ttl; + __u8 second_ttvn; + __u8 second_dest[ETH_ALEN]; + __u8 second_source[ETH_ALEN]; + __u8 second_orig_dest[ETH_ALEN]; + __be32 second_crc; + __be16 coded_len; +}; + +#pragma pack() + +/** + * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @reserved: reserved field (for packet alignment) + * @src: address of the source + * @dst: address of the destination + * @tvlv_len: length of tvlv data following the unicast tvlv header + * @align: 2 bytes to align the header to a 4 byte boundary + */ +struct batadv_unicast_tvlv_packet { + __u8 packet_type; + __u8 version; /* batman version field */ + __u8 ttl; + __u8 reserved; + __u8 dst[ETH_ALEN]; + __u8 src[ETH_ALEN]; + __be16 tvlv_len; + __u16 align; +}; + +/** + * struct batadv_tvlv_hdr - base tvlv header struct + * @type: tvlv container type (see batadv_tvlv_type) + * @version: tvlv container version + * @len: tvlv container length + */ +struct batadv_tvlv_hdr { + __u8 type; + __u8 version; + __be16 len; +}; + +/** + * struct batadv_tvlv_gateway_data - gateway data propagated through gw tvlv + * container + * @bandwidth_down: advertised uplink download bandwidth + * @bandwidth_up: advertised uplink upload bandwidth + */ +struct batadv_tvlv_gateway_data { + __be32 bandwidth_down; + __be32 bandwidth_up; +}; + +/** + * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container + * @flags: translation table flags (see batadv_tt_data_flags) + * @ttvn: translation table version number + * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by + * one batadv_tvlv_tt_vlan_data object per announced vlan + */ +struct batadv_tvlv_tt_data { + __u8 flags; + __u8 ttvn; + __be16 num_vlan; +}; + +/** + * struct batadv_tvlv_tt_vlan_data - vlan specific tt data propagated through + * the tt tvlv container + * @crc: crc32 checksum of the entries belonging to this vlan + * @vid: vlan identifier + * @reserved: unused, useful for alignment purposes + */ +struct batadv_tvlv_tt_vlan_data { + __be32 crc; + __be16 vid; + __u16 reserved; +}; + +/** + * struct batadv_tvlv_tt_change - translation table diff data + * @flags: status indicators concerning the non-mesh client (see + * batadv_tt_client_flags) + * @reserved: reserved field - useful for alignment purposes only + * @addr: mac address of non-mesh client that triggered this tt change + * @vid: VLAN identifier + */ +struct batadv_tvlv_tt_change { + __u8 flags; + __u8 reserved[3]; + __u8 addr[ETH_ALEN]; + __be16 vid; +}; + +/** + * struct batadv_tvlv_roam_adv - roaming advertisement + * @client: mac address of roaming client + * @vid: VLAN identifier + */ +struct batadv_tvlv_roam_adv { + __u8 client[ETH_ALEN]; + __be16 vid; +}; + +/** + * struct batadv_tvlv_mcast_data - payload of a multicast tvlv + * @flags: multicast flags announced by the orig node + * @reserved: reserved field + */ +struct batadv_tvlv_mcast_data { + __u8 flags; + __u8 reserved[3]; +}; + +#endif /* _UAPI_LINUX_BATADV_PACKET_H_ */ -- cgit v1.2.3 From f15bc54eeecd86dfba3885aab839cd1f45172a38 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 22 Dec 2017 15:10:18 +0100 Subject: l2tp: add peer_offset parameter Introduce peer_offset parameter in order to add the capability to specify two different values for payload offset on tx/rx side. If just offset is provided by userspace use it for rx side as well in order to maintain compatibility with older l2tp versions Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- include/uapi/linux/l2tp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index d84ce5c1c9aa..d6fee55dbded 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -127,6 +127,7 @@ enum { L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* flag */ L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* flag */ L2TP_ATTR_PAD, + L2TP_ATTR_PEER_OFFSET, /* u16 */ __L2TP_ATTR_MAX, }; -- cgit v1.2.3 From 675fc275a3a2d905535207237402c6d8dcb5fa4b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:09 -0800 Subject: bpf: offload: report device information for offloaded programs Report to the user ifindex and namespace information of offloaded programs. If device has disappeared return -ENODEV. Specify the namespace using dev/inode combination. CC: Eric W. Biederman Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 69eabfcb9bdb..f2f8b36e2ad4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -921,6 +921,9 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From bbb6189df4077cde8592cd2f804bb1122067dd32 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Wed, 27 Dec 2017 18:27:58 +0100 Subject: inet_diag: Add equal-operator for ports inet_diag currently provides less/greater than or equal operators for comparing ports when filtering sockets. An equal comparison can be performed by combining the two existing operators, or a user can for example request a port range and then do the final filtering in userspace. However, these approaches both have drawbacks. Implementing equal using LE/GE causes the size and complexity of a filter to grow quickly as the number of ports increase, while it on busy machines would be great if the kernel only returns information about relevant sockets. This patch introduces source and destination port equal operators. INET_DIAG_BC_S_EQ is used to match a source port, INET_DIAG_BC_D_EQ a destination port, and usage is the same as for the existing port operators. I.e., the port to match is stored in the no-member of the next inet_diag_bc_op-struct in the filter. Signed-off-by: Kristian Evensen Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 817d807e9481..14565d703291 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -92,6 +92,8 @@ enum { INET_DIAG_BC_D_COND, INET_DIAG_BC_DEV_COND, /* u32 ifindex */ INET_DIAG_BC_MARK_COND, + INET_DIAG_BC_S_EQ, + INET_DIAG_BC_D_EQ, }; struct inet_diag_hostcond { -- cgit v1.2.3 From 863def15b9755d9016df4d93addf3127f1dc67f4 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 3 Jan 2018 22:48:04 +0000 Subject: l2tp: revert "l2tp: add peer_offset parameter" Revert commit f15bc54eeecd ("l2tp: add peer_offset parameter"). This is removed because it is adding another configurable offset and configurable offsets are being removed. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- include/uapi/linux/l2tp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index d6fee55dbded..d84ce5c1c9aa 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -127,7 +127,6 @@ enum { L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* flag */ L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* flag */ L2TP_ATTR_PAD, - L2TP_ATTR_PEER_OFFSET, /* u16 */ __L2TP_ATTR_MAX, }; -- cgit v1.2.3 From 4887d8933a8dfdfa6602e0faaa0e31cd343ccefe Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 3 Jan 2018 22:48:07 +0000 Subject: l2tp: add comment in API header that L2TP_ATTR_OFFSET is not used Signed-off-by: James Chapman Signed-off-by: David S. Miller --- include/uapi/linux/l2tp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index d84ce5c1c9aa..f78eef4cc56a 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -94,7 +94,7 @@ enum { L2TP_ATTR_NONE, /* no data */ L2TP_ATTR_PW_TYPE, /* u16, enum l2tp_pwtype */ L2TP_ATTR_ENCAP_TYPE, /* u16, enum l2tp_encap_type */ - L2TP_ATTR_OFFSET, /* u16 */ + L2TP_ATTR_OFFSET, /* u16 (not used) */ L2TP_ATTR_DATA_SEQ, /* u16 */ L2TP_ATTR_L2SPEC_TYPE, /* u8, enum l2tp_l2spec_type */ L2TP_ATTR_L2SPEC_LEN, /* u8, enum l2tp_l2spec_type */ -- cgit v1.2.3 From 02dd3291b2f095bbc88e1d2628fd5bf2e92de69b Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 3 Jan 2018 11:26:14 +0100 Subject: bpf: finally expose xdp_rxq_info to XDP bpf-programs Now all XDP driver have been updated to setup xdp_rxq_info and assign this to xdp_buff->rxq. Thus, it is now safe to enable access to some of the xdp_rxq_info struct members. This patch extend xdp_md and expose UAPI to userspace for ingress_ifindex and rx_queue_index. Access happens via bpf instruction rewrite, that load data directly from struct xdp_rxq_info. * ingress_ifindex map to xdp_rxq_info->dev->ifindex * rx_queue_index map to xdp_rxq_info->queue_index Signed-off-by: Jesper Dangaard Brouer Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f2f8b36e2ad4..405317f9c064 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -899,6 +899,9 @@ struct xdp_md { __u32 data; __u32 data_end; __u32 data_meta; + /* Below access go though struct xdp_rxq_info */ + __u32 ingress_ifindex; /* rxq->dev->ifindex */ + __u32 rx_queue_index; /* rxq->queue_index */ }; enum sk_action { -- cgit v1.2.3 From e58f33cc84bc089c430ac955f3cad6380ae98591 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 Dec 2017 16:28:23 +0100 Subject: netfilter: add defines for arp/decnet max hooks The kernel already has defines for this, but they are in uapi exposed headers. Including these from netns.h causes build errors and also adds unneeded dependencies on heads that we don't need. So move these defines to netfilter_defs.h and place the uapi ones in ifndef __KERNEL__ to keep them for userspace. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_arp.h | 3 +++ include/uapi/linux/netfilter_decnet.h | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter_arp.h b/include/uapi/linux/netfilter_arp.h index 81b6a4cbcb72..791dfc5ae907 100644 --- a/include/uapi/linux/netfilter_arp.h +++ b/include/uapi/linux/netfilter_arp.h @@ -15,6 +15,9 @@ #define NF_ARP_IN 0 #define NF_ARP_OUT 1 #define NF_ARP_FORWARD 2 + +#ifndef __KERNEL__ #define NF_ARP_NUMHOOKS 3 +#endif #endif /* __LINUX_ARP_NETFILTER_H */ diff --git a/include/uapi/linux/netfilter_decnet.h b/include/uapi/linux/netfilter_decnet.h index 9089c38f6abe..61f1c7dfd033 100644 --- a/include/uapi/linux/netfilter_decnet.h +++ b/include/uapi/linux/netfilter_decnet.h @@ -24,6 +24,9 @@ #define NFC_DN_IF_IN 0x0004 /* Output device. */ #define NFC_DN_IF_OUT 0x0008 + +/* kernel define is in netfilter_defs.h */ +#define NF_DN_NUMHOOKS 7 #endif /* ! __KERNEL__ */ /* DECnet Hooks */ @@ -41,7 +44,6 @@ #define NF_DN_HELLO 5 /* Input Routing Packets */ #define NF_DN_ROUTE 6 -#define NF_DN_NUMHOOKS 7 enum nf_dn_hook_priorities { NF_DN_PRI_FIRST = INT_MIN, -- cgit v1.2.3 From 625c556118f3c2fd28bb8ef6da18c53bd4037be4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 9 Dec 2017 21:01:08 +0100 Subject: netfilter: connlimit: split xt_connlimit into front and backend This allows to reuse xt_connlimit infrastructure from nf_tables. The upcoming nf_tables frontend can just pass in an nftables register as input key, this allows limiting by any nft-supported key, including concatenations. For xt_connlimit, pass in the zone and the ip/ipv6 address. With help from Yi-Hung Wei. Signed-off-by: Florian Westphal Acked-by: Yi-Hung Wei Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_connlimit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/xt_connlimit.h b/include/uapi/linux/netfilter/xt_connlimit.h index 07e5e9d47882..d4d1943dcd11 100644 --- a/include/uapi/linux/netfilter/xt_connlimit.h +++ b/include/uapi/linux/netfilter/xt_connlimit.h @@ -27,7 +27,7 @@ struct xt_connlimit_info { __u32 flags; /* Used internally by the kernel */ - struct xt_connlimit_data *data __attribute__((aligned(8))); + struct nf_conncount_data *data __attribute__((aligned(8))); }; #endif /* _XT_CONNLIMIT_H */ -- cgit v1.2.3 From f6931f5f5b713705c3cc91e4f9c222f2b181e2ef Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Dec 2017 16:18:16 +0100 Subject: netfilter: meta: secpath support replacement for iptables "-m policy --dir in --policy {ipsec,none}". Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a3ee277b17a1..2efbf9744c2a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -777,6 +777,7 @@ enum nft_exthdr_attributes { * @NFT_META_OIFGROUP: packet output interface group * @NFT_META_CGROUP: socket control group (skb->sk->sk_classid) * @NFT_META_PRANDOM: a 32bit pseudo-random number + * @NFT_META_SECPATH: boolean, secpath_exists (!!skb->sp) */ enum nft_meta_keys { NFT_META_LEN, @@ -804,6 +805,7 @@ enum nft_meta_keys { NFT_META_OIFGROUP, NFT_META_CGROUP, NFT_META_PRANDOM, + NFT_META_SECPATH, }; /** -- cgit v1.2.3 From 90964016e5d34758033e75884e41d68ccb93212e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 7 Jan 2018 01:03:56 +0100 Subject: netfilter: nf_conntrack: add IPS_OFFLOAD status bit This new bit tells us that the conntrack entry is owned by the flow table offload infrastructure. # cat /proc/net/nf_conntrack ipv4 2 tcp 6 src=10.141.10.2 dst=147.75.205.195 sport=36392 dport=443 src=147.75.205.195 dst=192.168.2.195 sport=443 dport=36392 [OFFLOAD] mark=0 zone=0 use=2 Note the [OFFLOAD] tag in the listing. The timer of such conntrack entries look like stopped from userspace. In practise, to make sure the conntrack entry does not go away, the conntrack timer is periodically set to an arbitrary large value that gets refreshed on every iteration from the garbage collector, so it never expires- and they display no internal state in the case of TCP flows. This allows us to save a bitcheck from the packet path via nf_ct_is_expired(). Conntrack entries that have been offloaded to the flow table infrastructure cannot be deleted/flushed via ctnetlink. The flow table infrastructure is also responsible for releasing this conntrack entry. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 3fea7709a441..fc8c15a24a43 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -101,12 +101,16 @@ enum ip_conntrack_status { IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), + /* Conntrack has been offloaded to flow table. */ + IPS_OFFLOAD_BIT = 14, + IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT), + /* Be careful here, modifying these bits can make things messy, * so don't let users modify them directly. */ IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | - IPS_SEQ_ADJUST | IPS_TEMPLATE), + IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD), __IPS_MAX_BIT = 14, }; -- cgit v1.2.3 From 3b49e2e94e6ebb8b23d0955d9e898254455734f8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 7 Jan 2018 01:04:07 +0100 Subject: netfilter: nf_tables: add flow table netlink frontend This patch introduces a netlink control plane to create, delete and dump flow tables. Flow tables are identified by name, this name is used from rules to refer to an specific flow table. Flow tables use the rhashtable class and a generic garbage collector to remove expired entries. This also adds the infrastructure to add different flow table types, so we can add one for each layer 3 protocol family. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 53 ++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 2efbf9744c2a..591b53bce070 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -92,6 +92,9 @@ enum nft_verdicts { * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes) * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes) * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes) + * @NFT_MSG_NEWFLOWTABLE: add new flow table (enum nft_flowtable_attributes) + * @NFT_MSG_GETFLOWTABLE: get flow table (enum nft_flowtable_attributes) + * @NFT_MSG_DELFLOWTABLE: delete flow table (enum nft_flowtable_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -116,6 +119,9 @@ enum nf_tables_msg_types { NFT_MSG_GETOBJ, NFT_MSG_DELOBJ, NFT_MSG_GETOBJ_RESET, + NFT_MSG_NEWFLOWTABLE, + NFT_MSG_GETFLOWTABLE, + NFT_MSG_DELFLOWTABLE, NFT_MSG_MAX, }; @@ -1309,6 +1315,53 @@ enum nft_object_attributes { }; #define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) +/** + * enum nft_flowtable_attributes - nf_tables flow table netlink attributes + * + * @NFTA_FLOWTABLE_TABLE: name of the table containing the expression (NLA_STRING) + * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING) + * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32) + * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32) + */ +enum nft_flowtable_attributes { + NFTA_FLOWTABLE_UNSPEC, + NFTA_FLOWTABLE_TABLE, + NFTA_FLOWTABLE_NAME, + NFTA_FLOWTABLE_HOOK, + NFTA_FLOWTABLE_USE, + __NFTA_FLOWTABLE_MAX +}; +#define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1) + +/** + * enum nft_flowtable_hook_attributes - nf_tables flow table hook netlink attributes + * + * @NFTA_FLOWTABLE_HOOK_NUM: netfilter hook number (NLA_U32) + * @NFTA_FLOWTABLE_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + * @NFTA_FLOWTABLE_HOOK_DEVS: input devices this flow table is bound to (NLA_NESTED) + */ +enum nft_flowtable_hook_attributes { + NFTA_FLOWTABLE_HOOK_UNSPEC, + NFTA_FLOWTABLE_HOOK_NUM, + NFTA_FLOWTABLE_HOOK_PRIORITY, + NFTA_FLOWTABLE_HOOK_DEVS, + __NFTA_FLOWTABLE_HOOK_MAX +}; +#define NFTA_FLOWTABLE_HOOK_MAX (__NFTA_FLOWTABLE_HOOK_MAX - 1) + +/** + * enum nft_device_attributes - nf_tables device netlink attributes + * + * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) + */ +enum nft_devices_attributes { + NFTA_DEVICE_UNSPEC, + NFTA_DEVICE_NAME, + __NFTA_DEVICE_MAX +}; +#define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) + + /** * enum nft_trace_attributes - nf_tables trace netlink attributes * -- cgit v1.2.3 From a3c90f7a2323b331ae816d5b0633e68148e25d04 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 7 Jan 2018 01:04:26 +0100 Subject: netfilter: nf_tables: flow offload expression Add new instruction for the nf_tables VM that allows us to specify what flows are offloaded into a given flow table via name. This new instruction creates the flow entry and adds it to the flow table. Only established flows, ie. we have seen traffic in both directions, are added to the flow table. You can still decide to offload entries at a later stage via packet counting or checking the ct status in case you want to offload assured conntracks. This new extension depends on the conntrack subsystem. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 591b53bce070..53e8dd2a3a03 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -957,6 +957,17 @@ enum nft_ct_attributes { }; #define NFTA_CT_MAX (__NFTA_CT_MAX - 1) +/** + * enum nft_flow_attributes - ct offload expression attributes + * @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING) + */ +enum nft_offload_attributes { + NFTA_FLOW_UNSPEC, + NFTA_FLOW_TABLE_NAME, + __NFTA_FLOW_MAX, +}; +#define NFTA_FLOW_MAX (__NFTA_FLOW_MAX - 1) + enum nft_limit_type { NFT_LIMIT_PKTS, NFT_LIMIT_PKT_BYTES -- cgit v1.2.3 From 23fe846f9a48d5375722b3bd060e0a02ad1ca7f1 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 5 Jan 2018 19:47:14 +0100 Subject: l2tp: adjust comments about L2TPv3 offsets The "offset" option has been removed by commit 900631ee6a26 ("l2tp: remove configurable payload offset"). Signed-off-by: Guillaume Nault Acked-by: James Chapman Signed-off-by: David S. Miller --- include/uapi/linux/l2tp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index f78eef4cc56a..71e62795104d 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -65,7 +65,7 @@ struct sockaddr_l2tpip6 { * TUNNEL_MODIFY - CONN_ID, udpcsum * TUNNEL_GETSTATS - CONN_ID, (stats) * TUNNEL_GET - CONN_ID, (...) - * SESSION_CREATE - SESSION_ID, PW_TYPE, offset, data_seq, cookie, peer_cookie, offset, l2spec + * SESSION_CREATE - SESSION_ID, PW_TYPE, data_seq, cookie, peer_cookie, l2spec * SESSION_DELETE - SESSION_ID * SESSION_MODIFY - SESSION_ID, data_seq * SESSION_GET - SESSION_ID, (...) -- cgit v1.2.3 From c5a9f6f0ab4054082dd5ce9bbdaa8e8ff05cf365 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Mon, 17 Jul 2017 13:47:07 +0300 Subject: net/core: Add drop counters to VF statistics Modern hardware can decide to drop packets going to/from a VF. Add receive and transmit drop counters to be displayed at hypervisor layer in iproute2 per VF statistics. Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- include/uapi/linux/if_link.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 19fc02660e0c..f8f04fed6186 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -732,6 +732,8 @@ enum { IFLA_VF_STATS_BROADCAST, IFLA_VF_STATS_MULTICAST, IFLA_VF_STATS_PAD, + IFLA_VF_STATS_RX_DROPPED, + IFLA_VF_STATS_TX_DROPPED, __IFLA_VF_STATS_MAX, }; -- cgit v1.2.3 From ccfdec9089229503d3a305e02accac01817d293e Mon Sep 17 00:00:00 2001 From: Felix Walter Date: Fri, 5 Jan 2018 14:33:31 +0100 Subject: macsec: Add support for GCM-AES-256 cipher suite This adds support for the GCM-AES-256 cipher suite as specified in IEEE 802.1AEbn-2011. The prepared cipher suite selection mechanism is used, with GCM-AES-128 being the default cipher suite as defined in the standard. Signed-off-by: Felix Walter Cc: Sabrina Dubroca Signed-off-by: David S. Miller --- include/uapi/linux/if_macsec.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 719d243471f4..2e522835a4af 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -18,12 +18,17 @@ #define MACSEC_GENL_NAME "macsec" #define MACSEC_GENL_VERSION 1 -#define MACSEC_MAX_KEY_LEN 128 +#define MACSEC_MAX_KEY_LEN 256 #define MACSEC_KEYID_LEN 16 -#define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL -#define MACSEC_DEFAULT_CIPHER_ALT 0x0080C20001000001ULL +/* cipher IDs as per IEEE802.1AEbn-2011 */ +#define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL +#define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL + +#define MACSEC_DEFAULT_CIPHER_ID MACSEC_CIPHER_ID_GCM_AES_128 +/* deprecated cipher ID for GCM-AES-128 */ +#define MACSEC_DEFAULT_CIPHER_ALT 0x0080020001000001ULL #define MACSEC_MIN_ICV_LEN 8 #define MACSEC_MAX_ICV_LEN 32 -- cgit v1.2.3 From faa9b39f0e9ddfa8c83725b3f230784976dd3c7f Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 5 Jan 2018 17:44:54 -0500 Subject: virtio_net: propagate linkspeed/duplex settings from the hypervisor The ability to set speed and duplex for virtio_net is useful in various scenarios as described here: 16032be virtio_net: add ethtool support for set and get of settings However, it would be nice to be able to set this from the hypervisor, such that virtio_net doesn't require custom guest ethtool commands. Introduce a new feature flag, VIRTIO_NET_F_SPEED_DUPLEX, which allows the hypervisor to export a linkspeed and duplex setting. The user can subsequently overwrite it later if desired via: 'ethtool -s'. Note that VIRTIO_NET_F_SPEED_DUPLEX is defined as bit 63, the intention is that device feature bits are to grow down from bit 63, since the transports are starting from bit 24 and growing up. Signed-off-by: Jason Baron Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: virtio-dev@lists.oasis-open.org Signed-off-by: David S. Miller --- include/uapi/linux/virtio_net.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index fc353b518288..5de6ed37695b 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -57,6 +57,8 @@ * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ +#define VIRTIO_NET_F_SPEED_DUPLEX 63 /* Device set linkspeed and duplex */ + #ifndef VIRTIO_NET_NO_LEGACY #define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ #endif /* VIRTIO_NET_NO_LEGACY */ @@ -76,6 +78,17 @@ struct virtio_net_config { __u16 max_virtqueue_pairs; /* Default maximum transmit unit advice */ __u16 mtu; + /* + * speed, in units of 1Mb. All values 0 to INT_MAX are legal. + * Any other value stands for unknown. + */ + __u32 speed; + /* + * 0x00 - half duplex + * 0x01 - full duplex + * Any other value stands for unknown. + */ + __u8 duplex; } __attribute__((packed)); /* -- cgit v1.2.3 From 232d07b74a33b9f5d48516dc1d8ce41723ada593 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 8 Jan 2018 21:03:30 +0100 Subject: tipc: improve groupcast scope handling When a member joins a group, it also indicates a binding scope. This makes it possible to create both node local groups, invisible to other nodes, as well as cluster global groups, visible everywhere. In order to avoid that different members end up having permanently differing views of group size and memberhip, we must inhibit locally and globally bound members from joining the same group. We do this by using the binding scope as an additional separator between groups. I.e., a member must ignore all membership events from sockets using a different scope than itself, and all lookups for message destinations must require an exact match between the message's lookup scope and the potential target's binding scope. Apart from making it possible to create local groups using the same identity on different nodes, a side effect of this is that it now also becomes possible to create a cluster global group with the same identity across the same nodes, without interfering with the local groups. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 35f79d1f8c3a..14bacc7e6cef 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -117,10 +117,9 @@ static inline unsigned int tipc_node(__u32 addr) /* * Publication scopes when binding port names and port name sequences */ - -#define TIPC_ZONE_SCOPE 1 -#define TIPC_CLUSTER_SCOPE 2 -#define TIPC_NODE_SCOPE 3 +#define TIPC_ZONE_SCOPE 1 +#define TIPC_CLUSTER_SCOPE 2 +#define TIPC_NODE_SCOPE 3 /* * Limiting values for messages -- cgit v1.2.3 From 202a8ff545ccdaa5ac2000d9201df3453c8816be Mon Sep 17 00:00:00 2001 From: Ahmed Abdelsalam Date: Sun, 7 Jan 2018 19:22:02 +0100 Subject: netfilter: add IPv6 segment routing header 'srh' match It allows matching packets based on Segment Routing Header (SRH) information. The implementation considers revision 7 of the SRH draft. https://tools.ietf.org/html/draft-ietf-6man-segment-routing-header-07 Currently supported match options include: (1) Next Header (2) Hdr Ext Len (3) Segments Left (4) Last Entry (5) Tag value of SRH Signed-off-by: Ahmed Abdelsalam Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_ipv6/ip6t_srh.h | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 include/uapi/linux/netfilter_ipv6/ip6t_srh.h (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_srh.h b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h new file mode 100644 index 000000000000..f3cc0ef514a7 --- /dev/null +++ b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _IP6T_SRH_H +#define _IP6T_SRH_H + +#include +#include + +/* Values for "mt_flags" field in struct ip6t_srh */ +#define IP6T_SRH_NEXTHDR 0x0001 +#define IP6T_SRH_LEN_EQ 0x0002 +#define IP6T_SRH_LEN_GT 0x0004 +#define IP6T_SRH_LEN_LT 0x0008 +#define IP6T_SRH_SEGS_EQ 0x0010 +#define IP6T_SRH_SEGS_GT 0x0020 +#define IP6T_SRH_SEGS_LT 0x0040 +#define IP6T_SRH_LAST_EQ 0x0080 +#define IP6T_SRH_LAST_GT 0x0100 +#define IP6T_SRH_LAST_LT 0x0200 +#define IP6T_SRH_TAG 0x0400 +#define IP6T_SRH_MASK 0x07FF + +/* Values for "mt_invflags" field in struct ip6t_srh */ +#define IP6T_SRH_INV_NEXTHDR 0x0001 +#define IP6T_SRH_INV_LEN_EQ 0x0002 +#define IP6T_SRH_INV_LEN_GT 0x0004 +#define IP6T_SRH_INV_LEN_LT 0x0008 +#define IP6T_SRH_INV_SEGS_EQ 0x0010 +#define IP6T_SRH_INV_SEGS_GT 0x0020 +#define IP6T_SRH_INV_SEGS_LT 0x0040 +#define IP6T_SRH_INV_LAST_EQ 0x0080 +#define IP6T_SRH_INV_LAST_GT 0x0100 +#define IP6T_SRH_INV_LAST_LT 0x0200 +#define IP6T_SRH_INV_TAG 0x0400 +#define IP6T_SRH_INV_MASK 0x07FF + +/** + * struct ip6t_srh - SRH match options + * @ next_hdr: Next header field of SRH + * @ hdr_len: Extension header length field of SRH + * @ segs_left: Segments left field of SRH + * @ last_entry: Last entry field of SRH + * @ tag: Tag field of SRH + * @ mt_flags: match options + * @ mt_invflags: Invert the sense of match options + */ + +struct ip6t_srh { + __u8 next_hdr; + __u8 hdr_len; + __u8 segs_left; + __u8 last_entry; + __u16 tag; + __u16 mt_flags; + __u16 mt_invflags; +}; + +#endif /*_IP6T_SRH_H*/ -- cgit v1.2.3 From 902d6a4c2a4f411582689e53fb101895ffe99028 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 10 Jan 2018 20:51:57 -0700 Subject: netfilter: nf_defrag: Skip defrag if NOTRACK is set conntrack defrag is needed only if some module like CONNTRACK or NAT explicitly requests it. For plain forwarding scenarios, defrag is not needed and can be skipped if NOTRACK is set in a rule. Since conntrack defrag is currently higher priority than raw table, setting NOTRACK is not sufficient. We need to move raw to a higher priority for iptables only. This is achieved by introducing a module parameter "raw_before_defrag" which allows to change the priority of raw table to place it before defrag. By default, the parameter is disabled and the priority of raw table is NF_IP_PRI_RAW to support legacy behavior. If the module parameter is enabled, then the priority of the raw table is set to NF_IP_PRI_RAW_BEFORE_DEFRAG. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_ipv4.h | 1 + include/uapi/linux/netfilter_ipv6.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h index e6b1a84f5dd3..c3b060775e13 100644 --- a/include/uapi/linux/netfilter_ipv4.h +++ b/include/uapi/linux/netfilter_ipv4.h @@ -57,6 +57,7 @@ enum nf_ip_hook_priorities { NF_IP_PRI_FIRST = INT_MIN, + NF_IP_PRI_RAW_BEFORE_DEFRAG = -450, NF_IP_PRI_CONNTRACK_DEFRAG = -400, NF_IP_PRI_RAW = -300, NF_IP_PRI_SELINUX_FIRST = -225, diff --git a/include/uapi/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h index 2f9724611cc2..dc624fd24d25 100644 --- a/include/uapi/linux/netfilter_ipv6.h +++ b/include/uapi/linux/netfilter_ipv6.h @@ -62,6 +62,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_FIRST = INT_MIN, + NF_IP6_PRI_RAW_BEFORE_DEFRAG = -450, NF_IP6_PRI_CONNTRACK_DEFRAG = -400, NF_IP6_PRI_RAW = -300, NF_IP6_PRI_SELINUX_FIRST = -225, -- cgit v1.2.3 From daaf24c634ab951cad3dcef28492001ef9c931d0 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 11 Jan 2018 17:39:09 +0100 Subject: bpf: simplify xdp_convert_ctx_access for xdp_rxq_info As pointed out by Daniel Borkmann, using bpf_target_off() is not necessary for xdp_rxq_info when extracting queue_index and ifindex, as these members are u32 like BPF_W. Also fix trivial spelling mistake introduced in same commit. Fixes: 02dd3291b2f0 ("bpf: finally expose xdp_rxq_info to XDP bpf-programs") Reported-by: Daniel Borkmann Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 405317f9c064..395d261948de 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -899,7 +899,7 @@ struct xdp_md { __u32 data; __u32 data_end; __u32 data_meta; - /* Below access go though struct xdp_rxq_info */ + /* Below access go through struct xdp_rxq_info */ __u32 ingress_ifindex; /* rxq->dev->ifindex */ __u32 rx_queue_index; /* rxq->queue_index */ }; -- cgit v1.2.3 From a38845729ea3985db5d2544ec3ef3dc8f6313a27 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 11 Jan 2018 20:29:09 -0800 Subject: bpf: offload: add map offload infrastructure BPF map offload follow similar path to program offload. At creation time users may specify ifindex of the device on which they want to create the map. Map will be validated by the kernel's .map_alloc_check callback and device driver will be called for the actual allocation. Map will have an empty set of operations associated with it (save for alloc and free callbacks). The real device callbacks are kept in map->offload->dev_ops because they have slightly different signatures. Map operations are called in process context so the driver may communicate with HW freely, msleep(), wait() etc. Map alloc and free callbacks are muxed via existing .ndo_bpf, and are always called with rtnl lock held. Maps and programs are guaranteed to be destroyed before .ndo_uninit (i.e. before unregister_netdev() returns). Map callbacks are invoked with bpf_devs_lock *read* locked, drivers must take care of exclusive locking if necessary. All offload-specific branches are marked with unlikely() (through bpf_map_is_dev_bound()), given that branch penalty will be negligible compared to IO anyway, and we don't want to penalize SW path unnecessarily. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 395d261948de..7c2259e8bc54 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -245,6 +245,7 @@ union bpf_attr { * BPF_F_NUMA_NODE is set). */ char map_name[BPF_OBJ_NAME_LEN]; + __u32 map_ifindex; /* ifindex of netdev to create on */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ -- cgit v1.2.3 From 2290aefa2e90a43af8555ad6431d49de43259aa3 Mon Sep 17 00:00:00 2001 From: Franklin S Cooper Jr Date: Wed, 10 Jan 2018 16:25:18 +0530 Subject: can: dev: Add support for limiting configured bitrate Various CAN or CAN-FD IP may be able to run at a faster rate than what the transceiver the CAN node is connected to. This can lead to unexpected errors. However, CAN transceivers typically have fixed limitations and provide no means to discover these limitations at runtime. Therefore, add support for a can-transceiver node that can be reused by other CAN peripheral drivers to determine for both CAN and CAN-FD what the max bitrate that can be used. If the user tries to configure CAN to pass these maximum bitrates it will throw an error. Also add support for reading bitrate_max via the netlink interface. Reviewed-by: Suman Anna Signed-off-by: Franklin S Cooper Jr [nsekhar@ti.com: fix build error with !CONFIG_OF] Signed-off-by: Sekhar Nori Signed-off-by: Faiz Abbas Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 96710e76d5ce..9f56fad4785b 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -132,6 +132,7 @@ enum { IFLA_CAN_TERMINATION_CONST, IFLA_CAN_BITRATE_CONST, IFLA_CAN_DATA_BITRATE_CONST, + IFLA_CAN_BITRATE_MAX, __IFLA_CAN_MAX }; -- cgit v1.2.3 From d9f9b9a4d05fab693fd23a9ecaa330e03ebe2c31 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 15 Jan 2018 08:59:03 +0100 Subject: devlink: Add support for resource abstraction Add support for hardware resource abstraction over devlink. Each resource is identified via id, furthermore it contains information regarding its size and its related sub resources. Each resource can also provide its current occupancy. In some cases the sizes of some resources can be changed, yet for those changes to take place a hot driver reload may be needed. The reload capability will be introduced in the next patch. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 6665df69e26a..f89950443e17 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -70,6 +70,8 @@ enum devlink_command { DEVLINK_CMD_DPIPE_ENTRIES_GET, DEVLINK_CMD_DPIPE_HEADERS_GET, DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, + DEVLINK_CMD_RESOURCE_SET, + DEVLINK_CMD_RESOURCE_DUMP, /* add new commands above here */ __DEVLINK_CMD_MAX, @@ -202,6 +204,18 @@ enum devlink_attr { DEVLINK_ATTR_PAD, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, /* u8 */ + DEVLINK_ATTR_RESOURCE_LIST, /* nested */ + DEVLINK_ATTR_RESOURCE, /* nested */ + DEVLINK_ATTR_RESOURCE_NAME, /* string */ + DEVLINK_ATTR_RESOURCE_ID, /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE, /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_NEW, /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_VALID, /* u8 */ + DEVLINK_ATTR_RESOURCE_SIZE_MIN, /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_MAX, /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_GRAN, /* u64 */ + DEVLINK_ATTR_RESOURCE_UNIT, /* u8 */ + DEVLINK_ATTR_RESOURCE_OCC, /* u64 */ /* add new attributes above here, update the policy in devlink.c */ @@ -245,4 +259,8 @@ enum devlink_dpipe_header_id { DEVLINK_DPIPE_HEADER_IPV6, }; +enum devlink_resource_unit { + DEVLINK_RESOURCE_UNIT_ENTRY, +}; + #endif /* _UAPI_LINUX_DEVLINK_H_ */ -- cgit v1.2.3 From 2d8dc5bbf4e7603747875eb5cadcd67c1fa8b1bb Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 15 Jan 2018 08:59:04 +0100 Subject: devlink: Add support for reload Add support for performing driver hot reload. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index f89950443e17..555ddcaf0be2 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -73,6 +73,11 @@ enum devlink_command { DEVLINK_CMD_RESOURCE_SET, DEVLINK_CMD_RESOURCE_DUMP, + /* Hot driver reload, makes configuration changes take place. The + * devlink instance is not released during the process. + */ + DEVLINK_CMD_RELOAD, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 -- cgit v1.2.3 From 56dc7cd0a87a1ff4f49ee1e67bd88e768385d51a Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 15 Jan 2018 08:59:05 +0100 Subject: devlink: Add relation between dpipe and resource The hardware processes which are modeled via dpipe commonly use some internal hardware resources. Such relation can improve the understanding of hardware limitations. The number of resource's unit consumed per table's entry are also provided for each table. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 555ddcaf0be2..1df65a4c2044 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -221,6 +221,8 @@ enum devlink_attr { DEVLINK_ATTR_RESOURCE_SIZE_GRAN, /* u64 */ DEVLINK_ATTR_RESOURCE_UNIT, /* u8 */ DEVLINK_ATTR_RESOURCE_OCC, /* u64 */ + DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID, /* u64 */ + DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,/* u64 */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From 7960d1daf278cbe23bb48974fe6ae6a1e44c3c15 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 17 Jan 2018 11:46:51 +0100 Subject: net: sched: use block index as a handle instead of qdisc when block is shared As the tcm_ifindex with value TCM_IFINDEX_MAGIC_BLOCK is invalid ifindex, use it to indicate that we work with block, instead of qdisc. So if tcm_ifindex is set to TCM_IFINDEX_MAGIC_BLOCK, tcm_parent is used to carry block_index. If the block is set to be shared between at least 2 qdiscs, it is forbidden to use the qdisc handle to add/delete filters. In that case, userspace has to pass block_index. Also, for dump of the filters, in case the block is shared in between at least 2 qdiscs, the each filter is dumped with tcm_ifindex value TCM_IFINDEX_MAGIC_BLOCK and tcm_parent set to block_index. That gives the user clear indication, that the filter belongs to a shared block and not only to one qdisc under which it is dumped. Suggested-by: David Ahern Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Acked-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 843e29aa3cac..da878f2e7c39 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -541,9 +541,19 @@ struct tcmsg { int tcm_ifindex; __u32 tcm_handle; __u32 tcm_parent; +/* tcm_block_index is used instead of tcm_parent + * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK + */ +#define tcm_block_index tcm_parent __u32 tcm_info; }; +/* For manipulation of filters in shared block, tcm_ifindex is set to + * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index + * which is the block index. + */ +#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU) + enum { TCA_UNSPEC, TCA_KIND, -- cgit v1.2.3 From d47a6b0e7c492a4ba4524d557db388e34fd0a47a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 17 Jan 2018 11:46:52 +0100 Subject: net: sched: introduce ingress/egress block index attributes for qdisc Introduce two new attributes to be used for qdisc creation and dumping. One for ingress block, one for egress block. Introduce a set of ops that qdisc which supports block sharing would implement. Passing block indexes in qdisc change is not supported yet and it is checked and forbidded. In future, these attributes are to be reused for specifying block indexes for classes as well. As of this moment however, it is not supported so a check is in place to forbid it. Suggested-by: Roopa Prabhu Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Acked-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index da878f2e7c39..9b15005955fa 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -568,6 +568,8 @@ enum { TCA_DUMP_INVISIBLE, TCA_CHAIN, TCA_HW_OFFLOAD, + TCA_INGRESS_BLOCK, + TCA_EGRESS_BLOCK, __TCA_MAX }; -- cgit v1.2.3 From aff3d70a07fffc0abb53663e4a4acb059d2f36af Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 16 Jan 2018 16:31:02 +0800 Subject: tun: allow to attach ebpf socket filter This patch allows userspace to attach eBPF filter to tun. This will allow to implement VM dataplane filtering in a more efficient way compared to cBPF filter by allowing either qemu or libvirt to attach eBPF filter to tun. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_tun.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index fb38c1797131..ee432cd3018c 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -58,6 +58,7 @@ #define TUNSETVNETBE _IOW('T', 222, int) #define TUNGETVNETBE _IOR('T', 223, int) #define TUNSETSTEERINGEBPF _IOR('T', 224, int) +#define TUNSETFILTEREBPF _IOR('T', 225, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 -- cgit v1.2.3 From cb5f7334d479414adc6afe60105283277e297489 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 17 Jan 2018 12:05:36 +0100 Subject: bpf: add comments to BPF ld/ldx sizes Doc BPF ld/ldx size defines as comments in code, as it makes in faster to lookup in a programming/review setting, than looking up the sizes in Documentation/networking/filter.txt. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 2 +- include/uapi/linux/bpf_common.h | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7c2259e8bc54..74dc4dc98681 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -17,7 +17,7 @@ #define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ -#define BPF_DW 0x18 /* double word */ +#define BPF_DW 0x18 /* double word (64-bit) */ #define BPF_XADD 0xc0 /* exclusive add */ /* alu/jmp fields */ diff --git a/include/uapi/linux/bpf_common.h b/include/uapi/linux/bpf_common.h index 18be90725ab0..ee97668bdadb 100644 --- a/include/uapi/linux/bpf_common.h +++ b/include/uapi/linux/bpf_common.h @@ -15,9 +15,10 @@ /* ld/ldx fields */ #define BPF_SIZE(code) ((code) & 0x18) -#define BPF_W 0x00 -#define BPF_H 0x08 -#define BPF_B 0x10 +#define BPF_W 0x00 /* 32-bit */ +#define BPF_H 0x08 /* 16-bit */ +#define BPF_B 0x10 /* 8-bit */ +/* eBPF BPF_DW 0x18 64-bit */ #define BPF_MODE(code) ((code) & 0xe0) #define BPF_IMM 0x00 #define BPF_ABS 0x20 -- cgit v1.2.3 From 52775b33bb5072fbc07b02c0cf4fe8da1f7ee7cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:28 -0800 Subject: bpf: offload: report device information about offloaded maps Tell user space about device on which the map was created. Unfortunate reality of user ABI makes sharing this code with program offload difficult but the information is the same. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 74dc4dc98681..406c19d6016b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -938,6 +938,9 @@ struct bpf_map_info { __u32 max_entries; __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops -- cgit v1.2.3 From 3ecbfd65f50e5ff9c538c1bfa3356ef52cc66586 Mon Sep 17 00:00:00 2001 From: Harsha Sharma Date: Wed, 27 Dec 2017 00:59:00 +0530 Subject: netfilter: nf_tables: allocate handle and delete objects via handle This patch allows deletion of objects via unique handle which can be listed via '-a' option. Signed-off-by: Harsha Sharma Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 53e8dd2a3a03..66dceee0ae30 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -174,6 +174,8 @@ enum nft_table_attributes { NFTA_TABLE_NAME, NFTA_TABLE_FLAGS, NFTA_TABLE_USE, + NFTA_TABLE_HANDLE, + NFTA_TABLE_PAD, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) @@ -317,6 +319,7 @@ enum nft_set_desc_attributes { * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) * @NFTA_SET_USERDATA: user data (NLA_BINARY) * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*) + * @NFTA_SET_HANDLE: set handle (NLA_U64) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -335,6 +338,7 @@ enum nft_set_attributes { NFTA_SET_USERDATA, NFTA_SET_PAD, NFTA_SET_OBJ_TYPE, + NFTA_SET_HANDLE, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) @@ -1314,6 +1318,7 @@ enum nft_ct_helper_attributes { * @NFTA_OBJ_TYPE: stateful object type (NLA_U32) * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED) * @NFTA_OBJ_USE: number of references to this expression (NLA_U32) + * @NFTA_OBJ_HANDLE: object handle (NLA_U64) */ enum nft_object_attributes { NFTA_OBJ_UNSPEC, @@ -1322,6 +1327,8 @@ enum nft_object_attributes { NFTA_OBJ_TYPE, NFTA_OBJ_DATA, NFTA_OBJ_USE, + NFTA_OBJ_HANDLE, + NFTA_OBJ_PAD, __NFTA_OBJ_MAX }; #define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) @@ -1333,6 +1340,7 @@ enum nft_object_attributes { * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING) * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32) * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32) + * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64) */ enum nft_flowtable_attributes { NFTA_FLOWTABLE_UNSPEC, @@ -1340,6 +1348,8 @@ enum nft_flowtable_attributes { NFTA_FLOWTABLE_NAME, NFTA_FLOWTABLE_HOOK, NFTA_FLOWTABLE_USE, + NFTA_FLOWTABLE_HANDLE, + NFTA_FLOWTABLE_PAD, __NFTA_FLOWTABLE_MAX }; #define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1) -- cgit v1.2.3 From 4db5a802e565f0a60e08bd39a055f0095689802b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 16 Jan 2018 23:01:57 +0100 Subject: l2tp: mark L2TP_ATTR_L2SPEC_LEN as not used Reviewed-by: Guillaume Nault Tested-by: Guillaume Nault Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- include/uapi/linux/l2tp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 71e62795104d..7d570c7bd117 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -97,7 +97,7 @@ enum { L2TP_ATTR_OFFSET, /* u16 (not used) */ L2TP_ATTR_DATA_SEQ, /* u16 */ L2TP_ATTR_L2SPEC_TYPE, /* u8, enum l2tp_l2spec_type */ - L2TP_ATTR_L2SPEC_LEN, /* u8, enum l2tp_l2spec_type */ + L2TP_ATTR_L2SPEC_LEN, /* u8 (not used) */ L2TP_ATTR_PROTO_VERSION, /* u8 */ L2TP_ATTR_IFNAME, /* string */ L2TP_ATTR_CONN_ID, /* u32 */ -- cgit v1.2.3 From e8660ded7f5a9889395d33ce3d5e8c729a462bf5 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 18 Jan 2018 17:48:18 +0100 Subject: macsec: restore uAPI after addition of GCM-AES-256 Commit ccfdec908922 ("macsec: Add support for GCM-AES-256 cipher suite") changed a few values in the uapi headers for MACsec. Because of existing userspace implementations, we need to preserve the value of MACSEC_DEFAULT_CIPHER_ID. Not doing that resulted in wpa_supplicant segfaults when a secure channel was created using the default cipher. Thus, swap MACSEC_DEFAULT_CIPHER_{ID,ALT} back to their original values. Changing the maximum length of the MACSEC_SA_ATTR_KEY attribute is unnecessary, as the previous value (MACSEC_MAX_KEY_LEN, which was 128B) is large enough to carry 32-bytes keys. This patch reverts MACSEC_MAX_KEY_LEN to 128B and restores the old length check on MACSEC_SA_ATTR_KEY. Fixes: ccfdec908922 ("macsec: Add support for GCM-AES-256 cipher suite") Signed-off-by: Davide Caratti Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/uapi/linux/if_macsec.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 2e522835a4af..98e4d5d7c45c 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -18,7 +18,7 @@ #define MACSEC_GENL_NAME "macsec" #define MACSEC_GENL_VERSION 1 -#define MACSEC_MAX_KEY_LEN 256 +#define MACSEC_MAX_KEY_LEN 128 #define MACSEC_KEYID_LEN 16 @@ -26,9 +26,9 @@ #define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL #define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL -#define MACSEC_DEFAULT_CIPHER_ID MACSEC_CIPHER_ID_GCM_AES_128 /* deprecated cipher ID for GCM-AES-128 */ -#define MACSEC_DEFAULT_CIPHER_ALT 0x0080020001000001ULL +#define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL +#define MACSEC_DEFAULT_CIPHER_ALT MACSEC_CIPHER_ID_GCM_AES_128 #define MACSEC_MIN_ICV_LEN 8 #define MACSEC_MAX_ICV_LEN 32 -- cgit v1.2.3 From b2d3bcfa26a7a8de41f358a6cae8b848673b3c6e Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Thu, 18 Jan 2018 09:59:13 -0800 Subject: net: core: Expose number of link up/down transitions Expose the number of times the link has been going UP or DOWN, and update the "carrier_changes" counter to be the sum of these two events. While at it, also update the sysfs-class-net documentation to cover: carrier_changes (3.15), carrier_up_count (4.16) and carrier_down_count (4.16) Signed-off-by: David Decotigny [Florian: * rebase * add documentation * merge carrier_changes with up/down counters] Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index f8f04fed6186..8616131e2c61 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -161,6 +161,8 @@ enum { IFLA_EVENT, IFLA_NEW_NETNSID, IFLA_IF_NETNSID, + IFLA_CARRIER_UP_COUNT, + IFLA_CARRIER_DOWN_COUNT, __IFLA_MAX }; -- cgit v1.2.3 From de525be2ca2734865d29c4b67ddd29913b214906 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:09 -0800 Subject: bpf: Support passing args to sock_ops bpf function Adds support for passing up to 4 arguments to sock_ops bpf functions. It reusues the reply union, so the bpf_sock_ops structures are not increased in size. Signed-off-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 406c19d6016b..8d5874c2c4ff 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -952,8 +952,9 @@ struct bpf_map_info { struct bpf_sock_ops { __u32 op; union { - __u32 reply; - __u32 replylong[4]; + __u32 args[4]; /* Optionally passed to bpf program */ + __u32 reply; /* Returned by bpf program */ + __u32 replylong[4]; /* Optionally returned by bpf prog */ }; __u32 family; __u32 remote_ip4; /* Stored in network byte order */ -- cgit v1.2.3 From b13d880721729384757f235166068c315326f4a1 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:10 -0800 Subject: bpf: Adds field bpf_sock_ops_cb_flags to tcp_sock Adds field bpf_sock_ops_cb_flags to tcp_sock and bpf_sock_ops. Its primary use is to determine if there should be calls to sock_ops bpf program at various points in the TCP code. The field is initialized to zero, disabling the calls. A sock_ops BPF program can set it, per connection and as necessary, when the connection is established. It also adds support for reading and writting the field within a sock_ops BPF program. Reading is done by accessing the field directly. However, writing is done through the helper function bpf_sock_ops_cb_flags_set, in order to return an error if a BPF program is trying to set a callback that is not supported in the current kernel (i.e. running an older kernel). The helper function returns 0 if it was able to set all of the bits set in the argument, a positive number containing the bits that could not be set, or -EINVAL if the socket is not a full TCP socket. Examples of where one could call the bpf program: 1) When RTO fires 2) When a packet is retransmitted 3) When the connection terminates 4) When a packet is sent 5) When a packet is received Signed-off-by: Lawrence Brakmo Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8d5874c2c4ff..aa128407c44d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -642,6 +642,14 @@ union bpf_attr { * @optlen: length of optval in bytes * Return: 0 or negative error * + * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags) + * Set callback flags for sock_ops + * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct + * @flags: flags value + * Return: 0 for no error + * -EINVAL if there is no full tcp socket + * bits in flags that are not supported by current kernel + * * int bpf_skb_adjust_room(skb, len_diff, mode, flags) * Grow or shrink room in sk_buff. * @skb: pointer to skb @@ -748,7 +756,8 @@ union bpf_attr { FN(perf_event_read_value), \ FN(perf_prog_read_value), \ FN(getsockopt), \ - FN(override_return), + FN(override_return), \ + FN(sock_ops_cb_flags_set), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -969,8 +978,14 @@ struct bpf_sock_ops { */ __u32 snd_cwnd; __u32 srtt_us; /* Averaged RTT << 3 in usecs */ + __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ }; +/* Definitions for bpf_sock_ops_cb_flags */ +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0 /* Mask of all currently + * supported cb flags + */ + /* List of known BPF sock_ops operators. * New entries can only be added at the end */ -- cgit v1.2.3 From f89013f66d0f1a0dad44c513318efb706399a36b Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:11 -0800 Subject: bpf: Add sock_ops RTO callback Adds an optional call to sock_ops BPF program based on whether the BPF_SOCK_OPS_RTO_CB_FLAG is set in bpf_sock_ops_flags. The BPF program is passed 2 arguments: icsk_retransmits and whether the RTO has expired. Signed-off-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index aa128407c44d..c8cecf9cf5bd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -982,7 +982,8 @@ struct bpf_sock_ops { }; /* Definitions for bpf_sock_ops_cb_flags */ -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0 /* Mask of all currently +#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x1 /* Mask of all currently * supported cb flags */ @@ -1019,6 +1020,11 @@ enum { * a congestion threshold. RTTs above * this indicate congestion */ + BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. + * Arg1: value of icsk_retransmits + * Arg2: value of icsk_rto + * Arg3: whether RTO has expired + */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -- cgit v1.2.3 From 44f0e43037d3a17b043843ba67610ac7c7e37db6 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:12 -0800 Subject: bpf: Add support for reading sk_state and more Add support for reading many more tcp_sock fields state, same as sk->sk_state rtt_min same as sk->rtt_min.s[0].v (current rtt_min) snd_ssthresh rcv_nxt snd_nxt snd_una mss_cache ecn_flags rate_delivered rate_interval_us packets_out retrans_out total_retrans segs_in data_segs_in segs_out data_segs_out lost_out sacked_out sk_txhash bytes_received (__u64) bytes_acked (__u64) Signed-off-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c8cecf9cf5bd..46520eae37fa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -979,6 +979,28 @@ struct bpf_sock_ops { __u32 snd_cwnd; __u32 srtt_us; /* Averaged RTT << 3 in usecs */ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ + __u32 state; + __u32 rtt_min; + __u32 snd_ssthresh; + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u32 mss_cache; + __u32 ecn_flags; + __u32 rate_delivered; + __u32 rate_interval_us; + __u32 packets_out; + __u32 retrans_out; + __u32 total_retrans; + __u32 segs_in; + __u32 data_segs_in; + __u32 segs_out; + __u32 data_segs_out; + __u32 lost_out; + __u32 sacked_out; + __u32 sk_txhash; + __u64 bytes_received; + __u64 bytes_acked; }; /* Definitions for bpf_sock_ops_cb_flags */ -- cgit v1.2.3 From a31ad29e6a30cb0b9084a9425b819cdcd97273ce Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:14 -0800 Subject: bpf: Add BPF_SOCK_OPS_RETRANS_CB Adds support for calling sock_ops BPF program when there is a retransmission. Three arguments are used; one for the sequence number, another for the number of segments retransmitted, and the last one for the return value of tcp_transmit_skb (0 => success). Does not include syn-ack retransmissions. New op: BPF_SOCK_OPS_RETRANS_CB. Signed-off-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 46520eae37fa..31c93a0bdbc2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1005,7 +1005,8 @@ struct bpf_sock_ops { /* Definitions for bpf_sock_ops_cb_flags */ #define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x1 /* Mask of all currently +#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x3 /* Mask of all currently * supported cb flags */ @@ -1047,6 +1048,12 @@ enum { * Arg2: value of icsk_rto * Arg3: whether RTO has expired */ + BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. + * Arg1: sequence number of 1st byte + * Arg2: # segments + * Arg3: return value of + * tcp_transmit_skb (0 => success) + */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -- cgit v1.2.3 From d44874910a26f3a8f81edf873a2473363f07f660 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:15 -0800 Subject: bpf: Add BPF_SOCK_OPS_STATE_CB Adds support for calling sock_ops BPF program when there is a TCP state change. Two arguments are used; one for the old state and another for the new state. There is a new enum in include/uapi/linux/bpf.h that exports the TCP states that prepends BPF_ to the current TCP state names. If it is ever necessary to change the internal TCP state values (other than adding more to the end), then it will become necessary to convert from the internal TCP state value to the BPF value before calling the BPF sock_ops function. There are a set of compile checks added in tcp.c to detect if the internal and BPF values differ so we can make the necessary fixes. New op: BPF_SOCK_OPS_STATE_CB. Signed-off-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 31c93a0bdbc2..db6bdc375126 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1006,7 +1006,8 @@ struct bpf_sock_ops { /* Definitions for bpf_sock_ops_cb_flags */ #define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) #define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x3 /* Mask of all currently +#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently * supported cb flags */ @@ -1054,6 +1055,32 @@ enum { * Arg3: return value of * tcp_transmit_skb (0 => success) */ + BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. + * Arg1: old_state + * Arg2: new_state + */ +}; + +/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect + * changes between the TCP and BPF versions. Ideally this should never happen. + * If it does, we need to add code to convert them before calling + * the BPF sock_ops function. + */ +enum { + BPF_TCP_ESTABLISHED = 1, + BPF_TCP_SYN_SENT, + BPF_TCP_SYN_RECV, + BPF_TCP_FIN_WAIT1, + BPF_TCP_FIN_WAIT2, + BPF_TCP_TIME_WAIT, + BPF_TCP_CLOSE, + BPF_TCP_CLOSE_WAIT, + BPF_TCP_LAST_ACK, + BPF_TCP_LISTEN, + BPF_TCP_CLOSING, /* Now a valid state */ + BPF_TCP_NEW_SYN_RECV, + + BPF_TCP_MAX_STATES /* Leave at the end! */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -- cgit v1.2.3 From d350a823020e71e20a10d1dfa44f1d1d653b0334 Mon Sep 17 00:00:00 2001 From: William Tu Date: Thu, 25 Jan 2018 13:20:10 -0800 Subject: net: erspan: create erspan metadata uapi header The patch adds a new uapi header file, erspan.h, and moves the 'struct erspan_metadata' from internal erspan.h to it. Signed-off-by: William Tu Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/erspan.h | 52 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 include/uapi/linux/erspan.h (limited to 'include/uapi') diff --git a/include/uapi/linux/erspan.h b/include/uapi/linux/erspan.h new file mode 100644 index 000000000000..841573019ae1 --- /dev/null +++ b/include/uapi/linux/erspan.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * ERSPAN Tunnel Metadata + * + * Copyright (c) 2018 VMware + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * Userspace API for metadata mode ERSPAN tunnel + */ +#ifndef _UAPI_ERSPAN_H +#define _UAPI_ERSPAN_H + +#include /* For __beXX in userspace */ +#include + +/* ERSPAN version 2 metadata header */ +struct erspan_md2 { + __be32 timestamp; + __be16 sgt; /* security group tag */ +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 hwid_upper:2, + ft:5, + p:1; + __u8 o:1, + gra:2, + dir:1, + hwid:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 p:1, + ft:5, + hwid_upper:2; + __u8 hwid:4, + dir:1, + gra:2, + o:1; +#else +#error "Please fix " +#endif +}; + +struct erspan_metadata { + int version; + union { + __be32 index; /* Version 1 (type II)*/ + struct erspan_md2 md2; /* Version 2 (type III) */ + } u; +}; + +#endif /* _UAPI_ERSPAN_H */ -- cgit v1.2.3 From fc1372f89ffe1f58b589643b75f679e452350703 Mon Sep 17 00:00:00 2001 From: William Tu Date: Thu, 25 Jan 2018 13:20:11 -0800 Subject: openvswitch: add erspan version I and II support The patch adds support for openvswitch to configure erspan v1 and v2. The OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS attr is added to uapi as a binary blob to support all ERSPAN v1 and v2's fields. Note that Previous commit "openvswitch: Add erspan tunnel support." was reverted since it does not design properly. Signed-off-by: William Tu Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index dcfab5e3b55c..713e56ce681f 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -363,6 +363,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_IPV6_SRC, /* struct in6_addr src IPv6 address. */ OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */ OVS_TUNNEL_KEY_ATTR_PAD, + OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* struct erspan_metadata */ __OVS_TUNNEL_KEY_ATTR_MAX }; -- cgit v1.2.3 From 38e01b30563a5b5ade7b54e5d739d16a2b02fe82 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Jan 2018 15:01:39 +0100 Subject: dev: advertise the new ifindex when the netns iface changes The goal is to let the user follow an interface that moves to another netns. CC: Jiri Benc CC: Christian Brauner Signed-off-by: Nicolas Dichtel Reviewed-by: Jiri Benc Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8616131e2c61..6d9447700e18 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -163,6 +163,7 @@ enum { IFLA_IF_NETNSID, IFLA_CARRIER_UP_COUNT, IFLA_CARRIER_DOWN_COUNT, + IFLA_NEW_IFINDEX, __IFLA_MAX }; -- cgit v1.2.3