From 51f52547df700819db0d0e2b17b677cb209212b4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 17 Jun 2021 14:42:58 -0500 Subject: dma-buf: Document DMA_BUF_IOCTL_SYNC (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a new "DMA Buffer ioctls" section to the dma-buf docs and adds documentation for DMA_BUF_IOCTL_SYNC. v2 (Daniel Vetter): - Fix a couple typos - Add commentary about synchronization with other devices - Use item list format for describing flags v3 (Pekka Paalanen): - Clarify stalling requirements. - Be more clear that that DMA_BUF_IOCTL_SYNC with SINC_END has to be called before more GPU work happens. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Acked-by: Christian König Acked-by: Pekka Paalanen Cc: Sumit Semwal Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210617194258.579011-1-jason@jlekstrand.net --- include/uapi/linux/dma-buf.h | 50 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 7f30393b92c3..8e4a2ca0bcbf 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -22,8 +22,56 @@ #include -/* begin/end dma-buf functions used for userspace mmap. */ +/** + * struct dma_buf_sync - Synchronize with CPU access. + * + * When a DMA buffer is accessed from the CPU via mmap, it is not always + * possible to guarantee coherency between the CPU-visible map and underlying + * memory. To manage coherency, DMA_BUF_IOCTL_SYNC must be used to bracket + * any CPU access to give the kernel the chance to shuffle memory around if + * needed. + * + * Prior to accessing the map, the client must call DMA_BUF_IOCTL_SYNC + * with DMA_BUF_SYNC_START and the appropriate read/write flags. Once the + * access is complete, the client should call DMA_BUF_IOCTL_SYNC with + * DMA_BUF_SYNC_END and the same read/write flags. + * + * The synchronization provided via DMA_BUF_IOCTL_SYNC only provides cache + * coherency. It does not prevent other processes or devices from + * accessing the memory at the same time. If synchronization with a GPU or + * other device driver is required, it is the client's responsibility to + * wait for buffer to be ready for reading or writing before calling this + * ioctl with DMA_BUF_SYNC_START. Likewise, the client must ensure that + * follow-up work is not submitted to GPU or other device driver until + * after this ioctl has been called with DMA_BUF_SYNC_END? + * + * If the driver or API with which the client is interacting uses implicit + * synchronization, waiting for prior work to complete can be done via + * poll() on the DMA buffer file descriptor. If the driver or API requires + * explicit synchronization, the client may have to wait on a sync_file or + * other synchronization primitive outside the scope of the DMA buffer API. + */ struct dma_buf_sync { + /** + * @flags: Set of access flags + * + * DMA_BUF_SYNC_START: + * Indicates the start of a map access session. + * + * DMA_BUF_SYNC_END: + * Indicates the end of a map access session. + * + * DMA_BUF_SYNC_READ: + * Indicates that the mapped DMA buffer will be read by the + * client via the CPU map. + * + * DMA_BUF_SYNC_WRITE: + * Indicates that the mapped DMA buffer will be written by the + * client via the CPU map. + * + * DMA_BUF_SYNC_RW: + * An alias for DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE. + */ __u64 flags; }; -- cgit v1.2.3 From c156174a67070042d51d2c866146d3c934d5468c Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:56 +0800 Subject: ethtool: add a new command for getting PHC virtual clocks Add an interface for getting PHC (PTP Hardware Clock) virtual clocks, which are based on PHC physical clock providing hardware timestamp to network packets. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index c7135c9c37a5..b3b93710eff7 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -46,6 +46,7 @@ enum { ETHTOOL_MSG_FEC_SET, ETHTOOL_MSG_MODULE_EEPROM_GET, ETHTOOL_MSG_STATS_GET, + ETHTOOL_MSG_PHC_VCLOCKS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -88,6 +89,7 @@ enum { ETHTOOL_MSG_FEC_NTF, ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, ETHTOOL_MSG_STATS_GET_REPLY, + ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -440,6 +442,19 @@ enum { ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) }; +/* PHC VCLOCKS */ + +enum { + ETHTOOL_A_PHC_VCLOCKS_UNSPEC, + ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ + ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ + + /* add new constants above here */ + __ETHTOOL_A_PHC_VCLOCKS_CNT, + ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) +}; + /* CABLE TEST */ enum { -- cgit v1.2.3 From d463126e23f112629edb01594141ca437a92a108 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:59 +0800 Subject: net: sock: extend SO_TIMESTAMPING for PHC binding Since PTP virtual clock support is added, there can be several PTP virtual clocks based on one PTP physical clock for timestamping. This patch is to extend SO_TIMESTAMPING API to support PHC (PTP Hardware Clock) binding by adding a new flag SOF_TIMESTAMPING_BIND_PHC. When PTP virtual clocks are in use, user space can configure to bind one for timestamping, but PTP physical clock is not supported and not needed to bind. This patch is preparation for timestamp conversion from raw timestamp to a specific PTP virtual clock time in core net. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- include/uapi/linux/net_tstamp.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 7ed0b3d1c00a..fcc61c73a666 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -13,7 +13,7 @@ #include #include /* for SO_TIMESTAMPING */ -/* SO_TIMESTAMPING gets an integer bit field comprised of these values */ +/* SO_TIMESTAMPING flags */ enum { SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), SOF_TIMESTAMPING_TX_SOFTWARE = (1<<1), @@ -30,8 +30,9 @@ enum { SOF_TIMESTAMPING_OPT_STATS = (1<<12), SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13), SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), + SOF_TIMESTAMPING_BIND_PHC = (1 << 15), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TX_SWHW, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_BIND_PHC, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; @@ -46,6 +47,18 @@ enum { SOF_TIMESTAMPING_TX_SCHED | \ SOF_TIMESTAMPING_TX_ACK) +/** + * struct so_timestamping - SO_TIMESTAMPING parameter + * + * @flags: SO_TIMESTAMPING flags + * @bind_phc: Index of PTP virtual clock bound to sock. This is available + * if flag SOF_TIMESTAMPING_BIND_PHC is set. + */ +struct so_timestamping { + int flags; + int bind_phc; +}; + /** * struct hwtstamp_config - %SIOCGHWTSTAMP and %SIOCSHWTSTAMP parameter * -- cgit v1.2.3 From d322957ebfb9c21c2c72b66680f7c3ccd724e081 Mon Sep 17 00:00:00 2001 From: Duncan Roe Date: Wed, 7 Jul 2021 10:57:51 +1000 Subject: netfilter: uapi: refer to nfnetlink_conntrack.h, not nf_conntrack_netlink.h nf_conntrack_netlink.h does not exist, refer to nfnetlink_conntrack.h instead. Signed-off-by: Duncan Roe Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_log.h | 2 +- include/uapi/linux/netfilter/nfnetlink_queue.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_log.h b/include/uapi/linux/netfilter/nfnetlink_log.h index 45c8d3b027e0..0af9c113d665 100644 --- a/include/uapi/linux/netfilter/nfnetlink_log.h +++ b/include/uapi/linux/netfilter/nfnetlink_log.h @@ -61,7 +61,7 @@ enum nfulnl_attr_type { NFULA_HWTYPE, /* hardware type */ NFULA_HWHEADER, /* hardware header */ NFULA_HWLEN, /* hardware header length */ - NFULA_CT, /* nf_conntrack_netlink.h */ + NFULA_CT, /* nfnetlink_conntrack.h */ NFULA_CT_INFO, /* enum ip_conntrack_info */ NFULA_VLAN, /* nested attribute: packet vlan info */ NFULA_L2HDR, /* full L2 header */ diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index bcb2cb5d40b9..aed90c4df0c8 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -51,11 +51,11 @@ enum nfqnl_attr_type { NFQA_IFINDEX_PHYSOUTDEV, /* __u32 ifindex */ NFQA_HWADDR, /* nfqnl_msg_packet_hw */ NFQA_PAYLOAD, /* opaque data payload */ - NFQA_CT, /* nf_conntrack_netlink.h */ + NFQA_CT, /* nfnetlink_conntrack.h */ NFQA_CT_INFO, /* enum ip_conntrack_info */ NFQA_CAP_LEN, /* __u32 length of captured packet */ NFQA_SKB_INFO, /* __u32 skb meta information */ - NFQA_EXP, /* nf_conntrack_netlink.h */ + NFQA_EXP, /* nfnetlink_conntrack.h */ NFQA_UID, /* __u32 sk uid */ NFQA_GID, /* __u32 sk gid */ NFQA_SECCTX, /* security context string */ -- cgit v1.2.3 From 47316f4a305367794fc04f23e5c778678d8f1d8e Mon Sep 17 00:00:00 2001 From: Zvi Effron Date: Wed, 7 Jul 2021 22:16:55 +0000 Subject: bpf: Support input xdp_md context in BPF_PROG_TEST_RUN Support passing a xdp_md via ctx_in/ctx_out in bpf_attr for BPF_PROG_TEST_RUN. The intended use case is to pass some XDP meta data to the test runs of XDP programs that are used as tail calls. For programs that use bpf_prog_test_run_xdp, support xdp_md input and output. Unlike with an actual xdp_md during a non-test run, data_meta must be 0 because it must point to the start of the provided user data. From the initial xdp_md, use data and data_end to adjust the pointers in the generated xdp_buff. All other non-zero fields are prohibited (with EINVAL). If the user has set ctx_out/ctx_size_out, copy the (potentially different) xdp_md back to the userspace. We require all fields of input xdp_md except the ones we explicitly support to be set to zero. The expectation is that in the future we might add support for more fields and we want to fail explicitly if the user runs the program on the kernel where we don't yet support them. Co-developed-by: Cody Haas Co-developed-by: Lisa Watanabe Signed-off-by: Cody Haas Signed-off-by: Lisa Watanabe Signed-off-by: Zvi Effron Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210707221657.3985075-3-zeffron@riotgames.com --- include/uapi/linux/bpf.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bf9252c7381e..b46a383e8db7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -324,9 +324,6 @@ union bpf_iter_link_info { * **BPF_PROG_TYPE_SK_LOOKUP** * *data_in* and *data_out* must be NULL. * - * **BPF_PROG_TYPE_XDP** - * *ctx_in* and *ctx_out* must be NULL. - * * **BPF_PROG_TYPE_RAW_TRACEPOINT**, * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** * -- cgit v1.2.3 From caa7302b3a346d9a9ec85568cff52d2cee5f32c4 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 18 Jun 2021 16:21:36 +0200 Subject: media: include/uapi/linux/cec.h: typo: SATERDAY -> SATURDAY Fix typo in a define: CEC_OP_REC_SEQ_SATERDAY -> CEC_OP_REC_SEQ_SATURDAY This isn't used yet in actual applications to the best of my knowledge, and it certainly doesn't break the ABI since the value doesn't change. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/cec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index dc8879d179fd..de936f5e446d 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -642,7 +642,7 @@ struct cec_event { #define CEC_OP_REC_SEQ_WEDNESDAY 0x08 #define CEC_OP_REC_SEQ_THURSDAY 0x10 #define CEC_OP_REC_SEQ_FRIDAY 0x20 -#define CEC_OP_REC_SEQ_SATERDAY 0x40 +#define CEC_OP_REC_SEQ_SATURDAY 0x40 #define CEC_OP_REC_SEQ_ONCE_ONLY 0x00 #define CEC_MSG_CLEAR_DIGITAL_TIMER 0x99 -- cgit v1.2.3 From b48c7236b13cb5ef1b5fdf744aa8841df0f7b43a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 29 Jun 2021 15:11:44 -0500 Subject: exit/bdflush: Remove the deprecated bdflush system call The bdflush system call has been deprecated for a very long time. Recently Michael Schmitz tested[1] and found that the last known caller of of the bdflush system call is unaffected by it's removal. Since the code is not needed delete it. [1] https://lkml.kernel.org/r/36123b5d-daa0-6c2b-f2d4-a942f069fd54@gmail.com Link: https://lkml.kernel.org/r/87sg10quue.fsf_-_@disp2133 Tested-by: Michael Schmitz Acked-by: Geert Uytterhoeven Reviewed-by: Arnd Bergmann Acked-by: Cyril Hrubis Signed-off-by: "Eric W. Biederman" --- include/uapi/linux/capability.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 2ddb4226cd23..463d1ba2232a 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -243,7 +243,6 @@ struct vfs_ns_cap_data { /* Allow examination and configuration of disk quotas */ /* Allow setting the domainname */ /* Allow setting the hostname */ -/* Allow calling bdflush() */ /* Allow mount() and umount(), setting up new smb connection */ /* Allow some autofs root ioctls */ /* Allow nfsservctl */ -- cgit v1.2.3 From f170acda7ffaf0473d06e1e17c12cd9fd63904f5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 14 Jul 2021 21:43:17 +0900 Subject: bpf: Fix a typo of reuseport map in bpf.h. Fix s/BPF_MAP_TYPE_REUSEPORT_ARRAY/BPF_MAP_TYPE_REUSEPORT_SOCKARRAY/ typo in bpf.h. Fixes: 2dbb9b9e6df6 ("bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210714124317.67526-1-kuniyu@amazon.co.jp --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b46a383e8db7..bafb6282032b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3246,7 +3246,7 @@ union bpf_attr { * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * Return -- cgit v1.2.3 From b00628b1c7d595ae5b544e059c27b1f5828314b4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 14 Jul 2021 17:54:09 -0700 Subject: bpf: Introduce bpf timers. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce 'struct bpf_timer { __u64 :64; __u64 :64; };' that can be embedded in hash/array/lru maps as a regular field and helpers to operate on it: // Initialize the timer. // First 4 bits of 'flags' specify clockid. // Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, int flags); // Configure the timer to call 'callback_fn' static function. long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn); // Arm the timer to expire 'nsec' nanoseconds from the current time. long bpf_timer_start(struct bpf_timer *timer, u64 nsec, u64 flags); // Cancel the timer and wait for callback_fn to finish if it was running. long bpf_timer_cancel(struct bpf_timer *timer); Here is how BPF program might look like: struct map_elem { int counter; struct bpf_timer timer; }; struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 1000); __type(key, int); __type(value, struct map_elem); } hmap SEC(".maps"); static int timer_cb(void *map, int *key, struct map_elem *val); /* val points to particular map element that contains bpf_timer. */ SEC("fentry/bpf_fentry_test1") int BPF_PROG(test1, int a) { struct map_elem *val; int key = 0; val = bpf_map_lookup_elem(&hmap, &key); if (val) { bpf_timer_init(&val->timer, &hmap, CLOCK_REALTIME); bpf_timer_set_callback(&val->timer, timer_cb); bpf_timer_start(&val->timer, 1000 /* call timer_cb2 in 1 usec */, 0); } } This patch adds helper implementations that rely on hrtimers to call bpf functions as timers expire. The following patches add necessary safety checks. Only programs with CAP_BPF are allowed to use bpf_timer. The amount of timers used by the program is constrained by the memcg recorded at map creation time. The bpf_timer_init() helper needs explicit 'map' argument because inner maps are dynamic and not known at load time. While the bpf_timer_set_callback() is receiving hidden 'aux->prog' argument supplied by the verifier. The prog pointer is needed to do refcnting of bpf program to make sure that program doesn't get freed while the timer is armed. This approach relies on "user refcnt" scheme used in prog_array that stores bpf programs for bpf_tail_call. The bpf_timer_set_callback() will increment the prog refcnt which is paired with bpf_timer_cancel() that will drop the prog refcnt. The ops->map_release_uref is responsible for cancelling the timers and dropping prog refcnt when user space reference to a map reaches zero. This uref approach is done to make sure that Ctrl-C of user space process will not leave timers running forever unless the user space explicitly pinned a map that contained timers in bpffs. bpf_timer_init() and bpf_timer_set_callback() will return -EPERM if map doesn't have user references (is not held by open file descriptor from user space and not pinned in bpffs). The bpf_map_delete_elem() and bpf_map_update_elem() operations cancel and free the timer if given map element had it allocated. "bpftool map update" command can be used to cancel timers. The 'struct bpf_timer' is explicitly __attribute__((aligned(8))) because '__u64 :64' has 1 byte alignment of 8 byte padding. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Andrii Nakryiko Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20210715005417.78572-4-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bafb6282032b..3544ec5234f0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4777,6 +4777,70 @@ union bpf_attr { * Execute close syscall for given FD. * Return * A syscall result. + * + * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags) + * Description + * Initialize the timer. + * First 4 bits of *flags* specify clockid. + * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. + * All other bits of *flags* are reserved. + * The verifier will reject the program if *timer* is not from + * the same *map*. + * Return + * 0 on success. + * **-EBUSY** if *timer* is already initialized. + * **-EINVAL** if invalid *flags* are passed. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn) + * Description + * Configure the timer to call *callback_fn* static function. + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags) + * Description + * Set timer expiration N nanoseconds from the current time. The + * configured callback will be invoked in soft irq context on some cpu + * and will not repeat unless another bpf_timer_start() is made. + * In such case the next invocation can migrate to a different cpu. + * Since struct bpf_timer is a field inside map element the map + * owns the timer. The bpf_timer_set_callback() will increment refcnt + * of BPF program to make sure that callback_fn code stays valid. + * When user space reference to a map reaches zero all timers + * in a map are cancelled and corresponding program's refcnts are + * decremented. This is done to make sure that Ctrl-C of a user + * process doesn't leave any timers running. If map is pinned in + * bpffs the callback_fn can re-arm itself indefinitely. + * bpf_map_update/delete_elem() helpers and user space sys_bpf commands + * cancel and free the timer in the given map element. + * The map can contain timers that invoke callback_fn-s from different + * programs. The same callback_fn can serve different timers from + * different maps if key/value layout matches across maps. + * Every bpf_timer_set_callback() can have different callback_fn. + * + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier + * or invalid *flags* are passed. + * + * long bpf_timer_cancel(struct bpf_timer *timer) + * Description + * Cancel the timer and wait for callback_fn to finish if it was running. + * Return + * 0 if the timer was not active. + * 1 if the timer was active. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its + * own timer which would have led to a deadlock otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4948,6 +5012,10 @@ union bpf_attr { FN(sys_bpf), \ FN(btf_find_by_name_kind), \ FN(sys_close), \ + FN(timer_init), \ + FN(timer_set_callback), \ + FN(timer_start), \ + FN(timer_cancel), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6074,6 +6142,11 @@ struct bpf_spin_lock { __u32 val; }; +struct bpf_timer { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. -- cgit v1.2.3 From 9b99edcae5c80c8fb9f8e7149bae528c9e610a72 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 14 Jul 2021 11:43:55 +0200 Subject: bpf: Add bpf_get_func_ip helper for tracing programs Adding bpf_get_func_ip helper for BPF_PROG_TYPE_TRACING programs, specifically for all trampoline attach types. The trampoline's caller IP address is stored in (ctx - 8) address. so there's no reason to actually call the helper, but rather fixup the call instruction and return [ctx - 8] value directly. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210714094400.396467-4-jolsa@kernel.org --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3544ec5234f0..89688f16ad60 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4841,6 +4841,12 @@ union bpf_attr { * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its * own timer which would have led to a deadlock otherwise. + * + * u64 bpf_get_func_ip(void *ctx) + * Description + * Get address of the traced function (for tracing programs). + * Return + * Address of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5016,6 +5022,7 @@ union bpf_attr { FN(timer_set_callback), \ FN(timer_start), \ FN(timer_cancel), \ + FN(get_func_ip), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 9ffd9f3ff7193933dae171740ab70a103d460065 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 14 Jul 2021 11:43:56 +0200 Subject: bpf: Add bpf_get_func_ip helper for kprobe programs Adding bpf_get_func_ip helper for BPF_PROG_TYPE_KPROBE programs, so it's now possible to call bpf_get_func_ip from both kprobe and kretprobe programs. Taking the caller's address from 'struct kprobe::addr', which is defined for both kprobe and kretprobe. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Reviewed-by: Masami Hiramatsu Link: https://lore.kernel.org/bpf/20210714094400.396467-5-jolsa@kernel.org --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 89688f16ad60..2db6925e04f4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4844,7 +4844,7 @@ union bpf_attr { * * u64 bpf_get_func_ip(void *ctx) * Description - * Get address of the traced function (for tracing programs). + * Get address of the traced function (for tracing and kprobe programs). * Return * Address of the traced function. */ -- cgit v1.2.3 From b83d23a2a38b1770da0491257ae81d52307f7816 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Thu, 15 Jul 2021 08:27:54 -0400 Subject: openvswitch: Introduce per-cpu upcall dispatch The Open vSwitch kernel module uses the upcall mechanism to send packets from kernel space to user space when it misses in the kernel space flow table. The upcall sends packets via a Netlink socket. Currently, a Netlink socket is created for every vport. In this way, there is a 1:1 mapping between a vport and a Netlink socket. When a packet is received by a vport, if it needs to be sent to user space, it is sent via the corresponding Netlink socket. This mechanism, with various iterations of the corresponding user space code, has seen some limitations and issues: * On systems with a large number of vports, there is a correspondingly large number of Netlink sockets which can limit scaling. (https://bugzilla.redhat.com/show_bug.cgi?id=1526306) * Packet reordering on upcalls. (https://bugzilla.redhat.com/show_bug.cgi?id=1844576) * A thundering herd issue. (https://bugzilla.redhat.com/show_bug.cgi?id=1834444) This patch introduces an alternative, feature-negotiated, upcall mode using a per-cpu dispatch rather than a per-vport dispatch. In this mode, the Netlink socket to be used for the upcall is selected based on the CPU of the thread that is executing the upcall. In this way, it resolves the issues above as: a) The number of Netlink sockets scales with the number of CPUs rather than the number of vports. b) Ordering per-flow is maintained as packets are distributed to CPUs based on mechanisms such as RSS and flows are distributed to a single user space thread. c) Packets from a flow can only wake up one user space thread. The corresponding user space code can be found at: https://mail.openvswitch.org/pipermail/ovs-dev/2021-July/385139.html Bugzilla: https://bugzilla.redhat.com/1844576 Signed-off-by: Mark Gray Acked-by: Flavio Leitner Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 8d16744edc31..6571b57b2268 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -70,6 +70,8 @@ enum ovs_datapath_cmd { * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should * not be sent. + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the @@ -87,6 +89,9 @@ enum ovs_datapath_attr { OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ OVS_DP_ATTR_PAD, OVS_DP_ATTR_MASKS_CACHE_SIZE, + OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in per-cpu + * dispatch mode + */ __OVS_DP_ATTR_MAX }; @@ -127,6 +132,9 @@ struct ovs_vport_stats { /* Allow tc offload recirc sharing */ #define OVS_DP_F_TC_RECIRC_SHARING (1 << 2) +/* Allow per-cpu dispatch of upcalls */ +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3) + /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) -- cgit v1.2.3 From f4b7002a7076f025dce59647a77c8251175d2b34 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 19 Jul 2021 20:06:28 +0300 Subject: net: bridge: add vlan mcast snooping knob Add a global knob that controls if vlan multicast snooping is enabled. The proper contexts (vlan or bridge-wide) will be chosen based on the knob when processing packets and changing bridge device state. Note that vlans have their individual mcast snooping enabled by default, but this knob is needed to turn on bridge vlan snooping. It is disabled by default. To enable the knob vlan filtering must also be enabled, it doesn't make sense to have vlan mcast snooping without vlan filtering since that would lead to inconsistencies. Disabling vlan filtering will also automatically disable vlan mcast snooping. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 6b56a7549531..7927ad80ee86 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -720,12 +720,14 @@ struct br_mcast_stats { /* bridge boolean options * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets + * BR_BOOLOPT_MCAST_VLAN_SNOOPING - control vlan multicast snooping * * IMPORTANT: if adding a new option do not forget to handle * it in br_boolopt_toggle/get and bridge sysfs */ enum br_boolopt_id { BR_BOOLOPT_NO_LL_LEARN, + BR_BOOLOPT_MCAST_VLAN_SNOOPING, BR_BOOLOPT_MAX }; -- cgit v1.2.3 From 1e9ca45662d6bb65fb60d3fbb7737b081d9cffc9 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 19 Jul 2021 20:06:33 +0300 Subject: net: bridge: multicast: include router port vlan id in notifications Use the port multicast context to check if the router port is a vlan and in case it is include its vlan id in the notification. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 7927ad80ee86..90ac9e11c15b 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -629,6 +629,7 @@ enum { MDBA_ROUTER_PATTR_TYPE, MDBA_ROUTER_PATTR_INET_TIMER, MDBA_ROUTER_PATTR_INET6_TIMER, + MDBA_ROUTER_PATTR_VID, __MDBA_ROUTER_PATTR_MAX }; #define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1) -- cgit v1.2.3 From 47ecd2dbd8ec43125ea75d7d2e73c888cda8663f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 19 Jul 2021 20:06:34 +0300 Subject: net: bridge: vlan: add support for global options We can have two types of vlan options depending on context: - per-device vlan options (split in per-bridge and per-port) - global vlan options The second type wasn't supported in the bridge until now, but we need them for per-vlan multicast support, per-vlan STP support and other options which require global vlan context. They are contained in the global bridge vlan context even if the vlan is not configured on the bridge device itself. This patch adds initial netlink attributes and support for setting these global vlan options, they can only be set (RTM_NEWVLAN) and the operation must use the bridge device. Since there are no such options yet it shouldn't have any functional effect. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 90ac9e11c15b..4ed57d1a5d89 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -485,10 +485,15 @@ enum { * [BRIDGE_VLANDB_ENTRY_INFO] * ... * } + * [BRIDGE_VLANDB_GLOBAL_OPTIONS] = { + * [BRIDGE_VLANDB_GOPTS_ID] + * ... + * } */ enum { BRIDGE_VLANDB_UNSPEC, BRIDGE_VLANDB_ENTRY, + BRIDGE_VLANDB_GLOBAL_OPTIONS, __BRIDGE_VLANDB_MAX, }; #define BRIDGE_VLANDB_MAX (__BRIDGE_VLANDB_MAX - 1) @@ -538,6 +543,14 @@ enum { }; #define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1) +enum { + BRIDGE_VLANDB_GOPTS_UNSPEC, + BRIDGE_VLANDB_GOPTS_ID, + BRIDGE_VLANDB_GOPTS_RANGE, + __BRIDGE_VLANDB_GOPTS_MAX +}; +#define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { -- cgit v1.2.3 From 743a53d9636aad83da63a8638e8365e817ef6365 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 19 Jul 2021 20:06:35 +0300 Subject: net: bridge: vlan: add support for dumping global vlan options Add a new vlan options dump flag which causes only global vlan options to be dumped. The dumps are done only with bridge devices, ports are ignored. They support vlan compression if the options in sequential vlans are equal (currently always true). Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 4ed57d1a5d89..946ccf33dc53 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -479,6 +479,7 @@ enum { /* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */ #define BRIDGE_VLANDB_DUMPF_STATS (1 << 0) /* Include stats in the dump */ +#define BRIDGE_VLANDB_DUMPF_GLOBAL (1 << 1) /* Dump global vlan options only */ /* Bridge vlan RTM attributes * [BRIDGE_VLANDB_ENTRY] = { -- cgit v1.2.3 From 9dee572c384846f4ece029ab5688faed0682e48a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 19 Jul 2021 20:06:37 +0300 Subject: net: bridge: vlan: add mcast snooping control Add a new global vlan option which controls whether multicast snooping is enabled or disabled for a single vlan. It controls the vlan private flag: BR_VLFLAG_GLOBAL_MCAST_ENABLED. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 946ccf33dc53..5aca85874447 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -548,6 +548,7 @@ enum { BRIDGE_VLANDB_GOPTS_UNSPEC, BRIDGE_VLANDB_GOPTS_ID, BRIDGE_VLANDB_GOPTS_RANGE, + BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 2d151d39073aff498358543801fca0f670fea981 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sun, 18 Jul 2021 09:11:06 +0200 Subject: xfrm: Add possibility to set the default to block if we have no policy As the default we assume the traffic to pass, if we have no matching IPsec policy. With this patch, we have a possibility to change this default from allow to block. It can be configured via netlink. Each direction (input/output/forward) can be configured separately. With the default to block configuered, we need allow policies for all packet flows we accept. We do not use default policy lookup for the loopback device. v1->v2 - fix compiling when XFRM is disabled - Reported-by: kernel test robot Co-developed-by: Christian Langrock Signed-off-by: Christian Langrock Co-developed-by: Antony Antony Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index ffc6a5391bb7..6e8095106192 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -213,6 +213,11 @@ enum { XFRM_MSG_GETSPDINFO, #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO + XFRM_MSG_SETDEFAULT, +#define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT + XFRM_MSG_GETDEFAULT, +#define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT + XFRM_MSG_MAPPING, #define XFRM_MSG_MAPPING XFRM_MSG_MAPPING __XFRM_MSG_MAX @@ -508,6 +513,11 @@ struct xfrm_user_offload { #define XFRM_OFFLOAD_IPV6 1 #define XFRM_OFFLOAD_INBOUND 2 +struct xfrm_userpolicy_default { + __u8 dirmask; + __u8 action; +}; + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 -- cgit v1.2.3 From db67f219fc9365a0c456666ed7c134d43ab0be8a Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 20 Jul 2021 21:42:56 +0200 Subject: uapi: IPv6 IOAM headers definition This patch provides the IPv6 IOAM option header [1] as well as the IOAM Trace header [2]. An IOAM option must be 4n-aligned. Here is an overview of a Hop-by-Hop with an IOAM Trace option: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Next header | Hdr Ext Len | Padding | Padding | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Option Type | Opt Data Len | Reserved | IOAM Type | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Namespace-ID | NodeLen | Flags | RemainingLen| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | IOAM-Trace-Type | Reserved | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+<-+ | | | | node data [n] | | | | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ D | | a | node data [n-1] | t | | a +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ ~ ... ~ S +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ p | | a | node data [1] | c | | e +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | | | | node data [0] | | | | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+<-+ The IOAM option header starts at "Option Type" and ends after "IOAM Type". The IOAM Trace header starts at "Namespace-ID" and ends after "IOAM-Trace-Type/Reserved". IOAM Type: either Pre-allocated Trace (=0), Incremental Trace (=1), Proof-of-Transit (=2) or Edge-to-Edge (=3). Note that both the Pre-allocated Trace and the Incremental Trace look the same. The two others are not implemented. Namespace-ID: IOAM namespace identifier, not to be confused with network namespaces. It adds further context to IOAM options and associated data, and allows devices which are IOAM capable to determine whether IOAM options must be processed or ignored. It can also be used by an operator to distinguish different operational domains or to identify different sets of devices. NodeLen: Length of data added by each node. It depends on the Trace Type. Flags: Only the Overflow (O) flag for now. The O flag is set by a transit node when there are not enough octets left to record its data. RemainingLen: Remaining free space to record data. IOAM-Trace-Type: Bit field where each bit corresponds to a specific kind of IOAM data. See [2] for a detailed list. [1] https://tools.ietf.org/html/draft-ietf-ippm-ioam-ipv6-options [2] https://tools.ietf.org/html/draft-ietf-ippm-ioam-data Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/uapi/linux/ioam6.h | 123 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 include/uapi/linux/ioam6.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h new file mode 100644 index 000000000000..2177e4e49566 --- /dev/null +++ b/include/uapi/linux/ioam6.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman + */ + +#ifndef _UAPI_LINUX_IOAM6_H +#define _UAPI_LINUX_IOAM6_H + +#include +#include + +/* + * IPv6 IOAM Option Header + */ +struct ioam6_hdr { + __u8 opt_type; + __u8 opt_len; + __u8 :8; /* reserved */ +#define IOAM6_TYPE_PREALLOC 0 + __u8 type; +} __attribute__((packed)); + +/* + * IOAM Trace Header + */ +struct ioam6_trace_hdr { + __be16 namespace_id; + +#if defined(__LITTLE_ENDIAN_BITFIELD) + + __u8 :1, /* unused */ + :1, /* unused */ + overflow:1, + nodelen:5; + + __u8 remlen:7, + :1; /* unused */ + + union { + __be32 type_be32; + + struct { + __u32 bit7:1, + bit6:1, + bit5:1, + bit4:1, + bit3:1, + bit2:1, + bit1:1, + bit0:1, + bit15:1, /* unused */ + bit14:1, /* unused */ + bit13:1, /* unused */ + bit12:1, /* unused */ + bit11:1, + bit10:1, + bit9:1, + bit8:1, + bit23:1, /* reserved */ + bit22:1, + bit21:1, /* unused */ + bit20:1, /* unused */ + bit19:1, /* unused */ + bit18:1, /* unused */ + bit17:1, /* unused */ + bit16:1, /* unused */ + :8; /* reserved */ + } type; + }; + +#elif defined(__BIG_ENDIAN_BITFIELD) + + __u8 nodelen:5, + overflow:1, + :1, /* unused */ + :1; /* unused */ + + __u8 :1, /* unused */ + remlen:7; + + union { + __be32 type_be32; + + struct { + __u32 bit0:1, + bit1:1, + bit2:1, + bit3:1, + bit4:1, + bit5:1, + bit6:1, + bit7:1, + bit8:1, + bit9:1, + bit10:1, + bit11:1, + bit12:1, /* unused */ + bit13:1, /* unused */ + bit14:1, /* unused */ + bit15:1, /* unused */ + bit16:1, /* unused */ + bit17:1, /* unused */ + bit18:1, /* unused */ + bit19:1, /* unused */ + bit20:1, /* unused */ + bit21:1, /* unused */ + bit22:1, + bit23:1, /* reserved */ + :8; /* reserved */ + } type; + }; + +#else +#error "Please fix " +#endif + + __u8 data[0]; +} __attribute__((packed)); + +#endif /* _UAPI_LINUX_IOAM6_H */ -- cgit v1.2.3 From 9ee11f0fff205b4b3df9750bff5e94f97c71b6a0 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 20 Jul 2021 21:42:57 +0200 Subject: ipv6: ioam: Data plane support for Pre-allocated Trace Implement support for processing the IOAM Pre-allocated Trace with IPv6, see [1] and [2]. Introduce a new IPv6 Hop-by-Hop TLV option, see IANA [3]. A new per-interface sysctl is introduced. The value is a boolean to accept (=1) or ignore (=0, by default) IPv6 IOAM options on ingress for an interface: - net.ipv6.conf.XXX.ioam6_enabled Two other sysctls are introduced to define IOAM IDs, represented by an integer. They are respectively per-namespace and per-interface: - net.ipv6.ioam6_id - net.ipv6.conf.XXX.ioam6_id The value of the first one represents the IOAM ID of the node itself (u32; max and default value = U32_MAX>>8, due to hop limit concatenation) while the other represents the IOAM ID of an interface (u16; max and default value = U16_MAX). Each "ioam6_id" sysctl has a "_wide" equivalent: - net.ipv6.ioam6_id_wide - net.ipv6.conf.XXX.ioam6_id_wide The value of the first one represents the wide IOAM ID of the node itself (u64; max and default value = U64_MAX>>8, due to hop limit concatenation) while the other represents the wide IOAM ID of an interface (u32; max and default value = U32_MAX). The use of short and wide equivalents is not exclusive, a deployment could choose to leverage both. For example, net.ipv6.conf.XXX.ioam6_id (short format) could be an identifier for a physical interface, whereas net.ipv6.conf.XXX.ioam6_id_wide (wide format) could be an identifier for a logical sub-interface. Documentation about new sysctls is provided at the end of this patchset. Two relativistic hash tables are used: one for IOAM namespaces, the other for IOAM schemas. A namespace can only have a single active schema and a schema can only be attached to a single namespace (1:1 relationship). [1] https://tools.ietf.org/html/draft-ietf-ippm-ioam-ipv6-options [2] https://tools.ietf.org/html/draft-ietf-ippm-ioam-data [3] https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml#ipv6-parameters-2 Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/uapi/linux/in6.h | 1 + include/uapi/linux/ioam6.h | 9 +++++++++ include/uapi/linux/ipv6.h | 3 +++ 3 files changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 5ad396a57eb3..c4c53a9ab959 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -145,6 +145,7 @@ struct in6_flowlabel_req { #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ +#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ #define IPV6_TLV_JUMBO 194 #define IPV6_TLV_HAO 201 /* home address option */ diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h index 2177e4e49566..23ba6e85582f 100644 --- a/include/uapi/linux/ioam6.h +++ b/include/uapi/linux/ioam6.h @@ -12,6 +12,15 @@ #include #include +#define IOAM6_U16_UNAVAILABLE U16_MAX +#define IOAM6_U32_UNAVAILABLE U32_MAX +#define IOAM6_U64_UNAVAILABLE U64_MAX + +#define IOAM6_DEFAULT_ID (IOAM6_U32_UNAVAILABLE >> 8) +#define IOAM6_DEFAULT_ID_WIDE (IOAM6_U64_UNAVAILABLE >> 8) +#define IOAM6_DEFAULT_IF_ID IOAM6_U16_UNAVAILABLE +#define IOAM6_DEFAULT_IF_ID_WIDE IOAM6_U32_UNAVAILABLE + /* * IPv6 IOAM Option Header */ diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 70603775fe91..b243a53fa985 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -190,6 +190,9 @@ enum { DEVCONF_NDISC_TCLASS, DEVCONF_RPL_SEG_ENABLED, DEVCONF_RA_DEFRTR_METRIC, + DEVCONF_IOAM6_ENABLED, + DEVCONF_IOAM6_ID, + DEVCONF_IOAM6_ID_WIDE, DEVCONF_MAX }; -- cgit v1.2.3 From 8c6f6fa6772696be0c047a711858084b38763728 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 20 Jul 2021 21:42:58 +0200 Subject: ipv6: ioam: IOAM Generic Netlink API Add Generic Netlink commands to allow userspace to configure IOAM namespaces and schemas. The target is iproute2 and the patch is ready. It will be posted as soon as this patchset is merged. Here is an overview: $ ip ioam Usage: ip ioam { COMMAND | help } ip ioam namespace show ip ioam namespace add ID [ data DATA32 ] [ wide DATA64 ] ip ioam namespace del ID ip ioam schema show ip ioam schema add ID DATA ip ioam schema del ID ip ioam namespace set ID schema { ID | none } Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/uapi/linux/ioam6_genl.h | 52 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 include/uapi/linux/ioam6_genl.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ioam6_genl.h b/include/uapi/linux/ioam6_genl.h new file mode 100644 index 000000000000..ca4b22833754 --- /dev/null +++ b/include/uapi/linux/ioam6_genl.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM Generic Netlink API + * + * Author: + * Justin Iurman + */ + +#ifndef _UAPI_LINUX_IOAM6_GENL_H +#define _UAPI_LINUX_IOAM6_GENL_H + +#define IOAM6_GENL_NAME "IOAM6" +#define IOAM6_GENL_VERSION 0x1 + +enum { + IOAM6_ATTR_UNSPEC, + + IOAM6_ATTR_NS_ID, /* u16 */ + IOAM6_ATTR_NS_DATA, /* u32 */ + IOAM6_ATTR_NS_DATA_WIDE,/* u64 */ + +#define IOAM6_MAX_SCHEMA_DATA_LEN (255 * 4) + IOAM6_ATTR_SC_ID, /* u32 */ + IOAM6_ATTR_SC_DATA, /* Binary */ + IOAM6_ATTR_SC_NONE, /* Flag */ + + IOAM6_ATTR_PAD, + + __IOAM6_ATTR_MAX, +}; + +#define IOAM6_ATTR_MAX (__IOAM6_ATTR_MAX - 1) + +enum { + IOAM6_CMD_UNSPEC, + + IOAM6_CMD_ADD_NAMESPACE, + IOAM6_CMD_DEL_NAMESPACE, + IOAM6_CMD_DUMP_NAMESPACES, + + IOAM6_CMD_ADD_SCHEMA, + IOAM6_CMD_DEL_SCHEMA, + IOAM6_CMD_DUMP_SCHEMAS, + + IOAM6_CMD_NS_SET_SCHEMA, + + __IOAM6_CMD_MAX, +}; + +#define IOAM6_CMD_MAX (__IOAM6_CMD_MAX - 1) + +#endif /* _UAPI_LINUX_IOAM6_GENL_H */ -- cgit v1.2.3 From 3edede08ff37c6a9370510508d5eeb54890baf47 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 20 Jul 2021 21:42:59 +0200 Subject: ipv6: ioam: Support for IOAM injection with lwtunnels Add support for the IOAM inline insertion (only for the host-to-host use case) which is per-route configured with lightweight tunnels. The target is iproute2 and the patch is ready. It will be posted as soon as this patchset is merged. Here is an overview: $ ip -6 ro ad fc00::1/128 encap ioam6 trace type 0x800000 ns 1 size 12 dev eth0 This example configures an IOAM Pre-allocated Trace option attached to the fc00::1/128 prefix. The IOAM namespace (ns) is 1, the size of the pre-allocated trace data block is 12 octets (size) and only the first IOAM data (bit 0: hop_limit + node id) is included in the trace (type) represented as a bitfield. The reason why the in-transit (IPv6-in-IPv6 encapsulation) use case is not implemented is explained on the patchset cover. Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/uapi/linux/ioam6.h | 1 + include/uapi/linux/ioam6_iptunnel.h | 20 ++++++++++++++++++++ include/uapi/linux/lwtunnel.h | 1 + 3 files changed, 22 insertions(+) create mode 100644 include/uapi/linux/ioam6_iptunnel.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h index 23ba6e85582f..ac4de376f0ce 100644 --- a/include/uapi/linux/ioam6.h +++ b/include/uapi/linux/ioam6.h @@ -126,6 +126,7 @@ struct ioam6_trace_hdr { #error "Please fix " #endif +#define IOAM6_TRACE_DATA_SIZE_MAX 244 __u8 data[0]; } __attribute__((packed)); diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h new file mode 100644 index 000000000000..bae14636a8c8 --- /dev/null +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM Lightweight Tunnel API + * + * Author: + * Justin Iurman + */ + +#ifndef _UAPI_LINUX_IOAM6_IPTUNNEL_H +#define _UAPI_LINUX_IOAM6_IPTUNNEL_H + +enum { + IOAM6_IPTUNNEL_UNSPEC, + IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ + __IOAM6_IPTUNNEL_MAX, +}; + +#define IOAM6_IPTUNNEL_MAX (__IOAM6_IPTUNNEL_MAX - 1) + +#endif /* _UAPI_LINUX_IOAM6_IPTUNNEL_H */ diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 568a4303ccce..2e206919125c 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -14,6 +14,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_BPF, LWTUNNEL_ENCAP_SEG6_LOCAL, LWTUNNEL_ENCAP_RPL, + LWTUNNEL_ENCAP_IOAM6, __LWTUNNEL_ENCAP_MAX, }; -- cgit v1.2.3 From 9ffb14ef61bab83fa818736bf3e7e6b6e182e8e2 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 15 Jul 2021 13:07:13 +0300 Subject: move_mount: allow to add a mount into an existing group Previously a sharing group (shared and master ids pair) can be only inherited when mount is created via bindmount. This patch adds an ability to add an existing private mount into an existing sharing group. With this functionality one can first create the desired mount tree from only private mounts (without the need to care about undesired mount propagation or mount creation order implied by sharing group dependencies), and next then setup any desired mount sharing between those mounts in tree as needed. This allows CRIU to restore any set of mount namespaces, mount trees and sharing group trees for a container. We have many issues with restoring mounts in CRIU related to sharing groups and propagation: - reverse sharing groups vs mount tree order requires complex mounts reordering which mostly implies also using some temporary mounts (please see https://lkml.org/lkml/2021/3/23/569 for more info) - mount() syscall creates tons of mounts due to propagation - mount re-parenting due to propagation - "Mount Trap" due to propagation - "Non Uniform" propagation, meaning that with different tricks with mount order and temporary children-"lock" mounts one can create mount trees which can't be restored without those tricks (see https://www.linuxplumbersconf.org/event/7/contributions/640/) With this new functionality we can resolve all the problems with propagation at once. Link: https://lore.kernel.org/r/20210715100714.120228-1-ptikhomirov@virtuozzo.com Cc: Eric W. Biederman Cc: Alexander Viro Cc: Christian Brauner Cc: Mattias Nissler Cc: Aleksa Sarai Cc: Andrei Vagin Cc: linux-fsdevel@vger.kernel.org Cc: linux-api@vger.kernel.org Cc: lkml Co-developed-by: Andrei Vagin Acked-by: Christian Brauner Signed-off-by: Pavel Tikhomirov Signed-off-by: Andrei Vagin Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index dd7a166fdf9c..4d93967f8aea 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -73,7 +73,8 @@ #define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ #define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ #define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ -#define MOVE_MOUNT__MASK 0x00000077 +#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */ +#define MOVE_MOUNT__MASK 0x00000177 /* * fsopen() flags. -- cgit v1.2.3 From d7aff291d069c4418285f3c8ee27b0ff67ce5998 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sat, 26 Jun 2021 06:11:51 +0200 Subject: serial: 8250: Define RX trigger levels for OxSemi 950 devices Oxford Semiconductor 950 serial port devices have a 128-byte FIFO and in the enhanced (650) mode, which we select in `autoconfig_has_efr' with the ECB bit set in the EFR register, they support the receive interrupt trigger level selectable with FCR bits 7:6 from the set of 16, 32, 112, 120. This applies to the original OX16C950 discrete UART[1] as well as 950 cores embedded into more complex devices. For these devices we set the default to 112, which sets an excessively high level of 112 or 7/8 of the FIFO capacity, unlike with other port types where we choose at most 1/2 of their respective FIFO capacities. Additionally we don't make the trigger level configurable. Consequently frequent input overruns happen with high bit rates where hardware flow control cannot be used (e.g. terminal applications) even with otherwise highly-performant systems. Lower the default receive interrupt trigger level to 32 then, and make it configurable. Document the trigger levels along with other port types, including the set of 16, 32, 64, 112 for the transmit interrupt as well[2]. References: [1] "OX16C950 rev B High Performance UART with 128 byte FIFOs", Oxford Semiconductor, Inc., DS-0031, Sep 05, Table 10: "Receiver Trigger Levels", p. 22 [2] same, Table 9: "Transmit Interrupt Trigger Levels", p. 22 Signed-off-by: Maciej W. Rozycki Link: https://lore.kernel.org/r/alpine.DEB.2.21.2106260608480.37803@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_reg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index be07b5470f4b..f51bc8f36813 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -62,6 +62,7 @@ * ST16C654: 8 16 56 60 8 16 32 56 PORT_16654 * TI16C750: 1 16 32 56 xx xx xx xx PORT_16750 * TI16C752: 8 16 56 60 8 16 32 56 + * OX16C950: 16 32 112 120 16 32 64 112 PORT_16C950 * Tegra: 1 4 8 14 16 8 4 1 PORT_TEGRA */ #define UART_FCR_R_TRIG_00 0x00 -- cgit v1.2.3 From e4252cb66637b846b916cca7c2cdb4ed22ab2fc3 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Fri, 23 Jul 2021 10:24:12 -0400 Subject: openvswitch: update kdoc OVS_DP_ATTR_PER_CPU_PIDS Signed-off-by: Mark Gray Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 6571b57b2268..0e436a3755f1 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -70,7 +70,7 @@ enum ovs_datapath_cmd { * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should * not be sent. - * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when + * @OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. -- cgit v1.2.3 From 784dcfa56e0453bb197601ba0b8196f6f892ebcb Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Fri, 23 Jul 2021 10:24:13 -0400 Subject: openvswitch: fix alignment issues Signed-off-by: Mark Gray Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 0e436a3755f1..150bcff49b1c 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -89,8 +89,8 @@ enum ovs_datapath_attr { OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ OVS_DP_ATTR_PAD, OVS_DP_ATTR_MASKS_CACHE_SIZE, - OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in per-cpu - * dispatch mode + OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in + * per-cpu dispatch mode */ __OVS_DP_ATTR_MAX }; -- cgit v1.2.3 From e893bb1bb4d2eb635eba61e5d9c5135d96855773 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 8 Jan 2021 23:10:55 +1100 Subject: x86, prctl: Hook L1D flushing in via prctl Use the existing PR_GET/SET_SPECULATION_CTRL API to expose the L1D flush capability. For L1D flushing PR_SPEC_FORCE_DISABLE and PR_SPEC_DISABLE_NOEXEC are not supported. Enabling L1D flush does not check if the task is running on an SMT enabled core, rather a check is done at runtime (at the time of flush), if the task runs on a SMT sibling then the task is sent a SIGBUS which is executed before the task returns to user space or to a guest. This is better than the other alternatives of: a. Ensuring strict affinity of the task (hard to enforce without further changes in the scheduler) b. Silently skipping flush for tasks that move to SMT enabled cores. Hook up the core prctl and implement the x86 specific parts which in turn makes it functional. Suggested-by: Thomas Gleixner Signed-off-by: Balbir Singh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210108121056.21940-5-sblbir@amazon.com --- include/uapi/linux/prctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 967d9c55323d..964c41ed303e 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -213,6 +213,7 @@ struct prctl_mm_map { /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 # define PR_SPEC_INDIRECT_BRANCH 1 +# define PR_SPEC_L1D_FLUSH 2 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) -- cgit v1.2.3 From 56af5e749f20c3a540310c207dcc373f4f09156e Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 27 Jul 2021 18:33:15 -0700 Subject: net/sched: act_skbmod: Add SKBMOD_F_ECN option support Currently, when doing rate limiting using the tc-police(8) action, the easiest way is to simply drop the packets which exceed or conform the configured bandwidth limit. Add a new option to tc-skbmod(8), so that users may use the ECN [1] extension to explicitly inform the receiver about the congestion instead of dropping packets "on the floor". The 2 least significant bits of the Traffic Class field in IPv4 and IPv6 headers are used to represent different ECN states [2]: 0b00: "Non ECN-Capable Transport", Non-ECT 0b10: "ECN Capable Transport", ECT(0) 0b01: "ECN Capable Transport", ECT(1) 0b11: "Congestion Encountered", CE As an example: $ tc filter add dev eth0 parent 1: protocol ip prio 10 \ matchall action skbmod ecn Doing the above marks all ECT(0) and ECT(1) packets as CE. It does NOT affect Non-ECT or non-IP packets. In the tc-police scenario mentioned above, users may pipe a tc-police action and a tc-skbmod "ecn" action together to achieve ECN-based rate limiting. For TCP connections, upon receiving a CE packet, the receiver will respond with an ECE packet, asking the sender to reduce their congestion window. However ECN also works with other L4 protocols e.g. DCCP and SCTP [2], and our implementation does not touch or care about L4 headers. The updated tc-skbmod SYNOPSIS looks like the following: tc ... action skbmod { set SETTABLE | swap SWAPPABLE | ecn } ... Only one of "set", "swap" or "ecn" shall be used in a single tc-skbmod command. Trying to use more than one of them at a time is considered undefined behavior; pipe multiple tc-skbmod commands together instead. "set" and "swap" only affect Ethernet packets, while "ecn" only affects IPv{4,6} packets. It is also worth mentioning that, in theory, the same effect could be achieved by piping a "police" action and a "bpf" action using the bpf_skb_ecn_set_ce() helper, but this requires eBPF programming from the user, thus impractical. Depends on patch "net/sched: act_skbmod: Skip non-Ethernet packets". [1] https://datatracker.ietf.org/doc/html/rfc3168 [2] https://en.wikipedia.org/wiki/Explicit_Congestion_Notification Reviewed-by: Cong Wang Signed-off-by: Peilin Ye Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_skbmod.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h index c525b3503797..af6ef2cfbf3d 100644 --- a/include/uapi/linux/tc_act/tc_skbmod.h +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -17,6 +17,7 @@ #define SKBMOD_F_SMAC 0x2 #define SKBMOD_F_ETYPE 0x4 #define SKBMOD_F_SWAPMAC 0x8 +#define SKBMOD_F_ECN 0x10 struct tc_skbmod { tc_gen; -- cgit v1.2.3 From 125d10373ad991888c9e94d2da49bcc5ccba2127 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 20 Jul 2021 13:42:15 -0700 Subject: dmanegine: idxd: add software command status Enabling device and wq returns standard errno and that does not provide enough details to indicate what exactly failed. The hardware command status is only 8bits. Expand the command status to 32bits and use the upper 16 bits to define software errors to provide more details on the exact failure. Bit 31 will be used to indicate the error is software set as the driver is using some of the spec defined hardware error as well. Cc: Ramesh Thomas Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162681373579.1968485.5891788397526827892.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index e33997b4d750..1c0175aa0e42 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -9,6 +9,29 @@ #include #endif +/* Driver command error status */ +enum idxd_scmd_stat { + IDXD_SCMD_DEV_ENABLED = 0x80000010, + IDXD_SCMD_DEV_NOT_ENABLED = 0x80000020, + IDXD_SCMD_WQ_ENABLED = 0x80000021, + IDXD_SCMD_DEV_DMA_ERR = 0x80020000, + IDXD_SCMD_WQ_NO_GRP = 0x80030000, + IDXD_SCMD_WQ_NO_NAME = 0x80040000, + IDXD_SCMD_WQ_NO_SVM = 0x80050000, + IDXD_SCMD_WQ_NO_THRESH = 0x80060000, + IDXD_SCMD_WQ_PORTAL_ERR = 0x80070000, + IDXD_SCMD_WQ_RES_ALLOC_ERR = 0x80080000, + IDXD_SCMD_PERCPU_ERR = 0x80090000, + IDXD_SCMD_DMA_CHAN_ERR = 0x800a0000, + IDXD_SCMD_CDEV_ERR = 0x800b0000, + IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000, + IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000, + IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, +}; + +#define IDXD_SCMD_SOFTERR_MASK 0x80000000 +#define IDXD_SCMD_SOFTERR_SHIFT 16 + /* Descriptor flags */ #define IDXD_OP_FLAG_FENCE 0x0001 #define IDXD_OP_FLAG_BOF 0x0002 -- cgit v1.2.3 From 25905f602fdb0cfa147017056636768a7aa1ff6f Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 21 Jul 2021 12:25:20 -0700 Subject: dmaengine: idxd: Change license on idxd.h to LGPL This file was given GPL-2.0 license. But LGPL-2.1 makes more sense as it needs to be used by libraries outside of the kernel source tree. Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- include/uapi/linux/idxd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index e33997b4d750..edc346a77c91 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */ /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ #ifndef _USR_IDXD_H_ #define _USR_IDXD_H_ -- cgit v1.2.3 From 433c38f40f6a81cf3988b9372f2983912737f322 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 27 Jul 2021 13:52:56 -0700 Subject: arm64: mte: change ASYNC and SYNC TCF settings into bitfields Allow the user program to specify both ASYNC and SYNC TCF modes by repurposing the existing constants as bitfields. This will allow the kernel to select one of the modes on behalf of the user program. With this patch the kernel will always select async mode, but a subsequent patch will make this configurable. Link: https://linux-review.googlesource.com/id/Icc5923c85a8ea284588cc399ae74fd19ec291230 Signed-off-by: Peter Collingbourne Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210727205300.2554659-3-pcc@google.com Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- include/uapi/linux/prctl.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 967d9c55323d..89de78a14b9b 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -234,14 +234,15 @@ struct prctl_mm_map { #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) /* MTE tag check fault modes */ -# define PR_MTE_TCF_SHIFT 1 -# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_NONE 0 +# define PR_MTE_TCF_SYNC (1UL << 1) +# define PR_MTE_TCF_ASYNC (1UL << 2) +# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) /* MTE tag inclusion mask */ # define PR_MTE_TAG_SHIFT 3 # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) +/* Unused; kept only for source compatibility */ +# define PR_MTE_TCF_SHIFT 1 /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 -- cgit v1.2.3 From 5d8dbb7fb82b8661c16d496644b931c0e2e3a12e Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 28 Jul 2021 19:38:18 +0300 Subject: net: xfrm: fix shift-out-of-bounce We need to check up->dirmask to avoid shift-out-of-bounce bug, since up->dirmask comes from userspace. Also, added XFRM_USERPOLICY_DIRMASK_MAX constant to uapi to inform user-space that up->dirmask has maximum possible value Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy") Reported-and-tested-by: syzbot+9cd5837a045bbee5b810@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 6e8095106192..b96c1ea7166d 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -514,6 +514,7 @@ struct xfrm_user_offload { #define XFRM_OFFLOAD_INBOUND 2 struct xfrm_userpolicy_default { +#define XFRM_USERPOLICY_DIRMASK_MAX (sizeof(__u8) * 8) __u8 dirmask; __u8 action; }; -- cgit v1.2.3 From bc49d8169aa72295104f1558830c568efb946315 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:39 +0800 Subject: mctp: Add MCTP base Add basic Kconfig, an initial (empty) af_mctp source object, and {AF,PF}_MCTP definitions, and the required definitions for a new protocol type. Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/mctp.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/uapi/linux/mctp.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h new file mode 100644 index 000000000000..2640a589c14c --- /dev/null +++ b/include/uapi/linux/mctp.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Management Component Transport Protocol (MCTP) + * + * Copyright (c) 2021 Code Construct + * Copyright (c) 2021 Google + */ + +#ifndef __UAPI_MCTP_H +#define __UAPI_MCTP_H + +struct sockaddr_mctp { +}; + +#endif /* __UAPI_MCTP_H */ -- cgit v1.2.3 From 60fc63981693f807baa0e404104dedea0e8b4e61 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:42 +0800 Subject: mctp: Add sockaddr_mctp to uapi This change introduces the user-visible MCTP header, containing the protocol-specific addressing definitions. Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/mctp.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 2640a589c14c..52b54d13f385 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -9,7 +9,28 @@ #ifndef __UAPI_MCTP_H #define __UAPI_MCTP_H +#include + +typedef __u8 mctp_eid_t; + +struct mctp_addr { + mctp_eid_t s_addr; +}; + struct sockaddr_mctp { + unsigned short int smctp_family; + int smctp_network; + struct mctp_addr smctp_addr; + __u8 smctp_type; + __u8 smctp_tag; }; +#define MCTP_NET_ANY 0x0 + +#define MCTP_ADDR_NULL 0x00 +#define MCTP_ADDR_ANY 0xff + +#define MCTP_TAG_MASK 0x07 +#define MCTP_TAG_OWNER 0x08 + #endif /* __UAPI_MCTP_H */ -- cgit v1.2.3 From 4b2e69305cbbc7c32ecbd946110b505c4ff6071a Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:43 +0800 Subject: mctp: Add initial driver infrastructure Add an empty drivers/net/mctp/, for future interface drivers. Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index c3cc5a9e5eaf..4783af9fe520 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -54,6 +54,7 @@ #define ARPHRD_X25 271 /* CCITT X.25 */ #define ARPHRD_HWX25 272 /* Boards with X.25 in firmware */ #define ARPHRD_CAN 280 /* Controller Area Network */ +#define ARPHRD_MCTP 290 #define ARPHRD_PPP 512 #define ARPHRD_CISCO 513 /* Cisco HDLC */ #define ARPHRD_HDLC ARPHRD_CISCO -- cgit v1.2.3 From 583be982d93479ea3d85091b0fd0b01201ede87d Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 29 Jul 2021 10:20:44 +0800 Subject: mctp: Add device handling and netlink interface This change adds the infrastructure for managing MCTP netdevices; we add a pointer to the AF_MCTP-specific data to struct netdevice, and hook up the rtnetlink operations for adding and removing addresses. Includes changes from Matt Johnston . Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 3 +++ include/uapi/linux/if_link.h | 10 ++++++++++ include/uapi/linux/mctp.h | 1 + 3 files changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index a0b637911d3c..5f589c7a8382 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -151,6 +151,9 @@ #define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and * aggregation protocol */ +#define ETH_P_MCTP 0x00FA /* Management component transport + * protocol packets + */ /* * This is an Ethernet frame header. diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4882e81514b6..49b22afab78f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1260,4 +1260,14 @@ struct ifla_rmnet_flags { __u32 mask; }; +/* MCTP section */ + +enum { + IFLA_MCTP_UNSPEC, + IFLA_MCTP_NET, + __IFLA_MCTP_MAX, +}; + +#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 52b54d13f385..a9d8edb3402b 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -26,6 +26,7 @@ struct sockaddr_mctp { }; #define MCTP_NET_ANY 0x0 +#define MCTP_NET_DEFAULT 0x0 #define MCTP_ADDR_NULL 0x00 #define MCTP_ADDR_ANY 0xff -- cgit v1.2.3 From 03f2bbc4ee57ca53b2fa1d9caabc5006e0b8f375 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 29 Jul 2021 10:20:52 +0800 Subject: mctp: Allow per-netns default networks Currently we have a compile-time default network (MCTP_INITIAL_DEFAULT_NET). This change introduces a default_net field on the net namespace, allowing future configuration for new interfaces. Signed-off-by: Matt Johnston Signed-off-by: David S. Miller --- include/uapi/linux/mctp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index a9d8edb3402b..52b54d13f385 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -26,7 +26,6 @@ struct sockaddr_mctp { }; #define MCTP_NET_ANY 0x0 -#define MCTP_NET_DEFAULT 0x0 #define MCTP_ADDR_NULL 0x00 #define MCTP_ADDR_ANY 0xff -- cgit v1.2.3 From 695176bfe5dec2051f950bdac0ae0b21e29e6de3 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 29 Jul 2021 16:12:14 -0700 Subject: net_sched: refactor TC action init API TC action ->init() API has 10 parameters, it becomes harder to read. Some of them are just boolean and can be replaced by flags. Similarly for the internal API tcf_action_init() and tcf_exts_validate(). This patch converts them to flags and fold them into the upper 16 bits of "flags", whose lower 16 bits are still reserved for user-space. More specifically, the following kernel flags are introduced: TCA_ACT_FLAGS_POLICE replace 'name' in a few contexts, to distinguish whether it is compatible with policer. TCA_ACT_FLAGS_BIND replaces 'bind', to indicate whether this action is bound to a filter. TCA_ACT_FLAGS_REPLACE replaces 'ovr' in most contexts, means we are replacing an existing action. TCA_ACT_FLAGS_NO_RTNL replaces 'rtnl_held' but has the opposite meaning, because we still hold RTNL in most cases. The only user-space flag TCA_ACT_FLAGS_NO_PERCPU_STATS is untouched and still stored as before. I have tested this patch with tdc and I do not see any failure related to this patch. Tested-by: Vlad Buslov Acked-by: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 025c40fef93d..6836ccb9c45d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -22,6 +22,7 @@ enum { __TCA_ACT_MAX }; +/* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */ #define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for * actions stats. */ -- cgit v1.2.3 From 2d3e5caf96b9449af951e63476657acd759c1a30 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 31 Jul 2021 12:08:30 -0500 Subject: net/ipv4: Replace one-element array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. Use an anonymous union with a couple of anonymous structs in order to keep userspace unchanged: $ pahole -C ip_msfilter net/ipv4/ip_sockglue.o struct ip_msfilter { union { struct { __be32 imsf_multiaddr_aux; /* 0 4 */ __be32 imsf_interface_aux; /* 4 4 */ __u32 imsf_fmode_aux; /* 8 4 */ __u32 imsf_numsrc_aux; /* 12 4 */ __be32 imsf_slist[1]; /* 16 4 */ }; /* 0 20 */ struct { __be32 imsf_multiaddr; /* 0 4 */ __be32 imsf_interface; /* 4 4 */ __u32 imsf_fmode; /* 8 4 */ __u32 imsf_numsrc; /* 12 4 */ __be32 imsf_slist_flex[0]; /* 16 0 */ }; /* 0 16 */ }; /* 0 20 */ /* size: 20, cachelines: 1, members: 1 */ /* last cacheline: 20 bytes */ }; Also, refactor the code accordingly and make use of the struct_size() and flex_array_size() helpers. This helps with the ongoing efforts to globally enable -Warray-bounds and get us closer to being able to tighten the FORTIFY_SOURCE routines on memcpy(). [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.10/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/79 Link: https://github.com/KSPP/linux/issues/109 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index d1b327036ae4..193b7cf1f0ac 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -188,11 +188,22 @@ struct ip_mreq_source { }; struct ip_msfilter { - __be32 imsf_multiaddr; - __be32 imsf_interface; - __u32 imsf_fmode; - __u32 imsf_numsrc; - __be32 imsf_slist[1]; + union { + struct { + __be32 imsf_multiaddr_aux; + __be32 imsf_interface_aux; + __u32 imsf_fmode_aux; + __u32 imsf_numsrc_aux; + __be32 imsf_slist[1]; + }; + struct { + __be32 imsf_multiaddr; + __be32 imsf_interface; + __u32 imsf_fmode; + __u32 imsf_numsrc; + __be32 imsf_slist_flex[]; + }; + }; }; #define IP_MSFILTER_SIZE(numsrc) \ -- cgit v1.2.3 From 06447ae5e33bfbc5a777cc06d9854a31f3912833 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 14 Jul 2021 21:56:55 +0200 Subject: ioprio: move user space relevant ioprio bits to UAPI includes systemd added a modified copy of include/linux/ioprio.h into its code to get the relevant content definitions for the exposed ioprio_[get|set] system calls. Move the user space relevant ioprio bits to the UAPI includes to be able to use the ioprio_[get|set] syscalls as intended. Cc: Kay Sievers Cc: Greg Kroah-Hartman Cc: Jens Axboe Cc: linux-block@vger.kernel.org Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20210714195655.181943-1-socketcan@hartkopp.net Signed-off-by: Jens Axboe --- include/uapi/linux/ioprio.h | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 include/uapi/linux/ioprio.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h new file mode 100644 index 000000000000..77b17e08b0da --- /dev/null +++ b/include/uapi/linux/ioprio.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_IOPRIO_H +#define _UAPI_LINUX_IOPRIO_H + +/* + * Gives us 8 prio classes with 13-bits of data for each class + */ +#define IOPRIO_CLASS_SHIFT (13) +#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) + +#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) +#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) +#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) + +/* + * These are the io priority groups as implemented by CFQ. RT is the realtime + * class, it always gets premium service. BE is the best-effort scheduling + * class, the default for any process. IDLE is the idle scheduling class, it + * is only served when no one else is using the disk. + */ +enum { + IOPRIO_CLASS_NONE, + IOPRIO_CLASS_RT, + IOPRIO_CLASS_BE, + IOPRIO_CLASS_IDLE, +}; + +#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE) + +/* + * 8 best effort priority levels are supported + */ +#define IOPRIO_BE_NR (8) + +enum { + IOPRIO_WHO_PROCESS = 1, + IOPRIO_WHO_PGRP, + IOPRIO_WHO_USER, +}; + +/* + * Fallback BE priority + */ +#define IOPRIO_NORM (4) + +#endif /* _UAPI_LINUX_IOPRIO_H */ -- cgit v1.2.3 From 7957d93bf32bc211415827e44fdd9cdf1388df59 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 13 Jul 2021 01:05:27 +0200 Subject: block: add ioctl to read the disk sequence number Add a new BLKGETDISKSEQ ioctl which retrieves the disk sequence number from the genhd structure. # ./getdiskseq /dev/loop* /dev/loop0: 13 /dev/loop0p1: 13 /dev/loop0p2: 13 /dev/loop0p3: 13 /dev/loop1: 14 /dev/loop1p1: 14 /dev/loop1p2: 14 /dev/loop2: 5 /dev/loop3: 6 Reviewed-by: Christoph Hellwig Signed-off-by: Matteo Croce Tested-by: Luca Boccassi Link: https://lore.kernel.org/r/20210712230530.29323-4-mcroce@linux.microsoft.com Signed-off-by: Jens Axboe --- include/uapi/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 4c32e97dcdf0..bdf7b404b3e7 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -184,6 +184,7 @@ struct fsxattr { #define BLKSECDISCARD _IO(0x12,125) #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) +#define BLKGETDISKSEQ _IOR(0x12,128,__u64) /* * A jump here: 130-136 are reserved for zoned block devices * (see uapi/linux/blkzoned.h) -- cgit v1.2.3 From 3a755cd8b7c601f756cbbf908b84f7cc8c04a02b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 2 Aug 2021 11:02:19 +0800 Subject: bonding: add new option lacp_active Add an option lacp_active, which is similar with team's runner.active. This option specifies whether to send LACPDU frames periodically. If set on, the LACPDU frames are sent along with the configured lacp_rate setting. If set off, the LACPDU frames acts as "speak when spoken to". Note, the LACPDU state frames still will be sent when init or unbind port. v2: remove module parameter Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 49b22afab78f..5310003523ce 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -855,6 +855,7 @@ enum { IFLA_BOND_AD_ACTOR_SYSTEM, IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From 018c14911dd7e2feedd96d440f12ea999e459fff Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Tue, 13 Jul 2021 19:50:21 +0200 Subject: scsi: target: tcmu: Add new feature KEEP_BUF When running command pipelining for WRITE direction commands (e.g. tape device write), userspace sends cmd completion to cmd ring before processing write data. In that case userspace has to copy data before sending completion, because cmd completion also implicitly releases the data buffer in data area. The new feature KEEP_BUF allows userspace to optionally keep the buffer after completion by setting new bit TCMU_UFLAG_KEEP_BUF in tcmu_cmd_entry_hdr->uflags. In that case buffer has to be released explicitly by writing the cmd_id to new action item free_kept_buf. All kept buffers are released during reset_ring and if userspace closes uio device (tcmu_release). Link: https://lore.kernel.org/r/20210713175021.20103-1-bostroesser@gmail.com Reviewed-by: Mike Christie Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- include/uapi/linux/target_core_user.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index 95b1597f16ae..27ace512babd 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -46,6 +46,7 @@ #define TCMU_MAILBOX_FLAG_CAP_OOOC (1 << 0) /* Out-of-order completions */ #define TCMU_MAILBOX_FLAG_CAP_READ_LEN (1 << 1) /* Read data length */ #define TCMU_MAILBOX_FLAG_CAP_TMR (1 << 2) /* TMR notifications */ +#define TCMU_MAILBOX_FLAG_CAP_KEEP_BUF (1<<3) /* Keep buf after cmd completion */ struct tcmu_mailbox { __u16 version; @@ -75,6 +76,7 @@ struct tcmu_cmd_entry_hdr { __u8 kflags; #define TCMU_UFLAG_UNKNOWN_OP 0x1 #define TCMU_UFLAG_READ_LEN 0x2 +#define TCMU_UFLAG_KEEP_BUF 0x4 __u8 uflags; } __packed; -- cgit v1.2.3 From 5b9272e93f2efe3f6cda60cc2c26817b2ce49386 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 7 Jul 2021 11:48:54 +0200 Subject: can: j1939: extend UAPI to notify about RX status To be able to create applications with user friendly feedback, we need be able to provide receive status information. Typical ETP transfer may take seconds or even hours. To give user some clue or show a progress bar, the stack should push status updates. Same as for the TX information, the socket error queue will be used with following new signals: - J1939_EE_INFO_RX_RTS - received and accepted request to send signal. - J1939_EE_INFO_RX_DPO - received data package offset signal - J1939_EE_INFO_RX_ABORT - RX session was aborted Instead of completion signal, user will get data package. To activate this signals, application should set SOF_TIMESTAMPING_RX_SOFTWARE to the SO_TIMESTAMPING socket option. This will avoid unpredictable application behavior for the old software. Link: https://lore.kernel.org/r/20210707094854.30781-3-o.rempel@pengutronix.de Signed-off-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/j1939.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/j1939.h b/include/uapi/linux/can/j1939.h index df6e821075c1..38936460f668 100644 --- a/include/uapi/linux/can/j1939.h +++ b/include/uapi/linux/can/j1939.h @@ -78,11 +78,20 @@ enum { enum { J1939_NLA_PAD, J1939_NLA_BYTES_ACKED, + J1939_NLA_TOTAL_SIZE, + J1939_NLA_PGN, + J1939_NLA_SRC_NAME, + J1939_NLA_DEST_NAME, + J1939_NLA_SRC_ADDR, + J1939_NLA_DEST_ADDR, }; enum { J1939_EE_INFO_NONE, J1939_EE_INFO_TX_ABORT, + J1939_EE_INFO_RX_RTS, + J1939_EE_INFO_RX_DPO, + J1939_EE_INFO_RX_ABORT, }; struct j1939_filter { -- cgit v1.2.3 From 04190bf8944deb7e3ac165a1a494db23aa0160a9 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 4 Aug 2021 10:55:56 +0300 Subject: sock: allow reading and changing sk_userlocks with setsockopt SOCK_SNDBUF_LOCK and SOCK_RCVBUF_LOCK flags disable automatic socket buffers adjustment done by kernel (see tcp_fixup_rcvbuf() and tcp_sndbuf_expand()). If we've just created a new socket this adjustment is enabled on it, but if one changes the socket buffer size by setsockopt(SO_{SND,RCV}BUF*) it becomes disabled. CRIU needs to call setsockopt(SO_{SND,RCV}BUF*) on each socket on restore as it first needs to increase buffer sizes for packet queues restore and second it needs to restore back original buffer sizes. So after CRIU restore all sockets become non-auto-adjustable, which can decrease network performance of restored applications significantly. CRIU need to be able to restore sockets with enabled/disabled adjustment to the same state it was before dump, so let's add special setsockopt for it. Let's also export SOCK_SNDBUF_LOCK and SOCK_RCVBUF_LOCK flags to uAPI so that using these interface one can reenable automatic socket buffer adjustment on their sockets. Signed-off-by: Pavel Tikhomirov Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/socket.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index c3409c8ec0dd..eb0a9a5b6e71 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -26,4 +26,9 @@ struct __kernel_sockaddr_storage { }; }; +#define SOCK_SNDBUF_LOCK 1 +#define SOCK_RCVBUF_LOCK 2 + +#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) + #endif /* _UAPI_LINUX_SOCKET_H */ -- cgit v1.2.3 From 9d5adeecc409365e45cd89657f0392d0dd5c217f Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Tue, 22 Jun 2021 13:39:57 +0200 Subject: media: v4l2-ctrls: Add intra-refresh period control Add a control to set intra-refresh period. Acked-by: Hans Verkuil Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index fdf97a6d7d18..5532b5f68493 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -435,6 +435,7 @@ enum v4l2_mpeg_video_multi_slice_mode { #define V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX (V4L2_CID_CODEC_BASE+233) #define V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES (V4L2_CID_CODEC_BASE+234) #define V4L2_CID_MPEG_VIDEO_DEC_CONCEAL_COLOR (V4L2_CID_CODEC_BASE+235) +#define V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD (V4L2_CID_CODEC_BASE+236) /* CIDs for the MPEG-2 Part 2 (H.262) codec */ #define V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL (V4L2_CID_CODEC_BASE+270) -- cgit v1.2.3 From 46abe13b5e3db187e52cd0de06c07bbce010726c Mon Sep 17 00:00:00 2001 From: Igor Skalkin Date: Tue, 3 Aug 2021 14:10:24 +0100 Subject: firmware: arm_scmi: Add virtio transport This transport enables communications with an SCMI platform through virtio; the SCMI platform will be represented by a virtio device. Implement an SCMI virtio driver according to the virtio SCMI device spec [1]. Virtio device id 32 has been reserved for the SCMI device [2]. The virtio transport has one Tx channel (virtio cmdq, A2P channel) and at most one Rx channel (virtio eventq, P2A channel). The following feature bit defined in [1] is not implemented: VIRTIO_SCMI_F_SHARED_MEMORY. The number of messages which can be pending simultaneously is restricted according to the virtqueue capacity negotiated at probing time. As soon as Rx channel message buffers are allocated or have been read out by the arm-scmi driver, feed them back to the virtio device. Since some virtio devices may not have the short response time exhibited by SCMI platforms using other transports, set a generous response timeout. SCMI polling mode is not supported by this virtio transport since deemed meaningless: polling mode operation is offered by the SCMI core to those transports that could not provide a completion interrupt on the TX path, which is never the case for virtio whose core callbacks can easily call into core scmi_rx_callback upon messages reception. [1] https://github.com/oasis-tcs/virtio-spec/blob/master/virtio-scmi.tex [2] https://www.oasis-open.org/committees/ballot.php?id=3496 Link: https://lore.kernel.org/r/20210803131024.40280-16-cristian.marussi@arm.com Cc: "Michael S. Tsirkin" Cc: Jason Wang Co-developed-by: Peter Hilber Co-developed-by: Cristian Marussi Signed-off-by: Igor Skalkin [ Peter: Adapted patch for submission to upstream. ] Signed-off-by: Peter Hilber [ Cristian: simplified driver logic, changed link_supplier and channel available/setup logic, removed dummy callbacks ] Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/uapi/linux/virtio_ids.h | 1 + include/uapi/linux/virtio_scmi.h | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 include/uapi/linux/virtio_scmi.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 70a8057ad4bb..f74155f6882d 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -55,6 +55,7 @@ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_SCMI 32 /* virtio SCMI */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ /* diff --git a/include/uapi/linux/virtio_scmi.h b/include/uapi/linux/virtio_scmi.h new file mode 100644 index 000000000000..f8ddd04a3ace --- /dev/null +++ b/include/uapi/linux/virtio_scmi.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* + * Copyright (C) 2020-2021 OpenSynergy GmbH + * Copyright (C) 2021 ARM Ltd. + */ + +#ifndef _UAPI_LINUX_VIRTIO_SCMI_H +#define _UAPI_LINUX_VIRTIO_SCMI_H + +#include + +/* Device implements some SCMI notifications, or delayed responses. */ +#define VIRTIO_SCMI_F_P2A_CHANNELS 0 + +/* Device implements any SCMI statistics shared memory region */ +#define VIRTIO_SCMI_F_SHARED_MEMORY 1 + +/* Virtqueues */ + +#define VIRTIO_SCMI_VQ_TX 0 /* cmdq */ +#define VIRTIO_SCMI_VQ_RX 1 /* eventq */ +#define VIRTIO_SCMI_VQ_MAX_CNT 2 + +#endif /* _UAPI_LINUX_VIRTIO_SCMI_H */ -- cgit v1.2.3 From db243b796439c0caba47865564d8acd18a301d18 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 4 Aug 2021 15:45:36 -0500 Subject: net/ipv4/ipv6: Replace one-element arraya with flexible-array members MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. Use an anonymous union with a couple of anonymous structs in order to keep userspace unchanged and refactor the related code accordingly: $ pahole -C group_filter net/ipv4/ip_sockglue.o struct group_filter { union { struct { __u32 gf_interface_aux; /* 0 4 */ /* XXX 4 bytes hole, try to pack */ struct __kernel_sockaddr_storage gf_group_aux; /* 8 128 */ /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */ __u32 gf_fmode_aux; /* 136 4 */ __u32 gf_numsrc_aux; /* 140 4 */ struct __kernel_sockaddr_storage gf_slist[1]; /* 144 128 */ }; /* 0 272 */ struct { __u32 gf_interface; /* 0 4 */ /* XXX 4 bytes hole, try to pack */ struct __kernel_sockaddr_storage gf_group; /* 8 128 */ /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */ __u32 gf_fmode; /* 136 4 */ __u32 gf_numsrc; /* 140 4 */ struct __kernel_sockaddr_storage gf_slist_flex[0]; /* 144 0 */ }; /* 0 144 */ }; /* 0 272 */ /* size: 272, cachelines: 5, members: 1 */ /* last cacheline: 16 bytes */ }; $ pahole -C compat_group_filter net/ipv4/ip_sockglue.o struct compat_group_filter { union { struct { __u32 gf_interface_aux; /* 0 4 */ struct __kernel_sockaddr_storage gf_group_aux __attribute__((__aligned__(4))); /* 4 128 */ /* --- cacheline 2 boundary (128 bytes) was 4 bytes ago --- */ __u32 gf_fmode_aux; /* 132 4 */ __u32 gf_numsrc_aux; /* 136 4 */ struct __kernel_sockaddr_storage gf_slist[1] __attribute__((__aligned__(4))); /* 140 128 */ } __attribute__((__packed__)) __attribute__((__aligned__(4))); /* 0 268 */ struct { __u32 gf_interface; /* 0 4 */ struct __kernel_sockaddr_storage gf_group __attribute__((__aligned__(4))); /* 4 128 */ /* --- cacheline 2 boundary (128 bytes) was 4 bytes ago --- */ __u32 gf_fmode; /* 132 4 */ __u32 gf_numsrc; /* 136 4 */ struct __kernel_sockaddr_storage gf_slist_flex[0] __attribute__((__aligned__(4))); /* 140 0 */ } __attribute__((__packed__)) __attribute__((__aligned__(4))); /* 0 140 */ } __attribute__((__aligned__(1))); /* 0 268 */ /* size: 268, cachelines: 5, members: 1 */ /* forced alignments: 1 */ /* last cacheline: 12 bytes */ } __attribute__((__packed__)); This helps with the ongoing efforts to globally enable -Warray-bounds and get us closer to being able to tighten the FORTIFY_SOURCE routines on memcpy(). [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.10/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/79 Link: https://github.com/KSPP/linux/issues/109 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 193b7cf1f0ac..14168225cecd 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -222,11 +222,22 @@ struct group_source_req { }; struct group_filter { - __u32 gf_interface; /* interface index */ - struct __kernel_sockaddr_storage gf_group; /* multicast address */ - __u32 gf_fmode; /* filter mode */ - __u32 gf_numsrc; /* number of sources */ - struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + union { + struct { + __u32 gf_interface_aux; /* interface index */ + struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */ + __u32 gf_fmode_aux; /* filter mode */ + __u32 gf_numsrc_aux; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + }; + struct { + __u32 gf_interface; /* interface index */ + struct __kernel_sockaddr_storage gf_group; /* multicast address */ + __u32 gf_fmode; /* filter mode */ + __u32 gf_numsrc; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */ + }; + }; }; #define GROUP_FILTER_SIZE(numsrc) \ -- cgit v1.2.3 From 9344988d2979ce9eefe136a69efcf692615ebba8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 30 Jul 2021 15:14:22 +0200 Subject: netfilter: ctnetlink: allow to filter dump by status bits If CTA_STATUS is present, but CTA_STATUS_MASK is not, then the mask is automatically set to 'status', so that kernel returns those entries that have all of the requested bits set. This makes more sense than using a all-one mask since we'd hardly ever find a match. There are no other checks for status bits, so if e.g. userspace sets impossible combinations it will get an empty dump. If kernel would reject unknown status bits, then a program that works on a future kernel that has IPS_FOO bit fails on old kernels. Same for 'impossible' combinations: Kernel never sets ASSURED without first having set SEEN_REPLY, but its possible that a future kernel could do so. Therefore no sanity tests other than a 0-mask. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index d8484be72fdc..c6e6d7d7d538 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -56,6 +56,7 @@ enum ctattr_type { CTA_LABELS_MASK, CTA_SYNPROXY, CTA_FILTER, + CTA_STATUS_MASK, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) -- cgit v1.2.3 From a6e57c4af12bbacf927d7321c3aa894948653688 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Aug 2021 00:15:54 +0200 Subject: netfilter: nfnetlink_hook: missing chain family The family is relevant for pseudo-families like NFPROTO_INET otherwise the user needs to rely on the hook function name to differentiate it from NFPROTO_IPV4 and NFPROTO_IPV6 names. Add nfnl_hook_chain_desc_attributes instead of using the existing NFTA_CHAIN_* attributes, since these do not provide a family number. Fixes: e2cf17d3774c ("netfilter: add new hook nfnl subsystem") Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_hook.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_hook.h b/include/uapi/linux/netfilter/nfnetlink_hook.h index 912ec60b26b0..bbcd285b22e1 100644 --- a/include/uapi/linux/netfilter/nfnetlink_hook.h +++ b/include/uapi/linux/netfilter/nfnetlink_hook.h @@ -43,6 +43,15 @@ enum nfnl_hook_chain_info_attributes { }; #define NFNLA_HOOK_INFO_MAX (__NFNLA_HOOK_INFO_MAX - 1) +enum nfnl_hook_chain_desc_attributes { + NFNLA_CHAIN_UNSPEC, + NFNLA_CHAIN_TABLE, + NFNLA_CHAIN_FAMILY, + NFNLA_CHAIN_NAME, + __NFNLA_CHAIN_MAX, +}; +#define NFNLA_CHAIN_MAX (__NFNLA_CHAIN_MAX - 1) + /** * enum nfnl_hook_chaintype - chain type * -- cgit v1.2.3 From a43e2a0e11491b73e2acaa27ee74d6c3b86deac0 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 16 Jul 2021 22:46:21 -0400 Subject: drm/amdkfd: Allow querying SVM attributes that are clear Currently the SVM get_attr call allows querying, which flags are set in the entire address range. Add the opposite query, which flags are clear in the entire address range. Both queries can be combined in a single get_attr call, which allows answering questions such as, "is this address range coherent, non-coherent, or a mix of both"? Proposed userspace for UAPI: https://github.com/RadeonOpenCompute/ROCR-Runtime/tree/memory_model_queries Signed-off-by: Felix Kuehling Reviewed-by: Philip Yand Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 3cb5b5dd9f77..af96af174dc4 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -31,9 +31,10 @@ * - 1.3 - Add SMI events support * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API + * - 1.6 - Query clear flags in SVM get_attr API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 5 +#define KFD_IOCTL_MINOR_VERSION 6 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -575,18 +576,19 @@ struct kfd_ioctl_svm_attribute { * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC or * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC resepctively. For * @KFD_IOCTL_SVM_ATTR_SET_FLAGS, flags of all pages will be - * aggregated by bitwise AND. The minimum migration granularity - * throughout the range will be returned for - * @KFD_IOCTL_SVM_ATTR_GRANULARITY. + * aggregated by bitwise AND. That means, a flag will be set in the + * output, if that flag is set for all pages in the range. For + * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS, flags of all pages will be + * aggregated by bitwise NOR. That means, a flag will be set in the + * output, if that flag is clear for all pages in the range. + * The minimum migration granularity throughout the range will be + * returned for @KFD_IOCTL_SVM_ATTR_GRANULARITY. * * Querying of accessibility attributes works by initializing the * attribute type to @KFD_IOCTL_SVM_ATTR_ACCESS and the value to the * GPUID being queried. Multiple attributes can be given to allow * querying multiple GPUIDs. The ioctl function overwrites the * attribute type to indicate the access for the specified GPU. - * - * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS is invalid for - * @KFD_IOCTL_SVM_OP_GET_ATTR. */ struct kfd_ioctl_svm_args { __u64 start_addr; -- cgit v1.2.3 From af579beb666aefb17e9a335c12c788c92932baf1 Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Sun, 8 Aug 2021 15:26:25 +1000 Subject: fanotify: add pidfd support to the fanotify API Introduce a new flag FAN_REPORT_PIDFD for fanotify_init(2) which allows userspace applications to control whether a pidfd information record containing a pidfd is to be returned alongside the generic event metadata for each event. If FAN_REPORT_PIDFD is enabled for a notification group, an additional struct fanotify_event_info_pidfd object type will be supplied alongside the generic struct fanotify_event_metadata for a single event. This functionality is analogous to that of FAN_REPORT_FID in terms of how the event structure is supplied to a userspace application. Usage of FAN_REPORT_PIDFD with FAN_REPORT_FID/FAN_REPORT_DFID_NAME is permitted, and in this case a struct fanotify_event_info_pidfd object will likely follow any struct fanotify_event_info_fid object. Currently, the usage of the FAN_REPORT_TID flag is not permitted along with FAN_REPORT_PIDFD as the pidfd API currently only supports the creation of pidfds for thread-group leaders. Additionally, usage of the FAN_REPORT_PIDFD flag is limited to privileged processes only i.e. event listeners that are running with the CAP_SYS_ADMIN capability. Attempting to supply the FAN_REPORT_TID initialization flags with FAN_REPORT_PIDFD or creating a notification group without CAP_SYS_ADMIN will result with -EINVAL being returned to the caller. In the event of a pidfd creation error, there are two types of error values that can be reported back to the listener. There is FAN_NOPIDFD, which will be reported in cases where the process responsible for generating the event has terminated prior to the event listener being able to read the event. Then there is FAN_EPIDFD, which will be reported when a more generic pidfd creation error has occurred when fanotify calls pidfd_create(). Link: https://lore.kernel.org/r/5f9e09cff7ed62bfaa51c1369e0f7ea5f16a91aa.1628398044.git.repnop@google.com Signed-off-by: Matthew Bobrowski Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index fbf9c5c7dd59..64553df9d735 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -51,6 +51,7 @@ #define FAN_ENABLE_AUDIT 0x00000040 /* Flags to determine fanotify event format */ +#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */ #define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ #define FAN_REPORT_FID 0x00000200 /* Report unique file id */ #define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ @@ -123,6 +124,7 @@ struct fanotify_event_metadata { #define FAN_EVENT_INFO_TYPE_FID 1 #define FAN_EVENT_INFO_TYPE_DFID_NAME 2 #define FAN_EVENT_INFO_TYPE_DFID 3 +#define FAN_EVENT_INFO_TYPE_PIDFD 4 /* Variable length info record following event metadata */ struct fanotify_event_info_header { @@ -148,6 +150,15 @@ struct fanotify_event_info_fid { unsigned char handle[0]; }; +/* + * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD. + * It holds a pidfd for the pid that was responsible for generating an event. + */ +struct fanotify_event_info_pidfd { + struct fanotify_event_info_header hdr; + __s32 pidfd; +}; + struct fanotify_response { __s32 fd; __u32 response; @@ -160,6 +171,8 @@ struct fanotify_response { /* No fd set in event */ #define FAN_NOFD -1 +#define FAN_NOPIDFD FAN_NOFD +#define FAN_EPIDFD -2 /* Helper functions to deal with fanotify_event_metadata buffers */ #define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) -- cgit v1.2.3 From 91ccbbac1747eea155632a1c6bb100052309b215 Mon Sep 17 00:00:00 2001 From: Tushar Sugandhi Date: Mon, 12 Jul 2021 17:48:58 -0700 Subject: dm ima: measure data on table load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DM configures a block device with various target specific attributes passed to it as a table. DM loads the table, and calls each target’s respective constructors with the attributes as input parameters. Some of these attributes are critical to ensure the device meets certain security bar. Thus, IMA should measure these attributes, to ensure they are not tampered with, during the lifetime of the device. So that the external services can have high confidence in the configuration of the block-devices on a given system. Some devices may have large tables. And a given device may change its state (table-load, suspend, resume, rename, remove, table-clear etc.) many times. Measuring these attributes each time when the device changes its state will significantly increase the size of the IMA logs. Further, once configured, these attributes are not expected to change unless a new table is loaded, or a device is removed and recreated. Therefore the clear-text of the attributes should only be measured during table load, and the hash of the active/inactive table should be measured for the remaining device state changes. Export IMA function ima_measure_critical_data() to allow measurement of DM device parameters, as well as target specific attributes, during table load. Compute the hash of the inactive table and store it for measurements during future state change. If a load is called multiple times, update the inactive table hash with the hash of the latest populated table. So that the correct inactive table hash is measured when the device transitions to different states like resume, remove, rename, etc. Signed-off-by: Tushar Sugandhi Signed-off-by: Colin Ian King # leak fix Signed-off-by: Mike Snitzer --- include/uapi/linux/dm-ioctl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index e5c6e458bdf7..c12ce30b52df 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -376,4 +376,10 @@ enum { */ #define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */ +/* + * If set, returns in the in buffer passed by UM, the raw table information + * that would be measured by IMA subsystem on device state change. + */ +#define DM_IMA_MEASUREMENT_FLAG (1 << 19) /* In */ + #endif /* _LINUX_DM_IOCTL_H */ -- cgit v1.2.3 From 45a687879b31caae4032abd1c2402e289d2b8083 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 14:00:10 +0300 Subject: net: bridge: fix flags interpretation for extern learn fdb entries Ignore fdb flags when adding port extern learn entries and always set BR_FDB_LOCAL flag when adding bridge extern learn entries. This is closest to the behaviour we had before and avoids breaking any use cases which were allowed. This patch fixes iproute2 calls which assume NUD_PERMANENT and were allowed before, example: $ bridge fdb add 00:11:22:33:44:55 dev swp1 extern_learn Extern learn entries are allowed to roam, but do not expire, so static or dynamic flags make no sense for them. Also add a comment for future reference. Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space") Fixes: 0541a6293298 ("net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry") Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: Nikolay Aleksandrov Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210810110010.43859-1-razor@blackwall.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/neighbour.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index dc8b72201f6c..00a60695fa53 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -66,8 +66,11 @@ enum { #define NUD_NONE 0x00 /* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change - and make no address resolution or NUD. - NUD_PERMANENT also cannot be deleted by garbage collectors. + * and make no address resolution or NUD. + * NUD_PERMANENT also cannot be deleted by garbage collectors. + * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry + * states don't make sense and thus are ignored. Such entries don't age and + * can roam. */ struct nda_cacheinfo { -- cgit v1.2.3 From df271cd641f101decaa4f7c1dd5c62939900bd4c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:19 +0300 Subject: net: bridge: vlan: add support for mcast igmp/mld version global options Add support to change and retrieve global vlan IGMP/MLD versions. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 5aca85874447..5188b9f6da28 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -549,6 +549,8 @@ enum { BRIDGE_VLANDB_GOPTS_ID, BRIDGE_VLANDB_GOPTS_RANGE, BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, + BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION, + BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 931ba87d2017f3869d656f3c705883549bfeb97f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:20 +0300 Subject: net: bridge: vlan: add support for mcast last member count global option Add support to change and retrieve global vlan multicast last member count option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 5188b9f6da28..d7a150034376 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -551,6 +551,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION, BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION, + BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 50725f6e6b217e7661ca696b7cc1f1b9aa7bda84 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:21 +0300 Subject: net: bridge: vlan: add support for mcast startup query count global option Add support to change and retrieve global vlan multicast startup query count option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index d7a150034376..082b413e1342 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -552,6 +552,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION, BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT, + BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 77f6ababa299112092a264cac96bedf1a87015ef Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:22 +0300 Subject: net: bridge: vlan: add support for mcast last member interval global option Add support to change and retrieve global vlan multicast last member interval option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 082b413e1342..950ad175610e 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -553,6 +553,8 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT, + BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL, + BRIDGE_VLANDB_GOPTS_PAD, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 2da0aea21f1c40d003af6680551eaa5471103164 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:23 +0300 Subject: net: bridge: vlan: add support for mcast membership interval global option Add support to change and retrieve global vlan multicast membership interval option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 950ad175610e..93f1f16617c8 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -555,6 +555,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL, BRIDGE_VLANDB_GOPTS_PAD, + BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From cd9269d463107bc4a53a0965d90a57efeee9ae11 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:24 +0300 Subject: net: bridge: vlan: add support for mcast querier interval global option Add support to change and retrieve global vlan multicast querier interval option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 93f1f16617c8..fdc264c57009 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -556,6 +556,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL, BRIDGE_VLANDB_GOPTS_PAD, BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From d6c08aba4f29f606769939eb6156efceb7dbb790 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:25 +0300 Subject: net: bridge: vlan: add support for mcast query interval global option Add support to change and retrieve global vlan multicast query interval option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index fdc264c57009..1517aea738f4 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -557,6 +557,7 @@ enum { BRIDGE_VLANDB_GOPTS_PAD, BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 425214508b1bd3596edb31da8d9aedee30f2b4f5 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:26 +0300 Subject: net: bridge: vlan: add support for mcast query response interval global option Add support to change and retrieve global vlan multicast query response interval option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 1517aea738f4..2627a657c3b3 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -558,6 +558,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 941121ee22a69935252473f03976f1f1200b9ae9 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:27 +0300 Subject: net: bridge: vlan: add support for mcast startup query interval global option Add support to change and retrieve global vlan multicast startup query interval option. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 2627a657c3b3..b5d01538acd4 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -559,6 +559,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 62938182c35906c0ed4beb7845b93b8ffb937597 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:30 +0300 Subject: net: bridge: vlan: add support for mcast querier global option Add support to change and retrieve global vlan multicast querier state. We just need to pass multicast context to br_multicast_set_querier instead of bridge device and the rest of the logic remains the same. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index b5d01538acd4..03fd14a4e377 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -560,6 +560,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From a97df080b6a86c105f98052ca3a9d66149b461b3 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:31 +0300 Subject: net: bridge: vlan: add support for mcast router global option Add support to change and retrieve global vlan multicast router state which is used for the bridge itself. We just need to pass multicast context to br_multicast_set_router instead of bridge device and the rest of the logic remains the same. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 03fd14a4e377..2104dd3557b4 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -561,6 +561,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, + BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From dc002875c22b56c795ec24dc987ac2dd2081588e Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 18:29:33 +0300 Subject: net: bridge: vlan: use br_rports_fill_info() to export mcast router ports Embed the standard multicast router port export by br_rports_fill_info() into a new global vlan attribute BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS. In order to have the same format for the global bridge mcast context and the per-vlan mcast context we need a double-nesting: - BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS - MDBA_ROUTER Currently we don't compare router lists, if any router port exists in the bridge mcast contexts we consider their option sets as different and export them separately. In addition we export the router port vlan id when dumping similar to the router port notification format. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 2104dd3557b4..620d86e825b8 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -562,6 +562,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, + BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 3d2a2544eae93987f0688c2d6ec06c76f9e1477b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 25 Jun 2021 11:17:16 +0300 Subject: nl80211: vendor-cmd: add Intel vendor commands for iwlmei usage iwlmei allows to integrate with the CSME firmware. There are flows that are prioprietary for this purpose: * Get the information of the AP the CSME firmware is connected to. This is useful when we need to speed up the connection process in case the CSME firmware has a TCP connection that must be kept alive across the ownership transition. * Forbid roaming, which will happen when the CSME firmware wants to tell the user space not disrupt the connection. * Request ownership, upon driver boot when the CSME firmware owns the device. This is a notification sent by the kernel. All those commands are expected to be used by any software managing the connection (mainly NetworkManager). Those commands are expected to be used only in case the CSME firmware owns the device and doesn't want to release the device unless the host made sure that it can keep the connectivity. Here are the steps of the expected flow: 1) The machine boots while AMT has an active TCP connection 2) iwlwifi starts and tries to access the device 3) The device is not available because of the active TCP connection. (If there are no active connections, the CSME firmware would have allowed iwlwifi to use the device) Note that all the steps up to here don't involve iwlmei. All this happens in iwlwifi (in iwl_pcie_prepare_card_hw). 4) iwlmei establishes a connection to the CSME firmware (through SAP) Here iwlwifi uses iwlmei to access the device's capabilities (since it can't touch the device), but this is not relevant for the vendor commands. 5) The CSME firmware tells iwlmei that it uses the NIC and that there is an acitve TCP connection, and hence, the host needs to think twice before asking the CSME firmware to release the device 6) iwlmei tells iwlwifi to report HW RFKILL with a special reason Up to here, there was no user space involved. 7) The user space (NetworkManager) boots and sees that the device is in RFKILL because the host doesn't own the device 8) The user space asks the kernel what AP the CSME firmware is connected to (with the first vendor command mentionned above) 9) The user space checks if it has a profile that matches the reply from the CSME firmware 10) The user space installs a network to the wpa_supplicant with a specific BSSID and a specific frequency 11) The user space prevents any type of full scan 12) The user space asks iwlmei to request ownership on the device (with the third vendor command) 13) iwlmei request ownership from the CSME firmware 14) The CSME firmware grants ownership 15) iwlmei tells iwlwifi to lift the RFKILL 16) RFKILL OFF is reported to userspace 17) The host boots the device, loads the firwmare, and connect to a specific BSSID without scanning including IP in less than 600ms (this is what I measured, of course it depends on many factors) 18) The host reports to the CSME firmware that there is a connection 19) The TCP connection is preserved and the host has now connectivity 20) Later, the TCP connection to the CSME firmware is terminated 21) The CSME firmware tells iwlmei that it is now free to do whatever it likes 22) iwlwifi sends the second vendor command to tell the user space that it can remove the special network configuration and pick any SSID / BSSID it likes. Co-Developed-by: Ayala Beker Signed-off-by: Emmanuel Grumbach Link: https://lore.kernel.org/r/20210625081717.7680-4-emmanuel.grumbach@intel.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211-vnd-intel.h | 77 ++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 include/uapi/linux/nl80211-vnd-intel.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211-vnd-intel.h b/include/uapi/linux/nl80211-vnd-intel.h new file mode 100644 index 000000000000..0bf177b84fd9 --- /dev/null +++ b/include/uapi/linux/nl80211-vnd-intel.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2012-2014, 2018-2021 Intel Corporation + * Copyright (C) 2013-2015 Intel Mobile Communications GmbH + * Copyright (C) 2016-2017 Intel Deutschland GmbH + */ +#ifndef __VENDOR_CMD_INTEL_H__ +#define __VENDOR_CMD_INTEL_H__ + +#define INTEL_OUI 0x001735 + +/** + * enum iwl_mvm_vendor_cmd - supported vendor commands + * @IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO: reports CSME connection info. + * @IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP: asks for ownership on the device. + * @IWL_MVM_VENDOR_CMD_ROAMING_FORBIDDEN_EVENT: notifies if roaming is allowed. + * It contains a &IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN and a + * &IWL_MVM_VENDOR_ATTR_VIF_ADDR attributes. + */ + +enum iwl_mvm_vendor_cmd { + IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO = 0x2d, + IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP = 0x30, + IWL_MVM_VENDOR_CMD_ROAMING_FORBIDDEN_EVENT = 0x32, +}; + +enum iwl_vendor_auth_akm_mode { + IWL_VENDOR_AUTH_OPEN, + IWL_VENDOR_AUTH_RSNA = 0x6, + IWL_VENDOR_AUTH_RSNA_PSK, + IWL_VENDOR_AUTH_SAE = 0x9, + IWL_VENDOR_AUTH_MAX, +}; + +/** + * enum iwl_mvm_vendor_attr - attributes used in vendor commands + * @__IWL_MVM_VENDOR_ATTR_INVALID: attribute 0 is invalid + * @IWL_MVM_VENDOR_ATTR_VIF_ADDR: interface MAC address + * @IWL_MVM_VENDOR_ATTR_ADDR: MAC address + * @IWL_MVM_VENDOR_ATTR_SSID: SSID (binary attribute, 0..32 octets) + * @IWL_MVM_VENDOR_ATTR_STA_CIPHER: the cipher to use for the station with the + * mac address specified in &IWL_MVM_VENDOR_ATTR_ADDR. + * @IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN: u8 attribute. Indicates whether + * roaming is forbidden or not. Value 1 means roaming is forbidden, + * 0 mean roaming is allowed. + * @IWL_MVM_VENDOR_ATTR_AUTH_MODE: u32 attribute. Authentication mode type + * as specified in &enum iwl_vendor_auth_akm_mode. + * @IWL_MVM_VENDOR_ATTR_CHANNEL_NUM: u8 attribute. Contains channel number. + * @IWL_MVM_VENDOR_ATTR_BAND: u8 attribute. + * 0 for 2.4 GHz band, 1 for 5.2GHz band and 2 for 6GHz band. + * @IWL_MVM_VENDOR_ATTR_COLLOC_CHANNEL: u32 attribute. Channel number of + * collocated AP. Relevant for 6GHz AP info. + * @IWL_MVM_VENDOR_ATTR_COLLOC_ADDR: MAC address of a collocated AP. + * Relevant for 6GHz AP info. + * + * @NUM_IWL_MVM_VENDOR_ATTR: number of vendor attributes + * @MAX_IWL_MVM_VENDOR_ATTR: highest vendor attribute number + + */ +enum iwl_mvm_vendor_attr { + __IWL_MVM_VENDOR_ATTR_INVALID = 0x00, + IWL_MVM_VENDOR_ATTR_VIF_ADDR = 0x02, + IWL_MVM_VENDOR_ATTR_ADDR = 0x0a, + IWL_MVM_VENDOR_ATTR_SSID = 0x3d, + IWL_MVM_VENDOR_ATTR_STA_CIPHER = 0x51, + IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN = 0x64, + IWL_MVM_VENDOR_ATTR_AUTH_MODE = 0x65, + IWL_MVM_VENDOR_ATTR_CHANNEL_NUM = 0x66, + IWL_MVM_VENDOR_ATTR_BAND = 0x69, + IWL_MVM_VENDOR_ATTR_COLLOC_CHANNEL = 0x70, + IWL_MVM_VENDOR_ATTR_COLLOC_ADDR = 0x71, + + NUM_IWL_MVM_VENDOR_ATTR, + MAX_IWL_MVM_VENDOR_ATTR = NUM_IWL_MVM_VENDOR_ATTR - 1, +}; + +#endif /* __VENDOR_CMD_INTEL_H__ */ -- cgit v1.2.3 From c7fa1d9b1fb179375e889ff076a1566ecc997bfc Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 13 Aug 2021 18:00:00 +0300 Subject: net: bridge: mcast: dump ipv4 querier state Add support for dumping global IPv4 querier state, we dump the state only if our own querier is enabled or there has been another external querier which has won the election. For the bridge global state we use a new attribute IFLA_BR_MCAST_QUERIER_STATE and embed the state inside. The structure is: [IFLA_BR_MCAST_QUERIER_STATE] `[BRIDGE_QUERIER_IP_ADDRESS] - ip address of the querier `[BRIDGE_QUERIER_IP_PORT] - bridge port ifindex where the querier was seen (set only if external querier) `[BRIDGE_QUERIER_IP_OTHER_TIMER] - other querier timeout Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 10 ++++++++++ include/uapi/linux/if_link.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 620d86e825b8..e0fff67fcd88 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -770,4 +770,14 @@ struct br_boolopt_multi { __u32 optval; __u32 optmask; }; + +enum { + BRIDGE_QUERIER_UNSPEC, + BRIDGE_QUERIER_IP_ADDRESS, + BRIDGE_QUERIER_IP_PORT, + BRIDGE_QUERIER_IP_OTHER_TIMER, + BRIDGE_QUERIER_PAD, + __BRIDGE_QUERIER_MAX +}; +#define BRIDGE_QUERIER_MAX (__BRIDGE_QUERIER_MAX - 1) #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5310003523ce..8aad65b69054 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -479,6 +479,7 @@ enum { IFLA_BR_MCAST_MLD_VERSION, IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, + IFLA_BR_MCAST_QUERIER_STATE, __IFLA_BR_MAX, }; -- cgit v1.2.3 From 85b4108211742c5dd4f9f56c1d0704b4e0d4c98e Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 13 Aug 2021 18:00:01 +0300 Subject: net: bridge: mcast: dump ipv6 querier state Add support for dumping global IPv6 querier state, we dump the state only if our own querier is enabled or there has been another external querier which has won the election. For the bridge global state we use a new attribute IFLA_BR_MCAST_QUERIER_STATE and embed the state inside. The structure is: [IFLA_BR_MCAST_QUERIER_STATE] `[BRIDGE_QUERIER_IPV6_ADDRESS] - ip address of the querier `[BRIDGE_QUERIER_IPV6_PORT] - bridge port ifindex where the querier was seen (set only if external querier) `[BRIDGE_QUERIER_IPV6_OTHER_TIMER] - other querier timeout IPv4 and IPv6 attributes are embedded at the same level of IFLA_BR_MCAST_QUERIER_STATE. If we didn't dump anything we cancel the nest and return. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index e0fff67fcd88..eceaad200bf6 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -777,6 +777,9 @@ enum { BRIDGE_QUERIER_IP_PORT, BRIDGE_QUERIER_IP_OTHER_TIMER, BRIDGE_QUERIER_PAD, + BRIDGE_QUERIER_IPV6_ADDRESS, + BRIDGE_QUERIER_IPV6_PORT, + BRIDGE_QUERIER_IPV6_OTHER_TIMER, __BRIDGE_QUERIER_MAX }; #define BRIDGE_QUERIER_MAX (__BRIDGE_QUERIER_MAX - 1) -- cgit v1.2.3 From ddc649d158c560c6685be1701900a6e456ecceac Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 13 Aug 2021 18:00:02 +0300 Subject: net: bridge: vlan: dump mcast ctx querier state Use the new mcast querier state dump infrastructure and export vlans' mcast context querier state embedded in attribute BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index eceaad200bf6..f71a81fdbbc6 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -563,6 +563,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 6c9b40844751ea30c72f7a2f92f4d704bc6b2927 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Fri, 13 Aug 2021 20:08:02 +0800 Subject: net: Remove net/ipx.h and uapi/linux/ipx.h header files commit <47595e32869f> ("") indicated the ipx network layer as obsolete in Jan 2018, updated in the MAINTAINERS file now, after being exposed for 3 years to refactoring, so to delete uapi/linux/ipx.h and net/ipx.h header files for good. additionally, there is no module that depends on ipx.h except a broken staging driver(r8188eu) Signed-off-by: Cai Huoqing Signed-off-by: David S. Miller --- include/uapi/linux/ipx.h | 87 ------------------------------------------------ 1 file changed, 87 deletions(-) delete mode 100644 include/uapi/linux/ipx.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipx.h b/include/uapi/linux/ipx.h deleted file mode 100644 index 3168137adae8..000000000000 --- a/include/uapi/linux/ipx.h +++ /dev/null @@ -1,87 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _IPX_H_ -#define _IPX_H_ -#include /* for compatibility with glibc netipx/ipx.h */ -#include -#include -#include -#define IPX_NODE_LEN 6 -#define IPX_MTU 576 - -#if __UAPI_DEF_SOCKADDR_IPX -struct sockaddr_ipx { - __kernel_sa_family_t sipx_family; - __be16 sipx_port; - __be32 sipx_network; - unsigned char sipx_node[IPX_NODE_LEN]; - __u8 sipx_type; - unsigned char sipx_zero; /* 16 byte fill */ -}; -#endif /* __UAPI_DEF_SOCKADDR_IPX */ - -/* - * So we can fit the extra info for SIOCSIFADDR into the address nicely - */ -#define sipx_special sipx_port -#define sipx_action sipx_zero -#define IPX_DLTITF 0 -#define IPX_CRTITF 1 - -#if __UAPI_DEF_IPX_ROUTE_DEFINITION -struct ipx_route_definition { - __be32 ipx_network; - __be32 ipx_router_network; - unsigned char ipx_router_node[IPX_NODE_LEN]; -}; -#endif /* __UAPI_DEF_IPX_ROUTE_DEFINITION */ - -#if __UAPI_DEF_IPX_INTERFACE_DEFINITION -struct ipx_interface_definition { - __be32 ipx_network; - unsigned char ipx_device[16]; - unsigned char ipx_dlink_type; -#define IPX_FRAME_NONE 0 -#define IPX_FRAME_SNAP 1 -#define IPX_FRAME_8022 2 -#define IPX_FRAME_ETHERII 3 -#define IPX_FRAME_8023 4 -#define IPX_FRAME_TR_8022 5 /* obsolete */ - unsigned char ipx_special; -#define IPX_SPECIAL_NONE 0 -#define IPX_PRIMARY 1 -#define IPX_INTERNAL 2 - unsigned char ipx_node[IPX_NODE_LEN]; -}; -#endif /* __UAPI_DEF_IPX_INTERFACE_DEFINITION */ - -#if __UAPI_DEF_IPX_CONFIG_DATA -struct ipx_config_data { - unsigned char ipxcfg_auto_select_primary; - unsigned char ipxcfg_auto_create_interfaces; -}; -#endif /* __UAPI_DEF_IPX_CONFIG_DATA */ - -/* - * OLD Route Definition for backward compatibility. - */ - -#if __UAPI_DEF_IPX_ROUTE_DEF -struct ipx_route_def { - __be32 ipx_network; - __be32 ipx_router_network; -#define IPX_ROUTE_NO_ROUTER 0 - unsigned char ipx_router_node[IPX_NODE_LEN]; - unsigned char ipx_device[16]; - unsigned short ipx_flags; -#define IPX_RT_SNAP 8 -#define IPX_RT_8022 4 -#define IPX_RT_BLUEBOOK 2 -#define IPX_RT_ROUTED 1 -}; -#endif /* __UAPI_DEF_IPX_ROUTE_DEF */ - -#define SIOCAIPXITFCRT (SIOCPROTOPRIVATE) -#define SIOCAIPXPRISLT (SIOCPROTOPRIVATE + 1) -#define SIOCIPXCFGDATA (SIOCPROTOPRIVATE + 2) -#define SIOCIPXNCPCONN (SIOCPROTOPRIVATE + 3) -#endif /* _IPX_H_ */ -- cgit v1.2.3 From 9ea9b9c48387edc101d56349492ad9c0492ff78d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Aug 2021 15:23:08 +0200 Subject: remove the lightnvm subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lightnvm supports the OCSSD 1.x and 2.0 specs which were early attempts to produce Open Channel SSDs and never made it into the NVMe spec proper. They have since been superceeded by NVMe enhancements such as ZNS support. Remove the support per the deprecation schedule. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210812132308.38486-1-hch@lst.de Reviewed-by: Matias Bjørling Reviewed-by: Javier González Signed-off-by: Jens Axboe --- include/uapi/linux/lightnvm.h | 224 ------------------------------------------ 1 file changed, 224 deletions(-) delete mode 100644 include/uapi/linux/lightnvm.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h deleted file mode 100644 index 2745afd9b8fa..000000000000 --- a/include/uapi/linux/lightnvm.h +++ /dev/null @@ -1,224 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2015 CNEX Labs. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - -#ifndef _UAPI_LINUX_LIGHTNVM_H -#define _UAPI_LINUX_LIGHTNVM_H - -#ifdef __KERNEL__ -#include -#else /* __KERNEL__ */ -#include -#include -#define DISK_NAME_LEN 32 -#endif /* __KERNEL__ */ - -#include -#include - -#define NVM_TTYPE_NAME_MAX 48 -#define NVM_TTYPE_MAX 63 -#define NVM_MMTYPE_LEN 8 - -#define NVM_CTRL_FILE "/dev/lightnvm/control" - -struct nvm_ioctl_info_tgt { - __u32 version[3]; - __u32 reserved; - char tgtname[NVM_TTYPE_NAME_MAX]; -}; - -struct nvm_ioctl_info { - __u32 version[3]; /* in/out - major, minor, patch */ - __u16 tgtsize; /* number of targets */ - __u16 reserved16; /* pad to 4K page */ - __u32 reserved[12]; - struct nvm_ioctl_info_tgt tgts[NVM_TTYPE_MAX]; -}; - -enum { - NVM_DEVICE_ACTIVE = 1 << 0, -}; - -struct nvm_ioctl_device_info { - char devname[DISK_NAME_LEN]; - char bmname[NVM_TTYPE_NAME_MAX]; - __u32 bmversion[3]; - __u32 flags; - __u32 reserved[8]; -}; - -struct nvm_ioctl_get_devices { - __u32 nr_devices; - __u32 reserved[31]; - struct nvm_ioctl_device_info info[31]; -}; - -struct nvm_ioctl_create_simple { - __u32 lun_begin; - __u32 lun_end; -}; - -struct nvm_ioctl_create_extended { - __u16 lun_begin; - __u16 lun_end; - __u16 op; - __u16 rsv; -}; - -enum { - NVM_CONFIG_TYPE_SIMPLE = 0, - NVM_CONFIG_TYPE_EXTENDED = 1, -}; - -struct nvm_ioctl_create_conf { - __u32 type; - union { - struct nvm_ioctl_create_simple s; - struct nvm_ioctl_create_extended e; - }; -}; - -enum { - NVM_TARGET_FACTORY = 1 << 0, /* Init target in factory mode */ -}; - -struct nvm_ioctl_create { - char dev[DISK_NAME_LEN]; /* open-channel SSD device */ - char tgttype[NVM_TTYPE_NAME_MAX]; /* target type name */ - char tgtname[DISK_NAME_LEN]; /* dev to expose target as */ - - __u32 flags; - - struct nvm_ioctl_create_conf conf; -}; - -struct nvm_ioctl_remove { - char tgtname[DISK_NAME_LEN]; - - __u32 flags; -}; - -struct nvm_ioctl_dev_init { - char dev[DISK_NAME_LEN]; /* open-channel SSD device */ - char mmtype[NVM_MMTYPE_LEN]; /* register to media manager */ - - __u32 flags; -}; - -enum { - NVM_FACTORY_ERASE_ONLY_USER = 1 << 0, /* erase only blocks used as - * host blks or grown blks */ - NVM_FACTORY_RESET_HOST_BLKS = 1 << 1, /* remove host blk marks */ - NVM_FACTORY_RESET_GRWN_BBLKS = 1 << 2, /* remove grown blk marks */ - NVM_FACTORY_NR_BITS = 1 << 3, /* stops here */ -}; - -struct nvm_ioctl_dev_factory { - char dev[DISK_NAME_LEN]; - - __u32 flags; -}; - -struct nvm_user_vio { - __u8 opcode; - __u8 flags; - __u16 control; - __u16 nppas; - __u16 rsvd; - __u64 metadata; - __u64 addr; - __u64 ppa_list; - __u32 metadata_len; - __u32 data_len; - __u64 status; - __u32 result; - __u32 rsvd3[3]; -}; - -struct nvm_passthru_vio { - __u8 opcode; - __u8 flags; - __u8 rsvd[2]; - __u32 nsid; - __u32 cdw2; - __u32 cdw3; - __u64 metadata; - __u64 addr; - __u32 metadata_len; - __u32 data_len; - __u64 ppa_list; - __u16 nppas; - __u16 control; - __u32 cdw13; - __u32 cdw14; - __u32 cdw15; - __u64 status; - __u32 result; - __u32 timeout_ms; -}; - -/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */ -enum { - /* top level cmds */ - NVM_INFO_CMD = 0x20, - NVM_GET_DEVICES_CMD, - - /* device level cmds */ - NVM_DEV_CREATE_CMD, - NVM_DEV_REMOVE_CMD, - - /* Init a device to support LightNVM media managers */ - NVM_DEV_INIT_CMD, - - /* Factory reset device */ - NVM_DEV_FACTORY_CMD, - - /* Vector user I/O */ - NVM_DEV_VIO_ADMIN_CMD = 0x41, - NVM_DEV_VIO_CMD = 0x42, - NVM_DEV_VIO_USER_CMD = 0x43, -}; - -#define NVM_IOCTL 'L' /* 0x4c */ - -#define NVM_INFO _IOWR(NVM_IOCTL, NVM_INFO_CMD, \ - struct nvm_ioctl_info) -#define NVM_GET_DEVICES _IOR(NVM_IOCTL, NVM_GET_DEVICES_CMD, \ - struct nvm_ioctl_get_devices) -#define NVM_DEV_CREATE _IOW(NVM_IOCTL, NVM_DEV_CREATE_CMD, \ - struct nvm_ioctl_create) -#define NVM_DEV_REMOVE _IOW(NVM_IOCTL, NVM_DEV_REMOVE_CMD, \ - struct nvm_ioctl_remove) -#define NVM_DEV_INIT _IOW(NVM_IOCTL, NVM_DEV_INIT_CMD, \ - struct nvm_ioctl_dev_init) -#define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \ - struct nvm_ioctl_dev_factory) - -#define NVME_NVM_IOCTL_IO_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_USER_CMD, \ - struct nvm_passthru_vio) -#define NVME_NVM_IOCTL_ADMIN_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_ADMIN_CMD,\ - struct nvm_passthru_vio) -#define NVME_NVM_IOCTL_SUBMIT_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_CMD,\ - struct nvm_user_vio) - -#define NVM_VERSION_MAJOR 1 -#define NVM_VERSION_MINOR 0 -#define NVM_VERSION_PATCHLEVEL 0 - -#endif -- cgit v1.2.3 From 5b4ecc3d4c4aab8d002fe6358885c10e7b57e432 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Mon, 16 Aug 2021 10:15:27 +0800 Subject: ethtool: add two link extended substates of bad signal integrity ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST means the input external clock signal for SerDes is too weak or lost. ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS means the received signal for SerDes is too weak because analog loss of signal. Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 67aa7134b301..b6db6590baf0 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -639,6 +639,8 @@ enum ethtool_link_ext_substate_link_logical_mismatch { enum ethtool_link_ext_substate_bad_signal_integrity { ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS, }; /* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */ -- cgit v1.2.3 From b89fbfbb854c9afc3047e8273cc3a694650b802e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 15 Aug 2021 00:05:57 -0700 Subject: bpf: Implement minimal BPF perf link Introduce a new type of BPF link - BPF perf link. This brings perf_event-based BPF program attachments (perf_event, tracepoints, kprobes, and uprobes) into the common BPF link infrastructure, allowing to list all active perf_event based attachments, auto-detaching BPF program from perf_event when link's FD is closed, get generic BPF link fdinfo/get_info functionality. BPF_LINK_CREATE command expects perf_event's FD as target_fd. No extra flags are currently supported. Force-detaching and atomic BPF program updates are not yet implemented, but with perf_event-based BPF links we now have common framework for this without the need to extend ioctl()-based perf_event interface. One interesting consideration is a new value for bpf_attach_type, which BPF_LINK_CREATE command expects. Generally, it's either 1-to-1 mapping from bpf_attach_type to bpf_prog_type, or many-to-1 mapping from a subset of bpf_attach_types to one bpf_prog_type (e.g., see BPF_PROG_TYPE_SK_SKB or BPF_PROG_TYPE_CGROUP_SOCK). In this case, though, we have three different program types (KPROBE, TRACEPOINT, PERF_EVENT) using the same perf_event-based mechanism, so it's many bpf_prog_types to one bpf_attach_type. I chose to define a single BPF_PERF_EVENT attach type for all of them and adjust link_create()'s logic for checking correspondence between attach type and program type. The alternative would be to define three new attach types (e.g., BPF_KPROBE, BPF_TRACEPOINT, and BPF_PERF_EVENT), but that seemed like unnecessary overkill and BPF_KPROBE will cause naming conflicts with BPF_KPROBE() macro, defined by libbpf. I chose to not do this to avoid unnecessary proliferation of bpf_attach_type enum values and not have to deal with naming conflicts. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/bpf/20210815070609.987780-5-andrii@kernel.org --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2db6925e04f4..94fe8329b28f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -993,6 +993,7 @@ enum bpf_attach_type { BPF_SK_SKB_VERDICT, BPF_SK_REUSEPORT_SELECT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, + BPF_PERF_EVENT, __MAX_BPF_ATTACH_TYPE }; @@ -1006,6 +1007,7 @@ enum bpf_link_type { BPF_LINK_TYPE_ITER = 4, BPF_LINK_TYPE_NETNS = 5, BPF_LINK_TYPE_XDP = 6, + BPF_LINK_TYPE_PERF_EVENT = 7, MAX_BPF_LINK_TYPE, }; -- cgit v1.2.3 From 82e6b1eee6a8875ef4eacfd60711cce6965c6b04 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 15 Aug 2021 00:05:58 -0700 Subject: bpf: Allow to specify user-provided bpf_cookie for BPF perf links Add ability for users to specify custom u64 value (bpf_cookie) when creating BPF link for perf_event-backed BPF programs (kprobe/uprobe, perf_event, tracepoints). This is useful for cases when the same BPF program is used for attaching and processing invocation of different tracepoints/kprobes/uprobes in a generic fashion, but such that each invocation is distinguished from each other (e.g., BPF program can look up additional information associated with a specific kernel function without having to rely on function IP lookups). This enables new use cases to be implemented simply and efficiently that previously were possible only through code generation (and thus multiple instances of almost identical BPF program) or compilation at runtime (BCC-style) on target hosts (even more expensive resource-wise). For uprobes it is not even possible in some cases to know function IP before hand (e.g., when attaching to shared library without PID filtering, in which case base load address is not known for a library). This is done by storing u64 bpf_cookie in struct bpf_prog_array_item, corresponding to each attached and run BPF program. Given cgroup BPF programs already use two 8-byte pointers for their needs and cgroup BPF programs don't have (yet?) support for bpf_cookie, reuse that space through union of cgroup_storage and new bpf_cookie field. Make it available to kprobe/tracepoint BPF programs through bpf_trace_run_ctx. This is set by BPF_PROG_RUN_ARRAY, used by kprobe/uprobe/tracepoint BPF program execution code, which luckily is now also split from BPF_PROG_RUN_ARRAY_CG. This run context will be utilized by a new BPF helper giving access to this user-provided cookie value from inside a BPF program. Generic perf_event BPF programs will access this value from perf_event itself through passed in BPF program context. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/bpf/20210815070609.987780-6-andrii@kernel.org --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 94fe8329b28f..63ee482d50e1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1448,6 +1448,13 @@ union bpf_attr { __aligned_u64 iter_info; /* extra bpf_iter_link_info */ __u32 iter_info_len; /* iter_info length */ }; + struct { + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 bpf_cookie; + } perf_event; }; } link_create; -- cgit v1.2.3 From 7adfc6c9b315e174cf8743b21b7b691c8766791b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 15 Aug 2021 00:05:59 -0700 Subject: bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value Add new BPF helper, bpf_get_attach_cookie(), which can be used by BPF programs to get access to a user-provided bpf_cookie value, specified during BPF program attachment (BPF link creation) time. Naming is hard, though. With the concept being named "BPF cookie", I've considered calling the helper: - bpf_get_cookie() -- seems too unspecific and easily mistaken with socket cookie; - bpf_get_bpf_cookie() -- too much tautology; - bpf_get_link_cookie() -- would be ok, but while we create a BPF link to attach BPF program to BPF hook, it's still an "attachment" and the bpf_cookie is associated with BPF program attachment to a hook, not a BPF link itself. Technically, we could support bpf_cookie with old-style cgroup programs.So I ultimately rejected it in favor of bpf_get_attach_cookie(). Currently all perf_event-backed BPF program types support bpf_get_attach_cookie() helper. Follow-up patches will add support for fentry/fexit programs as well. While at it, mark bpf_tracing_func_proto() as static to make it obvious that it's only used from within the kernel/trace/bpf_trace.c. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210815070609.987780-7-andrii@kernel.org --- include/uapi/linux/bpf.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 63ee482d50e1..c4f7892edb2b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4856,6 +4856,21 @@ union bpf_attr { * Get address of the traced function (for tracing and kprobe programs). * Return * Address of the traced function. + * + * u64 bpf_get_attach_cookie(void *ctx) + * Description + * Get bpf_cookie value provided (optionally) during the program + * attachment. It might be different for each individual + * attachment, even if BPF program itself is the same. + * Expects BPF program context *ctx* as a first argument. + * + * Supported for the following program types: + * - kprobe/uprobe; + * - tracepoint; + * - perf_event. + * Return + * Value specified by user at BPF link creation/attachment time + * or 0, if it was not specified. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5032,6 +5047,7 @@ union bpf_attr { FN(timer_start), \ FN(timer_cancel), \ FN(get_func_ip), \ + FN(get_attach_cookie), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 0d2ab3aea50bb02ff0c9c3d53c7b2b4b21cdd59d Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 2 Jul 2021 19:44:07 +0200 Subject: nl80211: add support for BSS coloring This patch adds support for BSS color collisions to the wireless subsystem. Add the required functionality to nl80211 that will notify about color collisions, triggering the color change and notifying when it is completed. Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: John Crispin Link: https://lore.kernel.org/r/500b3582aec8fe2c42ef46f3117b148cb7cbceb5.1625247619.git.lorenzo@kernel.org [remove unnecessary NULL initialisation] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index db474994fa73..c2efea98e060 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1185,6 +1185,21 @@ * passed using %NL80211_ATTR_SAR_SPEC. %NL80211_ATTR_WIPHY is used to * specify the wiphy index to be applied to. * + * @NL80211_CMD_OBSS_COLOR_COLLISION: This notification is sent out whenever + * mac80211/drv detects a bss color collision. + * + * @NL80211_CMD_COLOR_CHANGE_REQUEST: This command is used to indicate that + * userspace wants to change the BSS color. + * + * @NL80211_CMD_COLOR_CHANGE_STARTED: Notify userland, that a color change has + * started + * + * @NL80211_CMD_COLOR_CHANGE_ABORTED: Notify userland, that the color change has + * been aborted + * + * @NL80211_CMD_COLOR_CHANGE_COMPLETED: Notify userland that the color change + * has completed + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1417,6 +1432,14 @@ enum nl80211_commands { NL80211_CMD_SET_SAR_SPECS, + NL80211_CMD_OBSS_COLOR_COLLISION, + + NL80211_CMD_COLOR_CHANGE_REQUEST, + + NL80211_CMD_COLOR_CHANGE_STARTED, + NL80211_CMD_COLOR_CHANGE_ABORTED, + NL80211_CMD_COLOR_CHANGE_COMPLETED, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2560,6 +2583,16 @@ enum nl80211_commands { * disassoc events to indicate that an immediate reconnect to the AP * is desired. * + * @NL80211_ATTR_OBSS_COLOR_BITMAP: bitmap of the u64 BSS colors for the + * %NL80211_CMD_OBSS_COLOR_COLLISION event. + * + * @NL80211_ATTR_COLOR_CHANGE_COUNT: u8 attribute specifying the number of TBTT's + * until the color switch event. + * @NL80211_ATTR_COLOR_CHANGE_COLOR: u8 attribute specifying the color that we are + * switching to + * @NL80211_ATTR_COLOR_CHANGE_ELEMS: Nested set of attributes containing the IE + * information for the time while performing a color switch. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3057,6 +3090,12 @@ enum nl80211_attrs { NL80211_ATTR_DISABLE_HE, + NL80211_ATTR_OBSS_COLOR_BITMAP, + + NL80211_ATTR_COLOR_CHANGE_COUNT, + NL80211_ATTR_COLOR_CHANGE_COLOR, + NL80211_ATTR_COLOR_CHANGE_ELEMS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5953,6 +5992,9 @@ enum nl80211_feature_flags { * frame protection for all management frames exchanged during the * negotiation and range measurement procedure. * + * @NL80211_EXT_FEATURE_BSS_COLOR: The driver supports BSS color collision + * detection and change announcemnts. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6017,6 +6059,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SECURE_LTF, NL80211_EXT_FEATURE_SECURE_RTT, NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE, + NL80211_EXT_FEATURE_BSS_COLOR, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From ea49dc79002c416a9003f3204bc14f846a0dbcae Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 28 Jul 2021 08:56:09 +1000 Subject: NFSD: remove vanity comments Including one's name in copyright claims is appropriate. Including it in random comments is just vanity. After 2 decades, it is time for these to be gone. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/uapi/linux/nfsd/nfsfh.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h index 427294dd56a1..e29e8accc4f4 100644 --- a/include/uapi/linux/nfsd/nfsfh.h +++ b/include/uapi/linux/nfsd/nfsfh.h @@ -33,7 +33,6 @@ struct nfs_fhbase_old { /* * This is the new flexible, extensible style NFSv2/v3/v4 file handle. - * by Neil Brown - March 2000 * * The file handle starts with a sequence of four-byte words. * The first word contains a version number (1) and three descriptor bytes -- cgit v1.2.3 From 2843ff6f36db7074e17bf5d637a14da08c54aed8 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 17 Aug 2021 15:07:23 -0700 Subject: mptcp: remote addresses fullmesh This patch added and managed a new per endpoint flag, named MPTCP_PM_ADDR_FLAG_FULLMESH. In mptcp_pm_create_subflow_or_signal_addr(), if such flag is set, instead of: remote_address((struct sock_common *)sk, &remote); fill a temporary allocated array of all known remote address. After releaseing the pm lock loop on such array and create a subflow for each remote address from the given local. Note that the we could still use an array even for non 'fullmesh' endpoint: with a single entry corresponding to the primary MPC subflow remote address. Suggested-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 7b05f7102321..f66038b9551f 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -73,6 +73,7 @@ enum { #define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0) #define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1) #define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2) +#define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3) enum { MPTCP_PM_CMD_UNSPEC, -- cgit v1.2.3 From 25bca50e523cbe96c0207fbb92f22ff2bc28e9aa Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Aug 2021 12:36:58 +0900 Subject: block: improve ioprio class description comment In include/usapi/linux/ioprio.h, change the ioprio class enum comment to remove the outdated reference to CFQ and mention BFQ and mq-deadline instead. Also document the high priority NCQ command use for RT class IOs directed at ATA drives that support NCQ priority. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210811033702.368488-3-damien.lemoal@wdc.com Signed-off-by: Jens Axboe --- include/uapi/linux/ioprio.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 77b17e08b0da..6b735854aebd 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -13,10 +13,12 @@ #define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) /* - * These are the io priority groups as implemented by CFQ. RT is the realtime - * class, it always gets premium service. BE is the best-effort scheduling - * class, the default for any process. IDLE is the idle scheduling class, it - * is only served when no one else is using the disk. + * These are the io priority groups as implemented by the BFQ and mq-deadline + * schedulers. RT is the realtime class, it always gets premium service. For + * ATA disks supporting NCQ IO priority, RT class IOs will be processed using + * high priority NCQ commands. BE is the best-effort scheduling class, the + * default for any process. IDLE is the idle scheduling class, it is only + * served when no one else is using the disk. */ enum { IOPRIO_CLASS_NONE, -- cgit v1.2.3 From a553a835ca57668b0d9907d8ec2507ec51292d9a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Aug 2021 12:36:59 +0900 Subject: block: change ioprio_valid() to an inline function Change the ioprio_valid() macro in include/usapi/linux/ioprio.h to an inline function declared on the kernel side in include/linux/ioprio.h. Also improve checks on the class value by checking the upper bound value. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210811033702.368488-4-damien.lemoal@wdc.com Signed-off-by: Jens Axboe --- include/uapi/linux/ioprio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 6b735854aebd..5064e230374c 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -27,8 +27,6 @@ enum { IOPRIO_CLASS_IDLE, }; -#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE) - /* * 8 best effort priority levels are supported */ -- cgit v1.2.3 From ba05200fcce0a73fa8db16c514fbaa476d1d9399 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Aug 2021 12:37:00 +0900 Subject: block: fix IOPRIO_PRIO_CLASS() and IOPRIO_PRIO_VALUE() macros The ki_ioprio field of struct kiocb is 16-bits (u16) but often handled as an int in the block layer. E.g. ioprio_check_cap() takes an int as argument. With such implicit int casting function calls, the upper 16-bits of the int argument may be left uninitialized by the compiler, resulting in invalid values for the IOPRIO_PRIO_CLASS() macro (garbage upper bits) and in an error return for functions such as ioprio_check_cap(). Fix this by masking the result of the shift by IOPRIO_CLASS_SHIFT bits in the IOPRIO_PRIO_CLASS() macro. The new macro IOPRIO_CLASS_MASK defines the 3-bits mask for the priority class. Similarly, apply the IOPRIO_PRIO_MASK mask to the data argument of the IOPRIO_PRIO_VALUE() macro to ignore the upper bits of the data value. The IOPRIO_CLASS_MASK mask is also applied to the class argument of this macro before shifting the result by IOPRIO_CLASS_SHIFT bits. While at it, also change the argument name of the IOPRIO_PRIO_CLASS() and IOPRIO_PRIO_DATA() macros from "mask" to "ioprio" to reflect the fact that a priority value should be passed rather than a mask. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210811033702.368488-5-damien.lemoal@wdc.com Signed-off-by: Jens Axboe --- include/uapi/linux/ioprio.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 5064e230374c..936f0d8f30e1 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -5,12 +5,16 @@ /* * Gives us 8 prio classes with 13-bits of data for each class */ -#define IOPRIO_CLASS_SHIFT (13) +#define IOPRIO_CLASS_SHIFT 13 +#define IOPRIO_CLASS_MASK 0x07 #define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) -#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) -#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) -#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) +#define IOPRIO_PRIO_CLASS(ioprio) \ + (((ioprio) >> IOPRIO_CLASS_SHIFT) & IOPRIO_CLASS_MASK) +#define IOPRIO_PRIO_DATA(ioprio) ((ioprio) & IOPRIO_PRIO_MASK) +#define IOPRIO_PRIO_VALUE(class, data) \ + ((((class) & IOPRIO_CLASS_MASK) << IOPRIO_CLASS_SHIFT) | \ + ((data) & IOPRIO_PRIO_MASK)) /* * These are the io priority groups as implemented by the BFQ and mq-deadline -- cgit v1.2.3 From 202bc942c5cd4340d37b06c4e0b8b03f9925d818 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Aug 2021 12:37:01 +0900 Subject: block: Introduce IOPRIO_NR_LEVELS The BFQ scheduler and ioprio_check_cap() both assume that the RT priority class (IOPRIO_CLASS_RT) can have up to 8 different priority levels, similarly to the BE class (IOPRIO_CLASS_iBE). This is controlled using the IOPRIO_BE_NR macro , which is badly named as the number of levels also applies to the RT class. Introduce the class independent IOPRIO_NR_LEVELS macro, defined to 8, to make things clear. Keep the old IOPRIO_BE_NR macro definition as an alias for IOPRIO_NR_LEVELS. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210811033702.368488-6-damien.lemoal@wdc.com Signed-off-by: Jens Axboe --- include/uapi/linux/ioprio.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 936f0d8f30e1..aac39338d02c 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -32,9 +32,10 @@ enum { }; /* - * 8 best effort priority levels are supported + * The RT and BE priority classes both support up to 8 priority levels. */ -#define IOPRIO_BE_NR (8) +#define IOPRIO_NR_LEVELS 8 +#define IOPRIO_BE_NR IOPRIO_NR_LEVELS enum { IOPRIO_WHO_PROCESS = 1, -- cgit v1.2.3 From e70344c05995a190a56bbd1a23dc2218bcc8c924 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Aug 2021 12:37:02 +0900 Subject: block: fix default IO priority handling The default IO priority is the best effort (BE) class with the normal priority level IOPRIO_NORM (4). However, get_task_ioprio() returns IOPRIO_CLASS_NONE/IOPRIO_NORM as the default priority and get_current_ioprio() returns IOPRIO_CLASS_NONE/0. Let's be consistent with the defined default and have both of these functions return the default priority IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM) when the user did not define another default IO priority for the task. In include/uapi/linux/ioprio.h, introduce the IOPRIO_BE_NORM macro as an alias to IOPRIO_NORM to clarify that this default level applies to the BE priotity class. In include/linux/ioprio.h, define the macro IOPRIO_DEFAULT as IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM) and use this new macro when setting a priority to the default. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210811033702.368488-7-damien.lemoal@wdc.com [axboe: drop unnecessary lightnvm change] Signed-off-by: Jens Axboe --- include/uapi/linux/ioprio.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index aac39338d02c..f70f2596a6bf 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -44,8 +44,9 @@ enum { }; /* - * Fallback BE priority + * Fallback BE priority level. */ -#define IOPRIO_NORM (4) +#define IOPRIO_NORM 4 +#define IOPRIO_BE_NORM IOPRIO_NORM #endif /* _UAPI_LINUX_IOPRIO_H */ -- cgit v1.2.3 From 3cfc88380413d20f777dc6648a38f683962e52bf Mon Sep 17 00:00:00 2001 From: Jie Deng Date: Fri, 23 Jul 2021 13:44:35 +0800 Subject: i2c: virtio: add a virtio i2c frontend driver Add an I2C bus driver for virtio para-virtualization. The controller can be emulated by the backend driver in any device model software by following the virtio protocol. The device specification can be found on https://lists.oasis-open.org/archives/virtio-comment/202101/msg00008.html. By following the specification, people may implement different backend drivers to emulate different controllers according to their needs. Co-developed-by: Conghui Chen Signed-off-by: Conghui Chen Signed-off-by: Jie Deng Reviewed-by: Viresh Kumar Tested-by: Viresh Kumar Acked-by: Michael S. Tsirkin Signed-off-by: Wolfram Sang --- include/uapi/linux/virtio_i2c.h | 41 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/virtio_ids.h | 1 + 2 files changed, 42 insertions(+) create mode 100644 include/uapi/linux/virtio_i2c.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_i2c.h b/include/uapi/linux/virtio_i2c.h new file mode 100644 index 000000000000..7c6a6fc01ad6 --- /dev/null +++ b/include/uapi/linux/virtio_i2c.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */ +/* + * Definitions for virtio I2C Adpter + * + * Copyright (c) 2021 Intel Corporation. All rights reserved. + */ + +#ifndef _UAPI_LINUX_VIRTIO_I2C_H +#define _UAPI_LINUX_VIRTIO_I2C_H + +#include +#include + +/* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */ +#define VIRTIO_I2C_FLAGS_FAIL_NEXT _BITUL(0) + +/** + * struct virtio_i2c_out_hdr - the virtio I2C message OUT header + * @addr: the controlled device address + * @padding: used to pad to full dword + * @flags: used for feature extensibility + */ +struct virtio_i2c_out_hdr { + __le16 addr; + __le16 padding; + __le32 flags; +}; + +/** + * struct virtio_i2c_in_hdr - the virtio I2C message IN header + * @status: the processing result from the backend + */ +struct virtio_i2c_in_hdr { + __u8 status; +}; + +/* The final status written by the device */ +#define VIRTIO_I2C_MSG_OK 0 +#define VIRTIO_I2C_MSG_ERR 1 + +#endif /* _UAPI_LINUX_VIRTIO_I2C_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 70a8057ad4bb..99aa27b100bc 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -55,6 +55,7 @@ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_I2C_ADAPTER 34 /* virtio i2c adapter */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ /* -- cgit v1.2.3 From 2796d846d74a18cc6563e96eff8bf28c5e06f912 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 20 Aug 2021 15:42:55 +0300 Subject: net: bridge: vlan: convert mcast router global option to per-vlan entry The per-vlan router option controls the port/vlan and host vlan entries' mcast router config. The global option controlled only the host vlan config, but that is unnecessary and incosistent as it's not really a global vlan option, but rather bridge option to control host router config, so convert BRIDGE_VLANDB_GOPTS_MCAST_ROUTER to BRIDGE_VLANDB_ENTRY_MCAST_ROUTER which can be used to control both host vlan and port vlan mcast router config. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index f71a81fdbbc6..2711c3522010 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -506,6 +506,7 @@ enum { BRIDGE_VLANDB_ENTRY_STATE, BRIDGE_VLANDB_ENTRY_TUNNEL_INFO, BRIDGE_VLANDB_ENTRY_STATS, + BRIDGE_VLANDB_ENTRY_MCAST_ROUTER, __BRIDGE_VLANDB_ENTRY_MAX, }; #define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1) @@ -561,7 +562,6 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, - BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE, __BRIDGE_VLANDB_GOPTS_MAX -- cgit v1.2.3 From f95937ccf5bd5e0a6bbac2b8e65a87982ffae403 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Mon, 2 Aug 2021 16:56:29 +0000 Subject: KVM: stats: Support linear and logarithmic histogram statistics Add new types of KVM stats, linear and logarithmic histogram. Histogram are very useful for observing the value distribution of time or size related stats. Signed-off-by: Jing Zhang Message-Id: <20210802165633.1866976-2-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d9e4aabcb31a..a067410ebea5 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1965,7 +1965,9 @@ struct kvm_stats_header { #define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) #define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) #define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) -#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK +#define KVM_STATS_TYPE_LINEAR_HIST (0x3 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_LOG_HIST (0x4 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_LOG_HIST #define KVM_STATS_UNIT_SHIFT 4 #define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) @@ -1988,8 +1990,9 @@ struct kvm_stats_header { * @size: The number of data items for this stats. * Every data item is of type __u64. * @offset: The offset of the stats to the start of stat structure in - * struture kvm or kvm_vcpu. - * @unused: Unused field for future usage. Always 0 for now. + * structure kvm or kvm_vcpu. + * @bucket_size: A parameter value used for histogram stats. It is only used + * for linear histogram stats, specifying the size of the bucket; * @name: The name string for the stats. Its size is indicated by the * &kvm_stats_header->name_size. */ @@ -1998,7 +2001,7 @@ struct kvm_stats_desc { __s16 exponent; __u16 size; __u32 offset; - __u32 unused; + __u32 bucket_size; char name[]; }; -- cgit v1.2.3 From 3a29355a22c0275fe864100794fee58a73175d93 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 19 Aug 2021 10:00:22 +0530 Subject: gpio: Add virtio-gpio driver This patch adds a new driver for Virtio based GPIO devices. This allows a guest VM running Linux to access GPIO lines provided by the host. It supports all basic operations, except interrupts for the GPIO lines. Based on the initial work posted by: "Enrico Weigelt, metux IT consult" . Reviewed-by: Linus Walleij Signed-off-by: Viresh Kumar Signed-off-by: Bartosz Golaszewski --- include/uapi/linux/virtio_gpio.h | 47 ++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/virtio_ids.h | 1 + 2 files changed, 48 insertions(+) create mode 100644 include/uapi/linux/virtio_gpio.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_gpio.h b/include/uapi/linux/virtio_gpio.h new file mode 100644 index 000000000000..844574acf095 --- /dev/null +++ b/include/uapi/linux/virtio_gpio.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _LINUX_VIRTIO_GPIO_H +#define _LINUX_VIRTIO_GPIO_H + +#include + +/* Virtio GPIO request types */ +#define VIRTIO_GPIO_MSG_GET_NAMES 0x0001 +#define VIRTIO_GPIO_MSG_GET_DIRECTION 0x0002 +#define VIRTIO_GPIO_MSG_SET_DIRECTION 0x0003 +#define VIRTIO_GPIO_MSG_GET_VALUE 0x0004 +#define VIRTIO_GPIO_MSG_SET_VALUE 0x0005 + +/* Possible values of the status field */ +#define VIRTIO_GPIO_STATUS_OK 0x0 +#define VIRTIO_GPIO_STATUS_ERR 0x1 + +/* Direction types */ +#define VIRTIO_GPIO_DIRECTION_NONE 0x00 +#define VIRTIO_GPIO_DIRECTION_OUT 0x01 +#define VIRTIO_GPIO_DIRECTION_IN 0x02 + +struct virtio_gpio_config { + __u16 ngpio; + __u8 padding[2]; + __u32 gpio_names_size; +} __packed; + +/* Virtio GPIO Request / Response */ +struct virtio_gpio_request { + __u16 type; + __u16 gpio; + __u32 value; +}; + +struct virtio_gpio_response { + __u8 status; + __u8 value; +}; + +struct virtio_gpio_response_get_names { + __u8 status; + __u8 value[]; +}; + +#endif /* _LINUX_VIRTIO_GPIO_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 70a8057ad4bb..e04fa2bfc0eb 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -56,6 +56,7 @@ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ +#define VIRTIO_ID_GPIO 41 /* virtio gpio */ /* * Virtio Transitional IDs -- cgit v1.2.3 From 146054090b0859b28fc39015c7704ccc3c3a347f Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 30 Jun 2021 13:01:49 -0700 Subject: btrfs: initial fsverity support Add support for fsverity in btrfs. To support the generic interface in fs/verity, we add two new item types in the fs tree for inodes with verity enabled. One stores the per-file verity descriptor and btrfs verity item and the other stores the Merkle tree data itself. Verity checking is done in end_page_read just before a page is marked uptodate. This naturally handles a variety of edge cases like holes, preallocated extents, and inline extents. Some care needs to be taken to not try to verity pages past the end of the file, which are accessed by the generic buffered file reading code under some circumstances like reading to the end of the last page and trying to read again. Direct IO on a verity file falls back to buffered reads. Verity relies on PageChecked for the Merkle tree data itself to avoid re-walking up shared paths in the tree. For this reason, we need to cache the Merkle tree data. Since the file is immutable after verity is turned on, we can cache it at an index past EOF. Use the new inode ro_flags to store verity on the inode item, so that we can enable verity on a file, then rollback to an older kernel and still mount the file system and read the file. Since we can't safely write the file anymore without ruining the invariants of the Merkle tree, we mark a ro_compat flag on the file system when a file has verity enabled. Acked-by: Eric Biggers Co-developed-by: Chris Mason Signed-off-by: Chris Mason Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 1 + include/uapi/linux/btrfs_tree.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 22cd037123fa..d7d3cfead056 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -288,6 +288,7 @@ struct btrfs_ioctl_fs_info_args { * first mount when booting older kernel versions. */ #define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1) +#define BTRFS_FEATURE_COMPAT_RO_VERITY (1ULL << 2) #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index ccdb40fe40dc..e1c4c732aaba 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -118,6 +118,29 @@ #define BTRFS_INODE_REF_KEY 12 #define BTRFS_INODE_EXTREF_KEY 13 #define BTRFS_XATTR_ITEM_KEY 24 + +/* + * fs verity items are stored under two different key types on disk. + * The descriptor items: + * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ] + * + * At offset 0, we store a btrfs_verity_descriptor_item which tracks the size + * of the descriptor item and some extra data for encryption. + * Starting at offset 1, these hold the generic fs verity descriptor. The + * latter are opaque to btrfs, we just read and write them as a blob for the + * higher level verity code. The most common descriptor size is 256 bytes. + * + * The merkle tree items: + * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ] + * + * These also start at offset 0, and correspond to the merkle tree bytes. When + * fsverity asks for page 0 of the merkle tree, we pull up one page starting at + * offset 0 for this key type. These are also opaque to btrfs, we're blindly + * storing whatever fsverity sends down. + */ +#define BTRFS_VERITY_DESC_ITEM_KEY 36 +#define BTRFS_VERITY_MERKLE_ITEM_KEY 37 + #define BTRFS_ORPHAN_ITEM_KEY 48 /* reserve 2-15 close to the inode for later flexibility */ @@ -991,4 +1014,16 @@ struct btrfs_qgroup_limit_item { __le64 rsv_excl; } __attribute__ ((__packed__)); +struct btrfs_verity_descriptor_item { + /* Size of the verity descriptor in bytes */ + __le64 size; + /* + * When we implement support for fscrypt, we will need to encrypt the + * Merkle tree for encrypted verity files. These 128 bits are for the + * eventual storage of an fscrypt initialization vector. + */ + __le64 reserved[2]; + __u8 encryption; +} __attribute__ ((__packed__)); + #endif /* _BTRFS_CTREE_H_ */ -- cgit v1.2.3 From d5ae8d7f85b7f6f6e60f1af8ff4be52b0926fde1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 23 Aug 2021 09:49:09 -0700 Subject: Revert "media: dvb header files: move some headers to staging" This reverts commit 819fbd3d8ef36c09576c2a0ffea503f5c46e9177. It turns out that some user-space applications use these uapi header files, so even though the only user of the interface is an old driver that was moved to staging, moving the header files causes unnecessary pain. Generally, we really don't want user space to use kernel headers directly (exactly because it causes pain when we re-organize), and instead copy them as needed. But these things happen, and the headers were in the uapi directory, so I guess it's not entirely unreasonable. Link: https://lore.kernel.org/lkml/4e3e0d40-df4a-94f8-7c2d-85010b0873c4@web.de/ Reported-by: Soeren Moch Cc: stable@kernel.org # 5.13 Cc: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds --- include/uapi/linux/dvb/audio.h | 101 +++++++++++++++++++ include/uapi/linux/dvb/osd.h | 181 +++++++++++++++++++++++++++++++++ include/uapi/linux/dvb/video.h | 220 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 502 insertions(+) create mode 100644 include/uapi/linux/dvb/audio.h create mode 100644 include/uapi/linux/dvb/osd.h create mode 100644 include/uapi/linux/dvb/video.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dvb/audio.h b/include/uapi/linux/dvb/audio.h new file mode 100644 index 000000000000..2f869da69171 --- /dev/null +++ b/include/uapi/linux/dvb/audio.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */ +/* + * audio.h - DEPRECATED MPEG-TS audio decoder API + * + * NOTE: should not be used on future drivers + * + * Copyright (C) 2000 Ralph Metzler + * & Marcus Metzler + * for convergence integrated media GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Lesser Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +#ifndef _DVBAUDIO_H_ +#define _DVBAUDIO_H_ + +#include + +typedef enum { + AUDIO_SOURCE_DEMUX, /* Select the demux as the main source */ + AUDIO_SOURCE_MEMORY /* Select internal memory as the main source */ +} audio_stream_source_t; + + +typedef enum { + AUDIO_STOPPED, /* Device is stopped */ + AUDIO_PLAYING, /* Device is currently playing */ + AUDIO_PAUSED /* Device is paused */ +} audio_play_state_t; + + +typedef enum { + AUDIO_STEREO, + AUDIO_MONO_LEFT, + AUDIO_MONO_RIGHT, + AUDIO_MONO, + AUDIO_STEREO_SWAPPED +} audio_channel_select_t; + + +typedef struct audio_mixer { + unsigned int volume_left; + unsigned int volume_right; + /* what else do we need? bass, pass-through, ... */ +} audio_mixer_t; + + +typedef struct audio_status { + int AV_sync_state; /* sync audio and video? */ + int mute_state; /* audio is muted */ + audio_play_state_t play_state; /* current playback state */ + audio_stream_source_t stream_source; /* current stream source */ + audio_channel_select_t channel_select; /* currently selected channel */ + int bypass_mode; /* pass on audio data to */ + audio_mixer_t mixer_state; /* current mixer state */ +} audio_status_t; /* separate decoder hardware */ + + +/* for GET_CAPABILITIES and SET_FORMAT, the latter should only set one bit */ +#define AUDIO_CAP_DTS 1 +#define AUDIO_CAP_LPCM 2 +#define AUDIO_CAP_MP1 4 +#define AUDIO_CAP_MP2 8 +#define AUDIO_CAP_MP3 16 +#define AUDIO_CAP_AAC 32 +#define AUDIO_CAP_OGG 64 +#define AUDIO_CAP_SDDS 128 +#define AUDIO_CAP_AC3 256 + +#define AUDIO_STOP _IO('o', 1) +#define AUDIO_PLAY _IO('o', 2) +#define AUDIO_PAUSE _IO('o', 3) +#define AUDIO_CONTINUE _IO('o', 4) +#define AUDIO_SELECT_SOURCE _IO('o', 5) +#define AUDIO_SET_MUTE _IO('o', 6) +#define AUDIO_SET_AV_SYNC _IO('o', 7) +#define AUDIO_SET_BYPASS_MODE _IO('o', 8) +#define AUDIO_CHANNEL_SELECT _IO('o', 9) +#define AUDIO_GET_STATUS _IOR('o', 10, audio_status_t) + +#define AUDIO_GET_CAPABILITIES _IOR('o', 11, unsigned int) +#define AUDIO_CLEAR_BUFFER _IO('o', 12) +#define AUDIO_SET_ID _IO('o', 13) +#define AUDIO_SET_MIXER _IOW('o', 14, audio_mixer_t) +#define AUDIO_SET_STREAMTYPE _IO('o', 15) +#define AUDIO_BILINGUAL_CHANNEL_SELECT _IO('o', 20) + +#endif /* _DVBAUDIO_H_ */ diff --git a/include/uapi/linux/dvb/osd.h b/include/uapi/linux/dvb/osd.h new file mode 100644 index 000000000000..858997c74043 --- /dev/null +++ b/include/uapi/linux/dvb/osd.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */ +/* + * osd.h - DEPRECATED On Screen Display API + * + * NOTE: should not be used on future drivers + * + * Copyright (C) 2001 Ralph Metzler + * & Marcus Metzler + * for convergence integrated media GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Lesser Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +#ifndef _DVBOSD_H_ +#define _DVBOSD_H_ + +#include + +typedef enum { + /* All functions return -2 on "not open" */ + OSD_Close = 1, /* () */ + /* + * Disables OSD and releases the buffers + * returns 0 on success + */ + OSD_Open, /* (x0,y0,x1,y1,BitPerPixel[2/4/8](color&0x0F),mix[0..15](color&0xF0)) */ + /* + * Opens OSD with this size and bit depth + * returns 0 on success, -1 on DRAM allocation error, -2 on "already open" + */ + OSD_Show, /* () */ + /* + * enables OSD mode + * returns 0 on success + */ + OSD_Hide, /* () */ + /* + * disables OSD mode + * returns 0 on success + */ + OSD_Clear, /* () */ + /* + * Sets all pixel to color 0 + * returns 0 on success + */ + OSD_Fill, /* (color) */ + /* + * Sets all pixel to color + * returns 0 on success + */ + OSD_SetColor, /* (color,R{x0},G{y0},B{x1},opacity{y1}) */ + /* + * set palette entry to , and apply + * R,G,B: 0..255 + * R=Red, G=Green, B=Blue + * opacity=0: pixel opacity 0% (only video pixel shows) + * opacity=1..254: pixel opacity as specified in header + * opacity=255: pixel opacity 100% (only OSD pixel shows) + * returns 0 on success, -1 on error + */ + OSD_SetPalette, /* (firstcolor{color},lastcolor{x0},data) */ + /* + * Set a number of entries in the palette + * sets the entries "firstcolor" through "lastcolor" from the array "data" + * data has 4 byte for each color: + * R,G,B, and a opacity value: 0->transparent, 1..254->mix, 255->pixel + */ + OSD_SetTrans, /* (transparency{color}) */ + /* + * Sets transparency of mixed pixel (0..15) + * returns 0 on success + */ + OSD_SetPixel, /* (x0,y0,color) */ + /* + * sets pixel , to color number + * returns 0 on success, -1 on error + */ + OSD_GetPixel, /* (x0,y0) */ + /* returns color number of pixel ,, or -1 */ + OSD_SetRow, /* (x0,y0,x1,data) */ + /* + * fills pixels x0,y through x1,y with the content of data[] + * returns 0 on success, -1 on clipping all pixel (no pixel drawn) + */ + OSD_SetBlock, /* (x0,y0,x1,y1,increment{color},data) */ + /* + * fills pixels x0,y0 through x1,y1 with the content of data[] + * inc contains the width of one line in the data block, + * inc<=0 uses blockwidth as linewidth + * returns 0 on success, -1 on clipping all pixel + */ + OSD_FillRow, /* (x0,y0,x1,color) */ + /* + * fills pixels x0,y through x1,y with the color + * returns 0 on success, -1 on clipping all pixel + */ + OSD_FillBlock, /* (x0,y0,x1,y1,color) */ + /* + * fills pixels x0,y0 through x1,y1 with the color + * returns 0 on success, -1 on clipping all pixel + */ + OSD_Line, /* (x0,y0,x1,y1,color) */ + /* + * draw a line from x0,y0 to x1,y1 with the color + * returns 0 on success + */ + OSD_Query, /* (x0,y0,x1,y1,xasp{color}}), yasp=11 */ + /* + * fills parameters with the picture dimensions and the pixel aspect ratio + * returns 0 on success + */ + OSD_Test, /* () */ + /* + * draws a test picture. for debugging purposes only + * returns 0 on success + * TODO: remove "test" in final version + */ + OSD_Text, /* (x0,y0,size,color,text) */ + OSD_SetWindow, /* (x0) set window with number 0 + * & Ralph Metzler + * for convergence integrated media GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +#ifndef _UAPI_DVBVIDEO_H_ +#define _UAPI_DVBVIDEO_H_ + +#include +#ifndef __KERNEL__ +#include +#endif + +typedef enum { + VIDEO_FORMAT_4_3, /* Select 4:3 format */ + VIDEO_FORMAT_16_9, /* Select 16:9 format. */ + VIDEO_FORMAT_221_1 /* 2.21:1 */ +} video_format_t; + + +typedef enum { + VIDEO_PAN_SCAN, /* use pan and scan format */ + VIDEO_LETTER_BOX, /* use letterbox format */ + VIDEO_CENTER_CUT_OUT /* use center cut out format */ +} video_displayformat_t; + +typedef struct { + int w; + int h; + video_format_t aspect_ratio; +} video_size_t; + +typedef enum { + VIDEO_SOURCE_DEMUX, /* Select the demux as the main source */ + VIDEO_SOURCE_MEMORY /* If this source is selected, the stream + comes from the user through the write + system call */ +} video_stream_source_t; + + +typedef enum { + VIDEO_STOPPED, /* Video is stopped */ + VIDEO_PLAYING, /* Video is currently playing */ + VIDEO_FREEZED /* Video is freezed */ +} video_play_state_t; + + +/* Decoder commands */ +#define VIDEO_CMD_PLAY (0) +#define VIDEO_CMD_STOP (1) +#define VIDEO_CMD_FREEZE (2) +#define VIDEO_CMD_CONTINUE (3) + +/* Flags for VIDEO_CMD_FREEZE */ +#define VIDEO_CMD_FREEZE_TO_BLACK (1 << 0) + +/* Flags for VIDEO_CMD_STOP */ +#define VIDEO_CMD_STOP_TO_BLACK (1 << 0) +#define VIDEO_CMD_STOP_IMMEDIATELY (1 << 1) + +/* Play input formats: */ +/* The decoder has no special format requirements */ +#define VIDEO_PLAY_FMT_NONE (0) +/* The decoder requires full GOPs */ +#define VIDEO_PLAY_FMT_GOP (1) + +/* The structure must be zeroed before use by the application + This ensures it can be extended safely in the future. */ +struct video_command { + __u32 cmd; + __u32 flags; + union { + struct { + __u64 pts; + } stop; + + struct { + /* 0 or 1000 specifies normal speed, + 1 specifies forward single stepping, + -1 specifies backward single stepping, + >1: playback at speed/1000 of the normal speed, + <-1: reverse playback at (-speed/1000) of the normal speed. */ + __s32 speed; + __u32 format; + } play; + + struct { + __u32 data[16]; + } raw; + }; +}; + +/* FIELD_UNKNOWN can be used if the hardware does not know whether + the Vsync is for an odd, even or progressive (i.e. non-interlaced) + field. */ +#define VIDEO_VSYNC_FIELD_UNKNOWN (0) +#define VIDEO_VSYNC_FIELD_ODD (1) +#define VIDEO_VSYNC_FIELD_EVEN (2) +#define VIDEO_VSYNC_FIELD_PROGRESSIVE (3) + +struct video_event { + __s32 type; +#define VIDEO_EVENT_SIZE_CHANGED 1 +#define VIDEO_EVENT_FRAME_RATE_CHANGED 2 +#define VIDEO_EVENT_DECODER_STOPPED 3 +#define VIDEO_EVENT_VSYNC 4 + /* unused, make sure to use atomic time for y2038 if it ever gets used */ + long timestamp; + union { + video_size_t size; + unsigned int frame_rate; /* in frames per 1000sec */ + unsigned char vsync_field; /* unknown/odd/even/progressive */ + } u; +}; + + +struct video_status { + int video_blank; /* blank video on freeze? */ + video_play_state_t play_state; /* current state of playback */ + video_stream_source_t stream_source; /* current source (demux/memory) */ + video_format_t video_format; /* current aspect ratio of stream*/ + video_displayformat_t display_format;/* selected cropping mode */ +}; + + +struct video_still_picture { + char __user *iFrame; /* pointer to a single iframe in memory */ + __s32 size; +}; + + +typedef __u16 video_attributes_t; +/* bits: descr. */ +/* 15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) */ +/* 13-12 TV system (0=525/60, 1=625/50) */ +/* 11-10 Aspect ratio (0=4:3, 3=16:9) */ +/* 9- 8 permitted display mode on 4:3 monitor (0=both, 1=only pan-sca */ +/* 7 line 21-1 data present in GOP (1=yes, 0=no) */ +/* 6 line 21-2 data present in GOP (1=yes, 0=no) */ +/* 5- 3 source resolution (0=720x480/576, 1=704x480/576, 2=352x480/57 */ +/* 2 source letterboxed (1=yes, 0=no) */ +/* 0 film/camera mode (0= + *camera, 1=film (625/50 only)) */ + + +/* bit definitions for capabilities: */ +/* can the hardware decode MPEG1 and/or MPEG2? */ +#define VIDEO_CAP_MPEG1 1 +#define VIDEO_CAP_MPEG2 2 +/* can you send a system and/or program stream to video device? + (you still have to open the video and the audio device but only + send the stream to the video device) */ +#define VIDEO_CAP_SYS 4 +#define VIDEO_CAP_PROG 8 +/* can the driver also handle SPU, NAVI and CSS encoded data? + (CSS API is not present yet) */ +#define VIDEO_CAP_SPU 16 +#define VIDEO_CAP_NAVI 32 +#define VIDEO_CAP_CSS 64 + + +#define VIDEO_STOP _IO('o', 21) +#define VIDEO_PLAY _IO('o', 22) +#define VIDEO_FREEZE _IO('o', 23) +#define VIDEO_CONTINUE _IO('o', 24) +#define VIDEO_SELECT_SOURCE _IO('o', 25) +#define VIDEO_SET_BLANK _IO('o', 26) +#define VIDEO_GET_STATUS _IOR('o', 27, struct video_status) +#define VIDEO_GET_EVENT _IOR('o', 28, struct video_event) +#define VIDEO_SET_DISPLAY_FORMAT _IO('o', 29) +#define VIDEO_STILLPICTURE _IOW('o', 30, struct video_still_picture) +#define VIDEO_FAST_FORWARD _IO('o', 31) +#define VIDEO_SLOWMOTION _IO('o', 32) +#define VIDEO_GET_CAPABILITIES _IOR('o', 33, unsigned int) +#define VIDEO_CLEAR_BUFFER _IO('o', 34) +#define VIDEO_SET_STREAMTYPE _IO('o', 36) +#define VIDEO_SET_FORMAT _IO('o', 37) +#define VIDEO_GET_SIZE _IOR('o', 55, video_size_t) + +/** + * VIDEO_GET_PTS + * + * Read the 33 bit presentation time stamp as defined + * in ITU T-REC-H.222.0 / ISO/IEC 13818-1. + * + * The PTS should belong to the currently played + * frame if possible, but may also be a value close to it + * like the PTS of the last decoded frame or the last PTS + * extracted by the PES parser. + */ +#define VIDEO_GET_PTS _IOR('o', 57, __u64) + +/* Read the number of displayed frames since the decoder was started */ +#define VIDEO_GET_FRAME_COUNT _IOR('o', 58, __u64) + +#define VIDEO_COMMAND _IOWR('o', 59, struct video_command) +#define VIDEO_TRY_COMMAND _IOWR('o', 60, struct video_command) + +#endif /* _UAPI_DVBVIDEO_H_ */ -- cgit v1.2.3 From e34a02dc40c95d126bb6486dcf802bbb8d1624a0 Mon Sep 17 00:00:00 2001 From: Dmitry Kadashev Date: Thu, 8 Jul 2021 13:34:45 +0700 Subject: io_uring: add support for IORING_OP_MKDIRAT IORING_OP_MKDIRAT behaves like mkdirat(2) and takes the same flags and arguments. Acked-by: Linus Torvalds Signed-off-by: Dmitry Kadashev Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20210708063447.3556403-10-dkadashev@gmail.com [axboe: add splice_fd_in check] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 79126d5cd289..a926407c230e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -133,6 +133,7 @@ enum { IORING_OP_SHUTDOWN, IORING_OP_RENAMEAT, IORING_OP_UNLINKAT, + IORING_OP_MKDIRAT, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 7a8721f84fcb3b2946a92380b6fc311e017ff02c Mon Sep 17 00:00:00 2001 From: Dmitry Kadashev Date: Thu, 8 Jul 2021 13:34:46 +0700 Subject: io_uring: add support for IORING_OP_SYMLINKAT IORING_OP_SYMLINKAT behaves like symlinkat(2) and takes the same flags and arguments. Acked-by: Linus Torvalds Suggested-by: Christian Brauner Link: https://lore.kernel.org/io-uring/20210514145259.wtl4xcsp52woi6ab@wittgenstein/ Signed-off-by: Dmitry Kadashev Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20210708063447.3556403-11-dkadashev@gmail.com [axboe: add splice_fd_in check] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index a926407c230e..61fd347ab176 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -134,6 +134,7 @@ enum { IORING_OP_RENAMEAT, IORING_OP_UNLINKAT, IORING_OP_MKDIRAT, + IORING_OP_SYMLINKAT, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From cf30da90bc3a26911d369f199411f38b701394de Mon Sep 17 00:00:00 2001 From: Dmitry Kadashev Date: Thu, 8 Jul 2021 13:34:47 +0700 Subject: io_uring: add support for IORING_OP_LINKAT IORING_OP_LINKAT behaves like linkat(2) and takes the same flags and arguments. In some internal places 'hardlink' is used instead of 'link' to avoid confusion with the SQE links. Name 'link' conflicts with the existing 'link' member of io_kiocb. Acked-by: Linus Torvalds Suggested-by: Christian Brauner Link: https://lore.kernel.org/io-uring/20210514145259.wtl4xcsp52woi6ab@wittgenstein/ Signed-off-by: Dmitry Kadashev Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20210708063447.3556403-12-dkadashev@gmail.com [axboe: add splice_fd_in check] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 61fd347ab176..10eb38d2864f 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -44,6 +44,7 @@ struct io_uring_sqe { __u32 splice_flags; __u32 rename_flags; __u32 unlink_flags; + __u32 hardlink_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -135,6 +136,7 @@ enum { IORING_OP_UNLINKAT, IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, + IORING_OP_LINKAT, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 6fc88c354f3af83ffa2c285b86e76c759755693f Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Thu, 19 Aug 2021 02:24:20 -0700 Subject: bpf: Migrate cgroup_bpf to internal cgroup_bpf_attach_type enum Add an enum (cgroup_bpf_attach_type) containing only valid cgroup_bpf attach types and a function to map bpf_attach_type values to the new enum. Inspired by netns_bpf_attach_type. Then, migrate cgroup_bpf to use cgroup_bpf_attach_type wherever possible. Functionality is unchanged as attach_type_to_prog_type switches in bpf/syscall.c were preventing non-cgroup programs from making use of the invalid cgroup_bpf array slots. As a result struct cgroup_bpf uses 504 fewer bytes relative to when its arrays were sized using MAX_BPF_ATTACH_TYPE. bpf_cgroup_storage is notably not migrated as struct bpf_cgroup_storage_key is part of uapi and contains a bpf_attach_type member which is not meant to be opaque. Similarly, bpf_cgroup_link continues to report its bpf_attach_type member to userspace via fdinfo and bpf_link_info. To ease disambiguation, bpf_attach_type variables are renamed from 'type' to 'atype' when changed to cgroup_bpf_attach_type. Signed-off-by: Dave Marchevsky Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210819092420.1984861-2-davemarchevsky@fb.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c4f7892edb2b..191f0b286ee3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -84,7 +84,7 @@ struct bpf_lpm_trie_key { struct bpf_cgroup_storage_key { __u64 cgroup_inode_id; /* cgroup inode id */ - __u32 attach_type; /* program attach type */ + __u32 attach_type; /* program attach type (enum bpf_attach_type) */ }; union bpf_iter_link_info { -- cgit v1.2.3 From 029ee6b14356b94120bedb852dcdaefc0a282cf1 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Fri, 20 Aug 2021 15:35:17 +0800 Subject: ethtool: add two coalesce attributes for CQE mode Currently, there are many drivers who support CQE mode configuration, some configure it as a fixed when initialized, some provide an interface to change it by ethtool private flags. In order to make it more generic, add two new 'ETHTOOL_A_COALESCE_USE_CQE_TX' and 'ETHTOOL_A_COALESCE_USE_CQE_RX' coalesce attributes, then these parameters can be accessed by ethtool netlink coalesce uAPI. Also add an new structure kernel_ethtool_coalesce, then the new parameter can be added into this struct. Signed-off-by: Yufeng Mo Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index b3b93710eff7..5545f1ca9237 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -377,6 +377,8 @@ enum { ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */ ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */ ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */ + ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */ + ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */ /* add new constants above here */ __ETHTOOL_A_COALESCE_CNT, -- cgit v1.2.3 From b9445598d8c60a1379887b957024b71343965f74 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 25 Aug 2021 12:25:45 +0100 Subject: io_uring: openat directly into fixed fd table Instead of opening a file into a process's file table as usual and then registering the fd within io_uring, some users may want to skip the first step and place it directly into io_uring's fixed file table. This patch adds such a capability for IORING_OP_OPENAT and IORING_OP_OPENAT2. The behaviour is controlled by setting sqe->file_index, where 0 implies the old behaviour using normal file tables. If non-zero value is specified, then it will behave as described and place the file into a fixed file slot sqe->file_index - 1. A file table should be already created, the slot should be valid and empty, otherwise the operation will fail. Keep the error codes consistent with IORING_OP_FILES_UPDATE, ENXIO and EINVAL on inappropriate fixed tables, and return EBADF on collision with already registered file. Note: IOSQE_FIXED_FILE can't be used to switch between modes, because accept takes a file, and it already uses the flag with a different meaning. Suggested-by: Josh Triplett Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/e9b33d1163286f51ea707f87d95bd596dada1e65.1629888991.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 79126d5cd289..45a4f2373694 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -55,7 +55,10 @@ struct io_uring_sqe { } __attribute__((packed)); /* personality to use, if used */ __u16 personality; - __s32 splice_fd_in; + union { + __s32 splice_fd_in; + __u32 file_index; + }; __u64 __pad2[2]; }; -- cgit v1.2.3 From dd6e10fbd9fb86a571d925602c8a24bb4d09a2a7 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Mon, 23 Aug 2021 19:43:49 -0700 Subject: bpf: Add bpf_task_pt_regs() helper The motivation behind this helper is to access userspace pt_regs in a kprobe handler. uprobe's ctx is the userspace pt_regs. kprobe's ctx is the kernelspace pt_regs. bpf_task_pt_regs() allows accessing userspace pt_regs in a kprobe handler. The final case (kernelspace pt_regs in uprobe) is pretty rare (usermode helper) so I think that can be solved later if necessary. More concretely, this helper is useful in doing BPF-based DWARF stack unwinding. Currently the kernel can only do framepointer based stack unwinds for userspace code. This is because the DWARF state machines are too fragile to be computed in kernelspace [0]. The idea behind DWARF-based stack unwinds w/ BPF is to copy a chunk of the userspace stack (while in prog context) and send it up to userspace for unwinding (probably with libunwind) [1]. This would effectively enable profiling applications with -fomit-frame-pointer using kprobes and uprobes. [0]: https://lkml.org/lkml/2012/2/10/356 [1]: https://github.com/danobi/bpf-dwarf-walk Signed-off-by: Daniel Xu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/e2718ced2d51ef4268590ab8562962438ab82815.1629772842.git.dxu@dxuuu.xyz --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 191f0b286ee3..791f31dd0abe 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4871,6 +4871,12 @@ union bpf_attr { * Return * Value specified by user at BPF link creation/attachment time * or 0, if it was not specified. + * + * long bpf_task_pt_regs(struct task_struct *task) + * Description + * Get the struct pt_regs associated with **task**. + * Return + * A pointer to struct pt_regs. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5048,6 +5054,7 @@ union bpf_attr { FN(timer_cancel), \ FN(get_func_ip), \ FN(get_attach_cookie), \ + FN(task_pt_regs), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 6a241d2923c2c0f6893c78c79421ceb3935691fd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 Aug 2021 17:53:29 +0200 Subject: um: virt-pci: fix uapi documentation The identifier names in the documentation here didn't match the real ones, and the reserved was missing. Fix that. Reported-by: Bjorn Helgaas Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver") Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- include/uapi/linux/virtio_pcidev.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_pcidev.h b/include/uapi/linux/virtio_pcidev.h index 89daa88bcfef..668b07ce515b 100644 --- a/include/uapi/linux/virtio_pcidev.h +++ b/include/uapi/linux/virtio_pcidev.h @@ -9,13 +9,14 @@ /** * enum virtio_pcidev_ops - virtual PCI device operations + * @VIRTIO_PCIDEV_OP_RESERVED: reserved to catch errors * @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8; * the @data field should be filled in by the device (in little endian). * @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8; * the @data field contains the data to write (in little endian). - * @VIRTIO_PCIDEV_OP_BAR_READ: read BAR mem/pio, size can be variable; + * @VIRTIO_PCIDEV_OP_MMIO_READ: read BAR mem/pio, size can be variable; * the @data field should be filled in by the device (in little endian). - * @VIRTIO_PCIDEV_OP_BAR_WRITE: write BAR mem/pio, size can be variable; + * @VIRTIO_PCIDEV_OP_MMIO_WRITE: write BAR mem/pio, size can be variable; * the @data field contains the data to write (in little endian). * @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but * the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE) -- cgit v1.2.3 From 49b99da2c9ce13ffcd93fe3a0f5670791c1d76f7 Mon Sep 17 00:00:00 2001 From: Rocco Yue Date: Fri, 27 Aug 2021 23:04:12 +0800 Subject: ipv6: add IFLA_INET6_RA_MTU to expose mtu value The kernel provides a "/proc/sys/net/ipv6/conf//mtu" file, which can temporarily record the mtu value of the last received RA message when the RA mtu value is lower than the interface mtu, but this proc has following limitations: (1) when the interface mtu (/sys/class/net//mtu) is updeated, mtu6 (/proc/sys/net/ipv6/conf//mtu) will be updated to the value of interface mtu; (2) mtu6 (/proc/sys/net/ipv6/conf//mtu) only affect ipv6 connection, and not affect ipv4. Therefore, when the mtu option is carried in the RA message, there will be a problem that the user sometimes cannot obtain RA mtu value correctly by reading mtu6. After this patch set, if a RA message carries the mtu option, you can send a netlink msg which nlmsg_type is RTM_GETLINK, and then by parsing the attribute of IFLA_INET6_RA_MTU to get the mtu value carried in the RA message received on the inet6 device. In addition, you can also get a link notification when ra_mtu is updated so it doesn't have to poll. In this way, if the MTU values that the device receives from the network in the PCO IPv4 and the RA IPv6 procedures are different, the user can obtain the correct ipv6 ra_mtu value and compare the value of ra_mtu and ipv4 mtu, then the device can use the lower MTU value for both IPv4 and IPv6. Signed-off-by: Rocco Yue Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210827150412.9267-1-rocco.yue@mediatek.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8aad65b69054..eebd3894fe89 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -417,6 +417,7 @@ enum { IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ IFLA_INET6_TOKEN, /* device token */ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ + IFLA_INET6_RA_MTU, /* mtu carried in the RA message */ __IFLA_INET6_MAX }; -- cgit v1.2.3 From d8071323c5632bdf0a8ef9b9e5662fac43649f9d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 19 Aug 2021 09:34:06 -0700 Subject: dmaengine: idxd: fix setting up priv mode for dwq DSA spec says WQ priv bit is 0 if the Privileged Mode Enable field of the PCI Express PASID capability is 0 and pasid is enabled. Make sure that the WQCFG priv field is set correctly according to usage type. Reject config if setting up kernel WQ type and no support. Also add the correct priv setup for a descriptor. Fixes: 484f910e93b4 ("dmaengine: idxd: fix wq config registers offset programming") Cc: Ramesh Thomas Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162939084657.903168.14160019185148244596.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index ca24c25252fb..c750eac09fc9 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -27,6 +27,7 @@ enum idxd_scmd_stat { IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000, IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000, IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, + IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, }; #define IDXD_SCMD_SOFTERR_MASK 0x80000000 -- cgit v1.2.3 From 2e480058ddc21ec53a10e8b41623e245e908bdbc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 27 Aug 2021 11:33:19 -0600 Subject: io-wq: provide a way to limit max number of workers io-wq divides work into two categories: 1) Work that completes in a bounded time, like reading from a regular file or a block device. This type of work is limited based on the size of the SQ ring. 2) Work that may never complete, we call this unbounded work. The amount of workers here is just limited by RLIMIT_NPROC. For various uses cases, it's handy to have the kernel limit the maximum amount of pending workers for both categories. Provide a way to do with with a new IORING_REGISTER_IOWQ_MAX_WORKERS operation. IORING_REGISTER_IOWQ_MAX_WORKERS takes an array of two integers and sets the max worker count to what is being passed in for each category. The old values are returned into that same array. If 0 is being passed in for either category, it simply returns the current value. The value is capped at RLIMIT_NPROC. This actually isn't that important as it's more of a hint, if we're exceeding the value then our attempt to fork a new worker will fail. This happens naturally already if more than one node is in the system, as these values are per-node internally for io-wq. Reported-by: Johannes Lundberg Link: https://github.com/axboe/liburing/issues/420 Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 45a4f2373694..64fe809c4e36 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -309,6 +309,9 @@ enum { IORING_REGISTER_IOWQ_AFF = 17, IORING_UNREGISTER_IOWQ_AFF = 18, + /* set/get max number of workers */ + IORING_REGISTER_IOWQ_MAX_WORKERS = 19, + /* this goes last */ IORING_REGISTER_LAST }; -- cgit v1.2.3 From 50c1df2b56e0f581b1dbf334dbf807d6fb8f77b2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 27 Aug 2021 17:11:06 -0600 Subject: io_uring: support CLOCK_BOOTTIME/REALTIME for timeouts Certain use cases want to use CLOCK_BOOTTIME or CLOCK_REALTIME rather than CLOCK_MONOTONIC, instead of the default CLOCK_MONOTONIC. Add an IORING_TIMEOUT_BOOTTIME and IORING_TIMEOUT_REALTIME flag that allows timeouts and linked timeouts to use the selected clock source. Only one clock source may be selected, and we -EINVAL the request if more than one is given. If neither BOOTIME nor REALTIME are selected, the previous default of MONOTONIC is used. Link: https://github.com/axboe/liburing/issues/369 Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 64fe809c4e36..b6d28d927a3f 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -151,6 +151,9 @@ enum { */ #define IORING_TIMEOUT_ABS (1U << 0) #define IORING_TIMEOUT_UPDATE (1U << 1) +#define IORING_TIMEOUT_BOOTTIME (1U << 2) +#define IORING_TIMEOUT_REALTIME (1U << 3) +#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) /* * sqe->splice_flags -- cgit v1.2.3 From f1042b6ccb887f07301f6b096b3d0cfcf9189323 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 28 Aug 2021 19:54:39 -0600 Subject: io_uring: allow updating linked timeouts We allow updating normal timeouts, add support for adjusting timings of linked timeouts as well. Reported-by: Victor Stewart Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b6d28d927a3f..3caec9199658 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -149,12 +149,13 @@ enum { /* * sqe->timeout_flags */ -#define IORING_TIMEOUT_ABS (1U << 0) -#define IORING_TIMEOUT_UPDATE (1U << 1) -#define IORING_TIMEOUT_BOOTTIME (1U << 2) -#define IORING_TIMEOUT_REALTIME (1U << 3) +#define IORING_TIMEOUT_ABS (1U << 0) +#define IORING_TIMEOUT_UPDATE (1U << 1) +#define IORING_TIMEOUT_BOOTTIME (1U << 2) +#define IORING_TIMEOUT_REALTIME (1U << 3) +#define IORING_LINK_TIMEOUT_UPDATE (1U << 4) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) - +#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) /* * sqe->splice_flags * extends splice(2) flags -- cgit v1.2.3 From d7e7747ac5c2496c98291944c6066adaa9f3b975 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 26 Aug 2021 15:54:22 +0200 Subject: netfilter: refuse insertion if chain has grown too large Also add a stat counter for this that gets exported both via old /proc interface and ctnetlink. Assuming the old default size of 16536 buckets and max hash occupancy of 64k, this results in 128k insertions (origin+reply), so ~8 entries per chain on average. The revised settings in this series will result in about two entries per bucket on average. This allows a hard-limit ceiling of 64. This is not tunable at the moment, but its possible to either increase nf_conntrack_buckets or decrease nf_conntrack_max to reduce average lengths. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index d8484be72fdc..5ade231f497b 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -257,6 +257,7 @@ enum ctattr_stats_cpu { CTA_STATS_ERROR, CTA_STATS_SEARCH_RESTART, CTA_STATS_CLASH_RESOLVE, + CTA_STATS_CHAIN_TOOLONG, __CTA_STATS_MAX, }; #define CTA_STATS_MAX (__CTA_STATS_MAX - 1) -- cgit v1.2.3 From 17395d7742baa4737e9d3b4672cc3d10e5970998 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 31 Aug 2021 10:59:25 +0530 Subject: gpio: virtio: Fix sparse warnings Fix warnings reported by sparse, related to type mismatch between u16 and __le16. Reported-by: kernel test robot Fixes: 3a29355a22c0 ("gpio: Add virtio-gpio driver") Signed-off-by: Viresh Kumar Acked-by: Michael S. Tsirkin Signed-off-by: Bartosz Golaszewski --- include/uapi/linux/virtio_gpio.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_gpio.h b/include/uapi/linux/virtio_gpio.h index 844574acf095..0445f905d8cc 100644 --- a/include/uapi/linux/virtio_gpio.h +++ b/include/uapi/linux/virtio_gpio.h @@ -22,16 +22,16 @@ #define VIRTIO_GPIO_DIRECTION_IN 0x02 struct virtio_gpio_config { - __u16 ngpio; + __le16 ngpio; __u8 padding[2]; - __u32 gpio_names_size; + __le32 gpio_names_size; } __packed; /* Virtio GPIO Request / Response */ struct virtio_gpio_request { - __u16 type; - __u16 gpio; - __u32 value; + __le16 type; + __le16 gpio; + __le32 value; }; struct virtio_gpio_response { -- cgit v1.2.3 From b27abaccf8e8b012f126da0c2a1ab32723ec8b9f Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Sep 2021 15:00:06 -0700 Subject: mm/mempolicy: add MPOL_PREFERRED_MANY for multiple preferred nodes Patch series "Introduce multi-preference mempolicy", v7. This patch series introduces the concept of the MPOL_PREFERRED_MANY mempolicy. This mempolicy mode can be used with either the set_mempolicy(2) or mbind(2) interfaces. Like the MPOL_PREFERRED interface, it allows an application to set a preference for nodes which will fulfil memory allocation requests. Unlike the MPOL_PREFERRED mode, it takes a set of nodes. Like the MPOL_BIND interface, it works over a set of nodes. Unlike MPOL_BIND, it will not cause a SIGSEGV or invoke the OOM killer if those preferred nodes are not available. Along with these patches are patches for libnuma, numactl, numademo, and memhog. They still need some polish, but can be found here: https://gitlab.com/bwidawsk/numactl/-/tree/prefer-many It allows new usage: `numactl -P 0,3,4` The goal of the new mode is to enable some use-cases when using tiered memory usage models which I've lovingly named. 1a. The Hare - The interconnect is fast enough to meet bandwidth and latency requirements allowing preference to be given to all nodes with "fast" memory. 1b. The Indiscriminate Hare - An application knows it wants fast memory (or perhaps slow memory), but doesn't care which node it runs on. The application can prefer a set of nodes and then xpu bind to the local node (cpu, accelerator, etc). This reverses the nodes are chosen today where the kernel attempts to use local memory to the CPU whenever possible. This will attempt to use the local accelerator to the memory. 2. The Tortoise - The administrator (or the application itself) is aware it only needs slow memory, and so can prefer that. Much of this is almost achievable with the bind interface, but the bind interface suffers from an inability to fallback to another set of nodes if binding fails to all nodes in the nodemask. Like MPOL_BIND a nodemask is given. Inherently this removes ordering from the preference. > /* Set first two nodes as preferred in an 8 node system. */ > const unsigned long nodes = 0x3 > set_mempolicy(MPOL_PREFER_MANY, &nodes, 8); > /* Mimic interleave policy, but have fallback *. > const unsigned long nodes = 0xaa > set_mempolicy(MPOL_PREFER_MANY, &nodes, 8); Some internal discussion took place around the interface. There are two alternatives which we have discussed, plus one I stuck in: 1. Ordered list of nodes. Currently it's believed that the added complexity is nod needed for expected usecases. 2. A flag for bind to allow falling back to other nodes. This confuses the notion of binding and is less flexible than the current solution. 3. Create flags or new modes that helps with some ordering. This offers both a friendlier API as well as a solution for more customized usage. It's unknown if it's worth the complexity to support this. Here is sample code for how this might work: > // Prefer specific nodes for some something wacky > set_mempolicy(MPOL_PREFER_MANY, 0x17c, 1024); > > // Default > set_mempolicy(MPOL_PREFER_MANY | MPOL_F_PREFER_ORDER_SOCKET, NULL, 0); > // which is the same as > set_mempolicy(MPOL_DEFAULT, NULL, 0); > > // The Hare > set_mempolicy(MPOL_PREFER_MANY | MPOL_F_PREFER_ORDER_TYPE, NULL, 0); > > // The Tortoise > set_mempolicy(MPOL_PREFER_MANY | MPOL_F_PREFER_ORDER_TYPE_REV, NULL, 0); > > // Prefer the fast memory of the first two sockets > set_mempolicy(MPOL_PREFER_MANY | MPOL_F_PREFER_ORDER_TYPE, -1, 2); > This patch (of 5): The NUMA APIs currently allow passing in a "preferred node" as a single bit set in a nodemask. If more than one bit it set, bits after the first are ignored. This single node is generally OK for location-based NUMA where memory being allocated will eventually be operated on by a single CPU. However, in systems with multiple memory types, folks want to target a *type* of memory instead of a location. For instance, someone might want some high-bandwidth memory but do not care about the CPU next to which it is allocated. Or, they want a cheap, high capacity allocation and want to target all NUMA nodes which have persistent memory in volatile mode. In both of these cases, the application wants to target a *set* of nodes, but does not want strict MPOL_BIND behavior as that could lead to OOM killer or SIGSEGV. So add MPOL_PREFERRED_MANY policy to support the multiple preferred nodes requirement. This is not a pie-in-the-sky dream for an API. This was a response to a specific ask of more than one group at Intel. Specifically: 1. There are existing libraries that target memory types such as https://github.com/memkind/memkind. These are known to suffer from SIGSEGV's when memory is low on targeted memory "kinds" that span more than one node. The MCDRAM on a Xeon Phi in "Cluster on Die" mode is an example of this. 2. Volatile-use persistent memory users want to have a memory policy which is targeted at either "cheap and slow" (PMEM) or "expensive and fast" (DRAM). However, they do not want to experience allocation failures when the targeted type is unavailable. 3. Allocate-then-run. Generally, we let the process scheduler decide on which physical CPU to run a task. That location provides a default allocation policy, and memory availability is not generally considered when placing tasks. For situations where memory is valuable and constrained, some users want to allocate memory first, *then* allocate close compute resources to the allocation. This is the reverse of the normal (CPU) model. Accelerators such as GPUs that operate on core-mm-managed memory are interested in this model. A check is added in sanitize_mpol_flags() to not permit 'prefer_many' policy to be used for now, and will be removed in later patch after all implementations for 'prefer_many' are ready, as suggested by Michal Hocko. [mhocko@kernel.org: suggest to refine policy_node/policy_nodemask handling] Link: https://lkml.kernel.org/r/1627970362-61305-1-git-send-email-feng.tang@intel.com Link: https://lore.kernel.org/r/20200630212517.308045-4-ben.widawsky@intel.com Link: https://lkml.kernel.org/r/1627970362-61305-2-git-send-email-feng.tang@intel.com Co-developed-by: Ben Widawsky Signed-off-by: Ben Widawsky Signed-off-by: Dave Hansen Signed-off-by: Feng Tang Cc: Michal Hocko Acked-by: Michal Hocko Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Cc: Andi Kleen Cc: Dan Williams Cc: Huang Ying b Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/mempolicy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 19a00bc7fe86..046d0ccba4cd 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -22,6 +22,7 @@ enum { MPOL_BIND, MPOL_INTERLEAVE, MPOL_LOCAL, + MPOL_PREFERRED_MANY, MPOL_MAX, /* always last member of enum */ }; -- cgit v1.2.3 From c7c5e6ff533fe1f9afef7d2fa46678987a1335a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Sep 2021 15:03:43 -0700 Subject: fq_codel: reject silly quantum parameters syzbot found that forcing a big quantum attribute would crash hosts fast, essentially using this: tc qd replace dev eth0 root fq_codel quantum 4294967295 This is because fq_codel_dequeue() would have to loop ~2^31 times in : if (flow->deficit <= 0) { flow->deficit += q->quantum; list_move_tail(&flow->flowchain, &q->old_flows); goto begin; } SFQ max quantum is 2^19 (half a megabyte) Lets adopt a max quantum of one megabyte for FQ_CODEL. Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 79a699f106b1..ec88590b3198 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -827,6 +827,8 @@ struct tc_codel_xstats { /* FQ_CODEL */ +#define FQ_CODEL_QUANTUM_MAX (1 << 20) + enum { TCA_FQ_CODEL_UNSPEC, TCA_FQ_CODEL_TARGET, -- cgit v1.2.3 From d5a8680dfab0547a4ecd708b1fe9de48598a6757 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 27 Jul 2021 10:53:51 +0530 Subject: uapi: virtio_ids: Sync ids with specification This synchronizes the virtio ids with the latest list from virtio specification. Signed-off-by: Viresh Kumar Link: https://lore.kernel.org/r/61b27e3bc61fb0c9f067001e95cfafc5d37d414a.1627362340.git.viresh.kumar@linaro.org Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ids.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 70a8057ad4bb..3c8e11820fdb 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -54,8 +54,20 @@ #define VIRTIO_ID_SOUND 25 /* virtio sound */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ +#define VIRTIO_ID_RPMB 28 /* virtio rpmb */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_VIDEO_ENCODER 30 /* virtio video encoder */ +#define VIRTIO_ID_VIDEO_DECODER 31 /* virtio video decoder */ +#define VIRTIO_ID_SCMI 32 /* virtio scmi */ +#define VIRTIO_ID_NITRO_SEC_MOD 33 /* virtio nitro secure module*/ +#define VIRTIO_ID_I2C_ADAPTER 34 /* virtio i2c adapter */ +#define VIRTIO_ID_WATCHDOG 35 /* virtio watchdog */ +#define VIRTIO_ID_CAN 36 /* virtio can */ +#define VIRTIO_ID_DMABUF 37 /* virtio dmabuf */ +#define VIRTIO_ID_PARAM_SERV 38 /* virtio parameter server */ +#define VIRTIO_ID_AUDIO_POLICY 39 /* virtio audio policy */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ +#define VIRTIO_ID_GPIO 41 /* virtio gpio */ /* * Virtio Transitional IDs -- cgit v1.2.3 From 9af8f1061646e8e22b66413bedf7b3e2ab3d69e5 Mon Sep 17 00:00:00 2001 From: Arseny Krasnov Date: Fri, 3 Sep 2021 15:31:06 +0300 Subject: virtio/vsock: rename 'EOR' to 'EOM' bit. This current implemented bit is used to mark end of messages ('EOM' - end of message), not records('EOR' - end of record). Also rename 'record' to 'message' in implementation as it is different things. Signed-off-by: Arseny Krasnov Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20210903123109.3273053-1-arseny.krasnov@kaspersky.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_vsock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h index 3dd3555b2740..8485b004a5f8 100644 --- a/include/uapi/linux/virtio_vsock.h +++ b/include/uapi/linux/virtio_vsock.h @@ -97,7 +97,7 @@ enum virtio_vsock_shutdown { /* VIRTIO_VSOCK_OP_RW flags values */ enum virtio_vsock_rw { - VIRTIO_VSOCK_SEQ_EOR = 1, + VIRTIO_VSOCK_SEQ_EOM = 1, }; #endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ -- cgit v1.2.3 From 41116599a0731f4cd451e9d191d879ab45e31945 Mon Sep 17 00:00:00 2001 From: Arseny Krasnov Date: Fri, 3 Sep 2021 15:32:23 +0300 Subject: virtio/vsock: add 'VIRTIO_VSOCK_SEQ_EOR' bit. This bit is used to handle POSIX MSG_EOR flag passed from userspace in 'send*()' system calls. It marks end of each record and is visible to receiver using 'recvmsg()' system call. Signed-off-by: Arseny Krasnov Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20210903123225.3273425-1-arseny.krasnov@kaspersky.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_vsock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h index 8485b004a5f8..64738838bee5 100644 --- a/include/uapi/linux/virtio_vsock.h +++ b/include/uapi/linux/virtio_vsock.h @@ -98,6 +98,7 @@ enum virtio_vsock_shutdown { /* VIRTIO_VSOCK_OP_RW flags values */ enum virtio_vsock_rw { VIRTIO_VSOCK_SEQ_EOM = 1, + VIRTIO_VSOCK_SEQ_EOR = 2, }; #endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ -- cgit v1.2.3 From c8a6153b6c59d95c0e091f053f6f180952ade91e Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Tue, 31 Aug 2021 18:36:33 +0800 Subject: vduse: Introduce VDUSE - vDPA Device in Userspace This VDUSE driver enables implementing software-emulated vDPA devices in userspace. The vDPA device is created by ioctl(VDUSE_CREATE_DEV) on /dev/vduse/control. Then a char device interface (/dev/vduse/$NAME) is exported to userspace for device emulation. In order to make the device emulation more secure, the device's control path is handled in kernel. A message mechnism is introduced to forward some dataplane related control messages to userspace. And in the data path, the DMA buffer will be mapped into userspace address space through different ways depending on the vDPA bus to which the vDPA device is attached. In virtio-vdpa case, the MMU-based software IOTLB is used to achieve that. And in vhost-vdpa case, the DMA buffer is reside in a userspace memory region which can be shared to the VDUSE userspace processs via transferring the shmfd. For more details on VDUSE design and usage, please see the follow-on Documentation commit. NB(mst): when merging this with b542e383d8c0 ("eventfd: Make signal recursion protection a task bit") replace eventfd_signal_count with eventfd_signal_allowed, and drop the previous ("eventfd: Export eventfd_wake_count to modules"). Signed-off-by: Xie Yongji Acked-by: Jason Wang Link: https://lore.kernel.org/r/20210831103634.33-13-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vduse.h | 306 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 306 insertions(+) create mode 100644 include/uapi/linux/vduse.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h new file mode 100644 index 000000000000..7cfe1c1280c0 --- /dev/null +++ b/include/uapi/linux/vduse.h @@ -0,0 +1,306 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_VDUSE_H_ +#define _UAPI_VDUSE_H_ + +#include + +#define VDUSE_BASE 0x81 + +/* The ioctls for control device (/dev/vduse/control) */ + +#define VDUSE_API_VERSION 0 + +/* + * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION). + * This is used for future extension. + */ +#define VDUSE_GET_API_VERSION _IOR(VDUSE_BASE, 0x00, __u64) + +/* Set the version of VDUSE API that userspace supported. */ +#define VDUSE_SET_API_VERSION _IOW(VDUSE_BASE, 0x01, __u64) + +/** + * struct vduse_dev_config - basic configuration of a VDUSE device + * @name: VDUSE device name, needs to be NUL terminated + * @vendor_id: virtio vendor id + * @device_id: virtio device id + * @features: virtio features + * @vq_num: the number of virtqueues + * @vq_align: the allocation alignment of virtqueue's metadata + * @reserved: for future use, needs to be initialized to zero + * @config_size: the size of the configuration space + * @config: the buffer of the configuration space + * + * Structure used by VDUSE_CREATE_DEV ioctl to create VDUSE device. + */ +struct vduse_dev_config { +#define VDUSE_NAME_MAX 256 + char name[VDUSE_NAME_MAX]; + __u32 vendor_id; + __u32 device_id; + __u64 features; + __u32 vq_num; + __u32 vq_align; + __u32 reserved[13]; + __u32 config_size; + __u8 config[]; +}; + +/* Create a VDUSE device which is represented by a char device (/dev/vduse/$NAME) */ +#define VDUSE_CREATE_DEV _IOW(VDUSE_BASE, 0x02, struct vduse_dev_config) + +/* + * Destroy a VDUSE device. Make sure there are no more references + * to the char device (/dev/vduse/$NAME). + */ +#define VDUSE_DESTROY_DEV _IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX]) + +/* The ioctls for VDUSE device (/dev/vduse/$NAME) */ + +/** + * struct vduse_iotlb_entry - entry of IOTLB to describe one IOVA region [start, last] + * @offset: the mmap offset on returned file descriptor + * @start: start of the IOVA region + * @last: last of the IOVA region + * @perm: access permission of the IOVA region + * + * Structure used by VDUSE_IOTLB_GET_FD ioctl to find an overlapped IOVA region. + */ +struct vduse_iotlb_entry { + __u64 offset; + __u64 start; + __u64 last; +#define VDUSE_ACCESS_RO 0x1 +#define VDUSE_ACCESS_WO 0x2 +#define VDUSE_ACCESS_RW 0x3 + __u8 perm; +}; + +/* + * Find the first IOVA region that overlaps with the range [start, last] + * and return the corresponding file descriptor. Return -EINVAL means the + * IOVA region doesn't exist. Caller should set start and last fields. + */ +#define VDUSE_IOTLB_GET_FD _IOWR(VDUSE_BASE, 0x10, struct vduse_iotlb_entry) + +/* + * Get the negotiated virtio features. It's a subset of the features in + * struct vduse_dev_config which can be accepted by virtio driver. It's + * only valid after FEATURES_OK status bit is set. + */ +#define VDUSE_DEV_GET_FEATURES _IOR(VDUSE_BASE, 0x11, __u64) + +/** + * struct vduse_config_data - data used to update configuration space + * @offset: the offset from the beginning of configuration space + * @length: the length to write to configuration space + * @buffer: the buffer used to write from + * + * Structure used by VDUSE_DEV_SET_CONFIG ioctl to update device + * configuration space. + */ +struct vduse_config_data { + __u32 offset; + __u32 length; + __u8 buffer[]; +}; + +/* Set device configuration space */ +#define VDUSE_DEV_SET_CONFIG _IOW(VDUSE_BASE, 0x12, struct vduse_config_data) + +/* + * Inject a config interrupt. It's usually used to notify virtio driver + * that device configuration space has changed. + */ +#define VDUSE_DEV_INJECT_CONFIG_IRQ _IO(VDUSE_BASE, 0x13) + +/** + * struct vduse_vq_config - basic configuration of a virtqueue + * @index: virtqueue index + * @max_size: the max size of virtqueue + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue. + */ +struct vduse_vq_config { + __u32 index; + __u16 max_size; + __u16 reserved[13]; +}; + +/* + * Setup the specified virtqueue. Make sure all virtqueues have been + * configured before the device is attached to vDPA bus. + */ +#define VDUSE_VQ_SETUP _IOW(VDUSE_BASE, 0x14, struct vduse_vq_config) + +/** + * struct vduse_vq_state_split - split virtqueue state + * @avail_index: available index + */ +struct vduse_vq_state_split { + __u16 avail_index; +}; + +/** + * struct vduse_vq_state_packed - packed virtqueue state + * @last_avail_counter: last driver ring wrap counter observed by device + * @last_avail_idx: device available index + * @last_used_counter: device ring wrap counter + * @last_used_idx: used index + */ +struct vduse_vq_state_packed { + __u16 last_avail_counter; + __u16 last_avail_idx; + __u16 last_used_counter; + __u16 last_used_idx; +}; + +/** + * struct vduse_vq_info - information of a virtqueue + * @index: virtqueue index + * @num: the size of virtqueue + * @desc_addr: address of desc area + * @driver_addr: address of driver area + * @device_addr: address of device area + * @split: split virtqueue state + * @packed: packed virtqueue state + * @ready: ready status of virtqueue + * + * Structure used by VDUSE_VQ_GET_INFO ioctl to get virtqueue's information. + */ +struct vduse_vq_info { + __u32 index; + __u32 num; + __u64 desc_addr; + __u64 driver_addr; + __u64 device_addr; + union { + struct vduse_vq_state_split split; + struct vduse_vq_state_packed packed; + }; + __u8 ready; +}; + +/* Get the specified virtqueue's information. Caller should set index field. */ +#define VDUSE_VQ_GET_INFO _IOWR(VDUSE_BASE, 0x15, struct vduse_vq_info) + +/** + * struct vduse_vq_eventfd - eventfd configuration for a virtqueue + * @index: virtqueue index + * @fd: eventfd, -1 means de-assigning the eventfd + * + * Structure used by VDUSE_VQ_SETUP_KICKFD ioctl to setup kick eventfd. + */ +struct vduse_vq_eventfd { + __u32 index; +#define VDUSE_EVENTFD_DEASSIGN -1 + int fd; +}; + +/* + * Setup kick eventfd for specified virtqueue. The kick eventfd is used + * by VDUSE kernel module to notify userspace to consume the avail vring. + */ +#define VDUSE_VQ_SETUP_KICKFD _IOW(VDUSE_BASE, 0x16, struct vduse_vq_eventfd) + +/* + * Inject an interrupt for specific virtqueue. It's used to notify virtio driver + * to consume the used vring. + */ +#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32) + +/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ + +/** + * enum vduse_req_type - request type + * @VDUSE_GET_VQ_STATE: get the state for specified virtqueue from userspace + * @VDUSE_SET_STATUS: set the device status + * @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for + * specified IOVA range via VDUSE_IOTLB_GET_FD ioctl + */ +enum vduse_req_type { + VDUSE_GET_VQ_STATE, + VDUSE_SET_STATUS, + VDUSE_UPDATE_IOTLB, +}; + +/** + * struct vduse_vq_state - virtqueue state + * @index: virtqueue index + * @split: split virtqueue state + * @packed: packed virtqueue state + */ +struct vduse_vq_state { + __u32 index; + union { + struct vduse_vq_state_split split; + struct vduse_vq_state_packed packed; + }; +}; + +/** + * struct vduse_dev_status - device status + * @status: device status + */ +struct vduse_dev_status { + __u8 status; +}; + +/** + * struct vduse_iova_range - IOVA range [start, last] + * @start: start of the IOVA range + * @last: last of the IOVA range + */ +struct vduse_iova_range { + __u64 start; + __u64 last; +}; + +/** + * struct vduse_dev_request - control request + * @type: request type + * @request_id: request id + * @reserved: for future use + * @vq_state: virtqueue state, only index field is available + * @s: device status + * @iova: IOVA range for updating + * @padding: padding + * + * Structure used by read(2) on /dev/vduse/$NAME. + */ +struct vduse_dev_request { + __u32 type; + __u32 request_id; + __u32 reserved[4]; + union { + struct vduse_vq_state vq_state; + struct vduse_dev_status s; + struct vduse_iova_range iova; + __u32 padding[32]; + }; +}; + +/** + * struct vduse_dev_response - response to control request + * @request_id: corresponding request id + * @result: the result of request + * @reserved: for future use, needs to be initialized to zero + * @vq_state: virtqueue state + * @padding: padding + * + * Structure used by write(2) on /dev/vduse/$NAME. + */ +struct vduse_dev_response { + __u32 request_id; +#define VDUSE_REQ_RESULT_OK 0x00 +#define VDUSE_REQ_RESULT_FAILED 0x01 + __u32 result; + __u32 reserved[4]; + union { + struct vduse_vq_state vq_state; + __u32 padding[32]; + }; +}; + +#endif /* _UAPI_VDUSE_H_ */ -- cgit v1.2.3 From 9d1b3afd73047d4dd30e3636412c9f9b5def2b14 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 3 Sep 2021 19:20:56 -0700 Subject: cxl/uapi: Fix defined but not used warnings Fix unused-const-variable warnings emitted by gcc when cxlmem.h is used by pretty much all files except pci.c Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/163072205652.2250120.16833548560832424468.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index f6e8a005b113..8d206f27bb6d 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -50,7 +50,7 @@ enum { CXL_CMDS }; #define ___C(a, b) { b } static const struct { const char *name; -} cxl_command_names[] = { CXL_CMDS }; +} cxl_command_names[] __attribute__((__unused__)) = { CXL_CMDS }; /* * Here's how this actually breaks out: -- cgit v1.2.3