From ed35e2f2f0ded15df313ae6f8da21e85c8e1e493 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Tue, 21 Feb 2023 17:52:05 +0100 Subject: landlock: Clarify documentation for the LANDLOCK_ACCESS_FS_REFER right MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clarify the "refer" documentation by splitting up a big paragraph of text. - Call out specifically that the denial by default applies to ABI v1 as well. - Turn the three additional constraints for link/rename operations into bullet points, to give it more structure. Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20230221165205.4231-1-gnoack3000@gmail.com Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 46 +++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index f3223f964691..81d09ef9aa50 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -130,21 +130,37 @@ struct landlock_path_beneath_attr { * - %LANDLOCK_ACCESS_FS_MAKE_BLOCK: Create (or rename or link) a block device. * - %LANDLOCK_ACCESS_FS_MAKE_SYM: Create (or rename or link) a symbolic link. * - %LANDLOCK_ACCESS_FS_REFER: Link or rename a file from or to a different - * directory (i.e. reparent a file hierarchy). This access right is - * available since the second version of the Landlock ABI. This is also the - * only access right which is always considered handled by any ruleset in - * such a way that reparenting a file hierarchy is always denied by default. - * To avoid privilege escalation, it is not enough to add a rule with this - * access right. When linking or renaming a file, the destination directory - * hierarchy must also always have the same or a superset of restrictions of - * the source hierarchy. If it is not the case, or if the domain doesn't - * handle this access right, such actions are denied by default with errno - * set to ``EXDEV``. Linking also requires a ``LANDLOCK_ACCESS_FS_MAKE_*`` - * access right on the destination directory, and renaming also requires a - * ``LANDLOCK_ACCESS_FS_REMOVE_*`` access right on the source's (file or - * directory) parent. Otherwise, such actions are denied with errno set to - * ``EACCES``. The ``EACCES`` errno prevails over ``EXDEV`` to let user space - * efficiently deal with an unrecoverable error. + * directory (i.e. reparent a file hierarchy). + * + * This access right is available since the second version of the Landlock + * ABI. + * + * This is the only access right which is denied by default by any ruleset, + * even if the right is not specified as handled at ruleset creation time. + * The only way to make a ruleset grant this right is to explicitly allow it + * for a specific directory by adding a matching rule to the ruleset. + * + * In particular, when using the first Landlock ABI version, Landlock will + * always deny attempts to reparent files between different directories. + * + * In addition to the source and destination directories having the + * %LANDLOCK_ACCESS_FS_REFER access right, the attempted link or rename + * operation must meet the following constraints: + * + * * The reparented file may not gain more access rights in the destination + * directory than it previously had in the source directory. If this is + * attempted, the operation results in an ``EXDEV`` error. + * + * * When linking or renaming, the ``LANDLOCK_ACCESS_FS_MAKE_*`` right for the + * respective file type must be granted for the destination directory. + * Otherwise, the operation results in an ``EACCES`` error. + * + * * When renaming, the ``LANDLOCK_ACCESS_FS_REMOVE_*`` right for the + * respective file type must be granted for the source directory. Otherwise, + * the operation results in an ``EACCES`` error. + * + * If multiple requirements are not met, the ``EACCES`` error code takes + * precedence over ``EXDEV``. * * .. warning:: * -- cgit v1.2.3 From b5964b968ac64c2ec2debee7518499113b27c34e Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:50 -0800 Subject: bpf: Add skb dynptrs Add skb dynptrs, which are dynptrs whose underlying pointer points to a skb. The dynptr acts on skb data. skb dynptrs have two main benefits. One is that they allow operations on sizes that are not statically known at compile-time (eg variable-sized accesses). Another is that parsing the packet data through dynptrs (instead of through direct access of skb->data and skb->data_end) can be more ergonomic and less brittle (eg does not need manual if checking for being within bounds of data_end). For bpf prog types that don't support writes on skb data, the dynptr is read-only (bpf_dynptr_write() will return an error) For reads and writes through the bpf_dynptr_read() and bpf_dynptr_write() interfaces, reading and writing from/to data in the head as well as from/to non-linear paged buffers is supported. Data slices through the bpf_dynptr_data API are not supported; instead bpf_dynptr_slice() and bpf_dynptr_slice_rdwr() (added in subsequent commit) should be used. For examples of how skb dynptrs can be used, please see the attached selftests. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-8-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 62ce1f5d1b1d..d0351d30e551 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5325,11 +5325,17 @@ union bpf_attr { * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. - * *flags* is currently unused. + * + * *flags* must be 0 except for skb-type dynptrs. + * + * For skb-type dynptrs: + * * For *flags*, please see the flags accepted by + * **bpf_skb_store_bytes**\ (). * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* - * is a read-only dynptr or if *flags* is not 0. + * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs, + * other errors correspond to errors returned by **bpf_skb_store_bytes**\ (). * * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) * Description @@ -5337,6 +5343,9 @@ union bpf_attr { * * *len* must be a statically known value. The returned data slice * is invalidated whenever the dynptr is invalidated. + * + * skb type dynptrs may not use bpf_dynptr_data. They should + * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr. * Return * Pointer to the underlying dynptr data, NULL if the dynptr is * read-only, if the dynptr is invalid, or if the offset and length -- cgit v1.2.3 From 05421aecd4ed65da0dc17b0c3c13779ef334e9e5 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:51 -0800 Subject: bpf: Add xdp dynptrs Add xdp dynptrs, which are dynptrs whose underlying pointer points to a xdp_buff. The dynptr acts on xdp data. xdp dynptrs have two main benefits. One is that they allow operations on sizes that are not statically known at compile-time (eg variable-sized accesses). Another is that parsing the packet data through dynptrs (instead of through direct access of xdp->data and xdp->data_end) can be more ergonomic and less brittle (eg does not need manual if checking for being within bounds of data_end). For reads and writes on the dynptr, this includes reading/writing from/to and across fragments. Data slices through the bpf_dynptr_data API are not supported; instead bpf_dynptr_slice() and bpf_dynptr_slice_rdwr() should be used. For examples of how xdp dynptrs can be used, please see the attached selftests. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-9-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d0351d30e551..faa304c926cf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5344,7 +5344,7 @@ union bpf_attr { * *len* must be a statically known value. The returned data slice * is invalidated whenever the dynptr is invalidated. * - * skb type dynptrs may not use bpf_dynptr_data. They should + * skb and xdp type dynptrs may not use bpf_dynptr_data. They should * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr. * Return * Pointer to the underlying dynptr data, NULL if the dynptr is -- cgit v1.2.3 From 66e3a13e7c2c44d0c9dd6bb244680ca7529a8845 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 1 Mar 2023 07:49:52 -0800 Subject: bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr Two new kfuncs are added, bpf_dynptr_slice and bpf_dynptr_slice_rdwr. The user must pass in a buffer to store the contents of the data slice if a direct pointer to the data cannot be obtained. For skb and xdp type dynptrs, these two APIs are the only way to obtain a data slice. However, for other types of dynptrs, there is no difference between bpf_dynptr_slice(_rdwr) and bpf_dynptr_data. For skb type dynptrs, the data is copied into the user provided buffer if any of the data is not in the linear portion of the skb. For xdp type dynptrs, the data is copied into the user provided buffer if the data is between xdp frags. If the skb is cloned and a call to bpf_dynptr_data_rdwr is made, then the skb will be uncloned (see bpf_unclone_prologue()). Please note that any bpf_dynptr_write() automatically invalidates any prior data slices of the skb dynptr. This is because the skb may be cloned or may need to pull its paged buffer into the head. As such, any bpf_dynptr_write() will automatically have its prior data slices invalidated, even if the write is to data in the skb head of an uncloned skb. Please note as well that any other helper calls that change the underlying packet buffer (eg bpf_skb_pull_data()) invalidates any data slices of the skb dynptr as well, for the same reasons. Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20230301154953.641654-10-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index faa304c926cf..c9699304aed2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5329,6 +5329,11 @@ union bpf_attr { * *flags* must be 0 except for skb-type dynptrs. * * For skb-type dynptrs: + * * All data slices of the dynptr are automatically + * invalidated after **bpf_dynptr_write**\ (). This is + * because writing may pull the skb and change the + * underlying packet buffer. + * * * For *flags*, please see the flags accepted by * **bpf_skb_store_bytes**\ (). * Return -- cgit v1.2.3 From f71f8530494bb5ab43d3369ef0ce8373eb1ee077 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 2 Mar 2023 13:46:13 +0200 Subject: bpf: Add support for absolute value BPF timers Add a new flag BPF_F_TIMER_ABS that can be passed to bpf_timer_start() to start an absolute value timer instead of the default relative value. This makes the timer expire at an exact point in time, instead of a time with latencies induced by both the BPF and timer subsystems. Suggested-by: Artem Bityutskiy Signed-off-by: Tero Kristo Link: https://lore.kernel.org/r/20230302114614.2985072-2-tero.kristo@linux.intel.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c9699304aed2..976b194eb775 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4969,6 +4969,12 @@ union bpf_attr { * different maps if key/value layout matches across maps. * Every bpf_timer_set_callback() can have different callback_fn. * + * *flags* can be one of: + * + * **BPF_F_TIMER_ABS** + * Start the timer in absolute expire value instead of the + * default relative one. + * * Return * 0 on success. * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier @@ -7097,4 +7103,13 @@ struct bpf_core_relo { enum bpf_core_relo_kind kind; }; +/* + * Flags to control bpf_timer_start() behaviour. + * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is + * relative to current time. + */ +enum { + BPF_F_TIMER_ABS = (1ULL << 0), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 01c7a5978993209f47ecfd95dcbd52f6c6672384 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 6 Mar 2023 15:48:11 -0500 Subject: fs: dlm: remove deprecated code parts This patch removes code parts which was declared deprecated by commit 6b0afc0cc3e9 ("fs: dlm: don't use deprecated timeout features by default"). This contains the following dlm functionality: - start a cancel of a dlm request did not complete after certain timeout: The current way how dlm cancellation works and interfering with other dlm requests triggered by the user can end in an overlapping and returning in -EBUSY. The most user don't handle this case and are unaware that DLM can return such errno in such situation. Due the timeout the user are mostly unaware when this happens. - start a netlink warning messages for user space if dlm requests did not complete after certain timeout: This feature was never being built in the only known dlm user space side. As we are to remove the timeout cancellation feature we can directly remove this feature as well. There might be the possibility to bring the timeout cancellation feature back. However the current way of handling the -EBUSY case which is only a software limitation and not a hardware limitation should be changed. We minimize the current code base in DLM cancellation feature to not have to deal with those existing features while solving the DLM cancellation feature in general. UAPI define DLM_LSFL_TIMEWARN is commented as deprecated and reserved value. We should avoid at first to give it a new meaning but let possible users still compile by keeping this define. In far future we can give this flag a new meaning. The same for the DLM_LKF_TIMEOUT lock request flag. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/uapi/linux/dlm.h | 1 + include/uapi/linux/dlm_netlink.h | 60 --------------------------------------- include/uapi/linux/dlmconstants.h | 5 +++- 3 files changed, 5 insertions(+), 61 deletions(-) delete mode 100644 include/uapi/linux/dlm_netlink.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dlm.h b/include/uapi/linux/dlm.h index 1923f4f3b05e..e7e905fb0bb2 100644 --- a/include/uapi/linux/dlm.h +++ b/include/uapi/linux/dlm.h @@ -68,6 +68,7 @@ struct dlm_lksb { /* dlm_new_lockspace() flags */ +/* DLM_LSFL_TIMEWARN is deprecated and reserved. DO NOT USE! */ #define DLM_LSFL_TIMEWARN 0x00000002 #define DLM_LSFL_NEWEXCL 0x00000008 diff --git a/include/uapi/linux/dlm_netlink.h b/include/uapi/linux/dlm_netlink.h deleted file mode 100644 index 5dc3a67d353d..000000000000 --- a/include/uapi/linux/dlm_netlink.h +++ /dev/null @@ -1,60 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2007 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - */ - -#ifndef _DLM_NETLINK_H -#define _DLM_NETLINK_H - -#include -#include - -enum { - DLM_STATUS_WAITING = 1, - DLM_STATUS_GRANTED = 2, - DLM_STATUS_CONVERT = 3, -}; - -#define DLM_LOCK_DATA_VERSION 1 - -struct dlm_lock_data { - __u16 version; - __u32 lockspace_id; - int nodeid; - int ownpid; - __u32 id; - __u32 remid; - __u64 xid; - __s8 status; - __s8 grmode; - __s8 rqmode; - unsigned long timestamp; - int resource_namelen; - char resource_name[DLM_RESNAME_MAXLEN]; -}; - -enum { - DLM_CMD_UNSPEC = 0, - DLM_CMD_HELLO, /* user->kernel */ - DLM_CMD_TIMEOUT, /* kernel->user */ - __DLM_CMD_MAX, -}; - -#define DLM_CMD_MAX (__DLM_CMD_MAX - 1) - -enum { - DLM_TYPE_UNSPEC = 0, - DLM_TYPE_LOCK, - __DLM_TYPE_MAX, -}; - -#define DLM_TYPE_MAX (__DLM_TYPE_MAX - 1) - -#define DLM_GENL_VERSION 0x1 -#define DLM_GENL_NAME "DLM" - -#endif /* _DLM_NETLINK_H */ diff --git a/include/uapi/linux/dlmconstants.h b/include/uapi/linux/dlmconstants.h index a8ae47c32a37..6ca77a6388bc 100644 --- a/include/uapi/linux/dlmconstants.h +++ b/include/uapi/linux/dlmconstants.h @@ -87,7 +87,6 @@ * DLM_LKF_NODLCKWT * * Do not cancel the lock if it gets into conversion deadlock. - * Exclude this lock from being monitored due to DLM_LSFL_TIMEWARN. * * DLM_LKF_NODLCKBLK * @@ -132,6 +131,10 @@ * Unlock the lock even if it is converting or waiting or has sublocks. * Only really for use by the userland device.c code. * + * DLM_LKF_TIMEOUT + * + * This value is deprecated and reserved. DO NOT USE! + * */ #define DLM_LKF_NOQUEUE 0x00000001 -- cgit v1.2.3 From d509c55cda22096a1836e35b03f66e1ef411c0c2 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 1 Mar 2023 12:09:13 +0200 Subject: wifi: nl80211: Update the documentation of NL80211_SCAN_FLAG_COLOCATED_6GHZ Add a detailed description of NL80211_SCAN_FLAG_COLOCATED_6GHZ flag. Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.487ab04feb39.I5129fd61841332474693046241586f057b134c3c@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f14621a954e1..c22eeb18b996 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -6510,8 +6510,14 @@ enum nl80211_timeout_reason { * @NL80211_SCAN_FLAG_FREQ_KHZ: report scan results with * %NL80211_ATTR_SCAN_FREQ_KHZ. This also means * %NL80211_ATTR_SCAN_FREQUENCIES will not be included. - * @NL80211_SCAN_FLAG_COLOCATED_6GHZ: scan for colocated APs reported by - * 2.4/5 GHz APs + * @NL80211_SCAN_FLAG_COLOCATED_6GHZ: scan for collocated APs reported by + * 2.4/5 GHz APs. When the flag is set, the scan logic will use the + * information from the RNR element found in beacons/probe responses + * received on the 2.4/5 GHz channels to actively scan only the 6GHz + * channels on which APs are expected to be found. Note that when not set, + * the scan logic would scan all 6GHz channels, but since transmission of + * probe requests on non PSC channels is limited, it is highly likely that + * these channels would passively be scanned. */ enum nl80211_scan_flags { NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, -- cgit v1.2.3 From cbbaf2bb829b6c4ef911d4a725fc9b1fadc1e43f Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Wed, 1 Mar 2023 12:09:21 +0200 Subject: wifi: nl80211: add a command to enable/disable HW timestamping Add a command to enable and disable HW timestamping of TM and FTM frames. HW timestamping can be enabled for a specific mac address or for all addresses. The low level driver will indicate how many peers HW timestamping can be enabled concurrently, and this information will be passed to userspace. Signed-off-by: Avraham Stern Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.05678d7b1c17.Iccc08869ea8156f1c71a3111a47f86dd56234bd0@changeid [switch to needing netdev UP, minor edits] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c22eeb18b996..c8520c150f9c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1299,6 +1299,16 @@ * @NL80211_CMD_MODIFY_LINK_STA: Modify a link of an MLD station * @NL80211_CMD_REMOVE_LINK_STA: Remove a link of an MLD station * + * @NL80211_CMD_SET_HW_TIMESTAMP: Enable/disable HW timestamping of Timing + * measurement and Fine timing measurement frames. If %NL80211_ATTR_MAC + * is included, enable/disable HW timestamping only for frames to/from the + * specified MAC address. Otherwise enable/disable HW timestamping for + * all TM/FTM frames (including ones that were enabled with specific MAC + * address). If %NL80211_ATTR_HW_TIMESTAMP_ENABLED is not included, disable + * HW timestamping. + * The number of peers that HW timestamping can be enabled for concurrently + * is indicated by %NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1550,6 +1560,8 @@ enum nl80211_commands { NL80211_CMD_MODIFY_LINK_STA, NL80211_CMD_REMOVE_LINK_STA, + NL80211_CMD_SET_HW_TIMESTAMP, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2775,6 +2787,13 @@ enum nl80211_commands { * indicates that the sub-channel is punctured. Higher 16 bits are * reserved. * + * @NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS: Maximum number of peers that HW + * timestamping can be enabled for concurrently (u16), a wiphy attribute. + * A value of 0xffff indicates setting for all peers (i.e. not specifying + * an address with %NL80211_CMD_SET_HW_TIMESTAMP) is supported. + * @NL80211_ATTR_HW_TIMESTAMP_ENABLED: Indicates whether HW timestamping should + * be enabled or not (flag attribute). + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3306,6 +3325,9 @@ enum nl80211_attrs { NL80211_ATTR_PUNCT_BITMAP, + NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS, + NL80211_ATTR_HW_TIMESTAMP_ENABLED, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 6933486133ecf71bbe273d7ac72cfc4a51286af3 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Thu, 12 Jan 2023 06:54:13 +0530 Subject: wifi: nl80211: Add support for randomizing TA of auth and deauth frames Add support to use a random local address in authentication and deauthentication frames sent to unassociated peer when the driver supports. The driver needs to configure receive behavior to accept frames with random transmit address specified in TX path authentication frames during the time of the frame exchange is pending and such frames need to be acknowledged similarly to frames sent to the local permanent address when this random address functionality is used. This capability allows use of randomized transmit address for PASN authentication frames to improve privacy of WLAN clients. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20230112012415.167556-2-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c8520c150f9c..9a0ac0363f1f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -6348,6 +6348,10 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_SECURE_NAN: Device supports NAN Pairing which enables * authentication, data encryption and message integrity. * + * @NL80211_EXT_FEATURE_AUTH_AND_DEAUTH_RANDOM_TA: Device supports randomized TA + * in authentication and deauthentication frames sent to unassociated peer + * using @NL80211_CMD_FRAME. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6418,6 +6422,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE, NL80211_EXT_FEATURE_PUNCT, NL80211_EXT_FEATURE_SECURE_NAN, + NL80211_EXT_FEATURE_AUTH_AND_DEAUTH_RANDOM_TA, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 4386b921857793440ebd4db3d6b70639149c7074 Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Fri, 24 Feb 2023 10:52:51 +0100 Subject: netfilter: bridge: introduce broute meta statement nftables equivalent for ebtables -t broute. Implement broute meta statement to set br_netfilter_broute flag in skb to force a packet to be routed instead of being bridged. Signed-off-by: Sriram Yagnaraman Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index ff677f3a6cad..9c6f02c26054 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -931,6 +931,7 @@ enum nft_exthdr_attributes { * @NFT_META_TIME_HOUR: hour of day (in seconds) * @NFT_META_SDIF: slave device interface index * @NFT_META_SDIFNAME: slave device interface name + * @NFT_META_BRI_BROUTE: packet br_netfilter_broute bit */ enum nft_meta_keys { NFT_META_LEN, @@ -969,6 +970,7 @@ enum nft_meta_keys { NFT_META_TIME_HOUR, NFT_META_SDIF, NFT_META_SDIFNAME, + NFT_META_BRI_BROUTE, __NFT_META_IIFTYPE, }; -- cgit v1.2.3 From 6018e1f407cccf39b804d1f75ad4de7be4e6cc45 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 8 Mar 2023 10:41:17 -0800 Subject: bpf: implement numbers iterator Implement the first open-coded iterator type over a range of integers. It's public API consists of: - bpf_iter_num_new() constructor, which accepts [start, end) range (that is, start is inclusive, end is exclusive). - bpf_iter_num_next() which will keep returning read-only pointer to int until the range is exhausted, at which point NULL will be returned. If bpf_iter_num_next() is kept calling after this, NULL will be persistently returned. - bpf_iter_num_destroy() destructor, which needs to be called at some point to clean up iterator state. BPF verifier enforces that iterator destructor is called at some point before BPF program exits. Note that `start = end = X` is a valid combination to setup an empty iterator. bpf_iter_num_new() will return 0 (success) for any such combination. If bpf_iter_num_new() detects invalid combination of input arguments, it returns error, resets iterator state to, effectively, empty iterator, so any subsequent call to bpf_iter_num_next() will keep returning NULL. BPF verifier has no knowledge that returned integers are in the [start, end) value range, as both `start` and `end` are not statically known and enforced: they are runtime values. While the implementation is pretty trivial, some care needs to be taken to avoid overflows and underflows. Subsequent selftests will validate correctness of [start, end) semantics, especially around extremes (INT_MIN and INT_MAX). Similarly to bpf_loop(), we enforce that no more than BPF_MAX_LOOPS can be specified. bpf_iter_num_{new,next,destroy}() is a logical evolution from bounded BPF loops and bpf_loop() helper and is the basis for implementing ergonomic BPF loops with no statically known or verified bounds. Subsequent patches implement bpf_for() macro, demonstrating how this can be wrapped into something that works and feels like a normal for() loop in C language. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230308184121.1165081-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 976b194eb775..4abddb668a10 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7112,4 +7112,12 @@ enum { BPF_F_TIMER_ABS = (1ULL << 0), }; +/* BPF numbers iterator state */ +struct bpf_iter_num { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __attribute__((aligned(8))); + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 4821a076eb602a6238528e9ebafeac853c833415 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:23:26 -0500 Subject: sctp: add fair capacity stream scheduler As it says in rfc8260#section-3.5 about the fair capacity scheduler: A fair capacity distribution between the streams is used. This scheduler considers the lengths of the messages of each stream and schedules them in a specific way to maintain an equal capacity for all streams. The details are implementation dependent. interleaving user messages allows for a better realization of the fair capacity usage. This patch adds Fair Capacity Scheduler based on the foundations added by commit 5bbbbe32a431 ("sctp: introduce stream scheduler foundations"): A fc_list and a fc_length are added into struct sctp_stream_out_ext and a fc_list is added into struct sctp_stream. In .enqueue, when there are chunks enqueued into a stream, this stream will be linked into stream-> fc_list by its fc_list ordered by its fc_length. In .dequeue, it always picks up the 1st skb from stream->fc_list. In .dequeue_done, fc_length is increased by chunk's len and update its location in stream->fc_list according to the its new fc_length. Note that when the new fc_length overflows in .dequeue_done, instead of resetting all fc_lengths to 0, we only reduced them by U32_MAX / 4 to avoid a moment of imbalance in the scheduling, as Marcelo suggested. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Paolo Abeni --- include/uapi/linux/sctp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index ed7d4ecbf53d..6814c5a1c4bc 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1211,7 +1211,8 @@ enum sctp_sched_type { SCTP_SS_DEFAULT = SCTP_SS_FCFS, SCTP_SS_PRIO, SCTP_SS_RR, - SCTP_SS_MAX = SCTP_SS_RR + SCTP_SS_FC, + SCTP_SS_MAX = SCTP_SS_FC }; /* Probe Interval socket option */ -- cgit v1.2.3 From 42d452e7709fdb4d42376d2a97369e22cc80a5d2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:23:27 -0500 Subject: sctp: add weighted fair queueing stream scheduler As it says in rfc8260#section-3.6 about the weighted fair queueing scheduler: A Weighted Fair Queueing scheduler between the streams is used. The weight is configurable per outgoing SCTP stream. This scheduler considers the lengths of the messages of each stream and schedules them in a specific way to use the capacity according to the given weights. If the weight of stream S1 is n times the weight of stream S2, the scheduler should assign to stream S1 n times the capacity it assigns to stream S2. The details are implementation dependent. Interleaving user messages allows for a better realization of the capacity usage according to the given weights. This patch adds Weighted Fair Queueing Scheduler actually based on the code of Fair Capacity Scheduler by adding fc_weight into struct sctp_stream_out_ext and taking it into account when sorting stream-> fc_list in sctp_sched_fc_sched() and sctp_sched_fc_dequeue_done(). Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Paolo Abeni --- include/uapi/linux/sctp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 6814c5a1c4bc..b7d91d4cf0db 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1212,7 +1212,8 @@ enum sctp_sched_type { SCTP_SS_PRIO, SCTP_SS_RR, SCTP_SS_FC, - SCTP_SS_MAX = SCTP_SS_FC + SCTP_SS_WFQ, + SCTP_SS_MAX = SCTP_SS_WFQ }; /* Probe Interval socket option */ -- cgit v1.2.3 From 9b12f050c76f090cc6d0aebe0ef76fed79ec3f15 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 22 Feb 2023 10:23:02 +0100 Subject: char: pcmcia: remove all the drivers These char PCMCIA drivers are buggy[1] and receive only minimal care. It was concluded[2], that we should try to remove most pcmcia drivers completely. Let's start with these char broken one. Note that I also removed a UAPI header: include/uapi/linux/cm4000_cs.h. I found only coccinelle tests mentioning some ioctl constants from that file. But they are not actually used. Anyway, should someone complain, we may reintroduce the header (or its parts). [1] https://lore.kernel.org/all/f41c2765-80e0-48bc-b1e4-8cfd3230fd4a@www.fastmail.com/ [2] https://lore.kernel.org/all/c5b39544-a4fb-4796-a046-0b9be9853787@app.fastmail.com/ Signed-off-by: Jiri Slaby (SUSE) Cc: "Hyunwoo Kim" Cc: Harald Welte Cc: Lubomir Rintel Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Acked-by: Dominik Brodowski Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230222092302.6348-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/cm4000_cs.h | 64 ------------------------------------------ 1 file changed, 64 deletions(-) delete mode 100644 include/uapi/linux/cm4000_cs.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cm4000_cs.h b/include/uapi/linux/cm4000_cs.h deleted file mode 100644 index c70a62ec8a49..000000000000 --- a/include/uapi/linux/cm4000_cs.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_CM4000_H_ -#define _UAPI_CM4000_H_ - -#include -#include - -#define MAX_ATR 33 - -#define CM4000_MAX_DEV 4 - -/* those two structures are passed via ioctl() from/to userspace. They are - * used by existing userspace programs, so I kepth the awkward "bIFSD" naming - * not to break compilation of userspace apps. -HW */ - -typedef struct atreq { - __s32 atr_len; - unsigned char atr[64]; - __s32 power_act; - unsigned char bIFSD; - unsigned char bIFSC; -} atreq_t; - - -/* what is particularly stupid in the original driver is the arch-dependent - * member sizes. This leads to CONFIG_COMPAT breakage, since 32bit userspace - * will lay out the structure members differently than the 64bit kernel. - * - * I've changed "ptsreq.protocol" from "unsigned long" to "__u32". - * On 32bit this will make no difference. With 64bit kernels, it will make - * 32bit apps work, too. - */ - -typedef struct ptsreq { - __u32 protocol; /*T=0: 2^0, T=1: 2^1*/ - unsigned char flags; - unsigned char pts1; - unsigned char pts2; - unsigned char pts3; -} ptsreq_t; - -#define CM_IOC_MAGIC 'c' -#define CM_IOC_MAXNR 255 - -#define CM_IOCGSTATUS _IOR (CM_IOC_MAGIC, 0, unsigned char *) -#define CM_IOCGATR _IOWR(CM_IOC_MAGIC, 1, atreq_t *) -#define CM_IOCSPTS _IOW (CM_IOC_MAGIC, 2, ptsreq_t *) -#define CM_IOCSRDR _IO (CM_IOC_MAGIC, 3) -#define CM_IOCARDOFF _IO (CM_IOC_MAGIC, 4) - -#define CM_IOSDBGLVL _IOW(CM_IOC_MAGIC, 250, int*) - -/* card and device states */ -#define CM_CARD_INSERTED 0x01 -#define CM_CARD_POWERED 0x02 -#define CM_ATR_PRESENT 0x04 -#define CM_ATR_VALID 0x08 -#define CM_STATE_VALID 0x0f -/* extra info only from CM4000 */ -#define CM_NO_READER 0x10 -#define CM_BAD_CARD 0x20 - - -#endif /* _UAPI_CM4000_H_ */ -- cgit v1.2.3 From 5a70f4a63000ba68004fb3c1aaf2f90303dd228f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Wei=C3=9F?= Date: Thu, 9 Mar 2023 14:38:23 +0100 Subject: bpf: Fix a typo for BPF_F_ANY_ALIGNMENT in bpf.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix s/BPF_PROF_LOAD/BPF_PROG_LOAD/ typo in the documentation comment for BPF_F_ANY_ALIGNMENT in bpf.h. Signed-off-by: Michael Weiß Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230309133823.944097-1-michael.weiss@aisec.fraunhofer.de --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4abddb668a10..d8c534e05b0a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1108,7 +1108,7 @@ enum bpf_link_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) -/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will allow any alignment whatsoever. On platforms * with strict alignment requirements for loads ands stores (such * as sparc and mips) the verifier validates that all loads and -- cgit v1.2.3 From 5fe5a7586c27a13a42b7946334d41a96c776a188 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Fri, 10 Mar 2023 17:07:53 +0100 Subject: Move COMPAT_ATM_ADDPARTY to net/atm/svc.c This used to be behind an #ifdef COMPAT_COMPAT, so most of userspace wouldn't have seen the definition before. Unfortunately this header file became visible to userspace, so the definition has instead been moved to net/atm/svc.c (the only user). Signed-off-by: Palmer Dabbelt Reviewed-by: Andrew Waterman Reviewed-by: Albert Ou Message-Id: <1447119071-19392-4-git-send-email-palmer@dabbelt.com> Reviewed-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Signed-off-by: Thomas Huth Signed-off-by: Arnd Bergmann --- include/uapi/linux/atmdev.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/atmdev.h b/include/uapi/linux/atmdev.h index a5c15cf23bd7..20b0215084fc 100644 --- a/include/uapi/linux/atmdev.h +++ b/include/uapi/linux/atmdev.h @@ -101,10 +101,6 @@ struct atm_dev_stats { /* use backend to make new if */ #define ATM_ADDPARTY _IOW('a', ATMIOC_SPECIAL+4,struct atm_iobuf) /* add party to p2mp call */ -#ifdef CONFIG_COMPAT -/* It actually takes struct sockaddr_atmsvc, not struct atm_iobuf */ -#define COMPAT_ATM_ADDPARTY _IOW('a', ATMIOC_SPECIAL+4,struct compat_atm_iobuf) -#endif #define ATM_DROPPARTY _IOW('a', ATMIOC_SPECIAL+5,int) /* drop party from p2mp call */ -- cgit v1.2.3 From 063f3ed9faf38e41449838bb3258a7d00f75c5d1 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Fri, 10 Mar 2023 17:07:54 +0100 Subject: Move ep_take_care_of_epollwakeup() to fs/eventpoll.c This doesn't make any sense to expose to userspace, so it's been moved to the one user. This was introduced by commit 95f19f658ce1 ("epoll: drop EPOLLWAKEUP if PM_SLEEP is disabled"). Signed-off-by: Palmer Dabbelt Reviewed-by: Andrew Waterman Reviewed-by: Albert Ou Message-Id: <1447119071-19392-7-git-send-email-palmer@dabbelt.com> [thuth: Rebased to fix contextual conflicts] Reviewed-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Signed-off-by: Thomas Huth Signed-off-by: Arnd Bergmann --- include/uapi/linux/eventpoll.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index e687658843b1..cfbcc4cc49ac 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -85,16 +85,4 @@ struct epoll_event { __u64 data; } EPOLL_PACKED; -#ifdef CONFIG_PM_SLEEP -static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev) -{ - if ((epev->events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) - epev->events &= ~EPOLLWAKEUP; -} -#else -static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev) -{ - epev->events &= ~EPOLLWAKEUP; -} -#endif #endif /* _UAPI_LINUX_EVENTPOLL_H */ -- cgit v1.2.3 From d3c7ec7588508bd59bbb0e468d3820f3d3cba690 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Fri, 10 Mar 2023 17:07:55 +0100 Subject: Move bp_type_idx to include/linux/hw_breakpoint.h This has a "#ifdef CONFIG_*" that used to be exposed to userspace. The names in here are so generic that I don't think it's a good idea to expose them to userspace (or even the rest of the kernel). There are multiple in-kernel users, so it's been moved to a kernel header file. Signed-off-by: Palmer Dabbelt Reviewed-by: Andrew Waterman Reviewed-by: Albert Ou Message-Id: <1447119071-19392-10-git-send-email-palmer@dabbelt.com> [thuth: Remove it also from tools/include/uapi/linux/hw_breakpoint.h] Reviewed-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Signed-off-by: Thomas Huth Signed-off-by: Arnd Bergmann --- include/uapi/linux/hw_breakpoint.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/hw_breakpoint.h b/include/uapi/linux/hw_breakpoint.h index 965e4d8606d8..1575d3ca6f0d 100644 --- a/include/uapi/linux/hw_breakpoint.h +++ b/include/uapi/linux/hw_breakpoint.h @@ -22,14 +22,4 @@ enum { HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, }; -enum bp_type_idx { - TYPE_INST = 0, -#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS - TYPE_DATA = 0, -#else - TYPE_DATA = 1, -#endif - TYPE_MAX -}; - #endif /* _UAPI_LINUX_HW_BREAKPOINT_H */ -- cgit v1.2.3 From f5bdc61eb6089e10181b8f51b0a180bbd47a89fc Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 10 Mar 2023 17:07:56 +0100 Subject: pktcdvd: Remove CONFIG_CDROM_PKTCDVD_WCACHE from uapi header CONFIG_* switches should not be exposed in uapi headers, thus let's get rid of the USE_WCACHING macro here (which was also named way to generic) and integrate the logic directly in the only function that needs it. Suggested-by: Christoph Hellwig Signed-off-by: Thomas Huth Signed-off-by: Arnd Bergmann --- include/uapi/linux/pktcdvd.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pktcdvd.h b/include/uapi/linux/pktcdvd.h index 9cbb55d21c94..6a5552dfd6af 100644 --- a/include/uapi/linux/pktcdvd.h +++ b/include/uapi/linux/pktcdvd.h @@ -29,17 +29,6 @@ */ #define PACKET_WAIT_TIME (HZ * 5 / 1000) -/* - * use drive write caching -- we need deferred error handling to be - * able to successfully recover with this option (drive will return good - * status as soon as the cdb is validated). - */ -#if defined(CONFIG_CDROM_PKTCDVD_WCACHE) -#define USE_WCACHING 1 -#else -#define USE_WCACHING 0 -#endif - /* * No user-servicable parts beyond this point -> */ -- cgit v1.2.3 From be50da3e9d4ad1958f7b11322d44d94d5c25a4c1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 9 Mar 2023 10:45:59 +0100 Subject: net: virtio_net: implement exact header length guest feature Virtio spec introduced a feature VIRTIO_NET_F_GUEST_HDRLEN which when set implicates that device benefits from knowing the exact size of the header. For compatibility, to signal to the device that the header is reliable driver also needs to set this feature. Without this feature set by driver, device has to figure out the header size itself. Quoting the original virtio spec: "hdr_len is a hint to the device as to how much of the header needs to be kept to copy into each packet" "a hint" might not be clear for the reader what does it mean, if it is "maybe like that" of "exactly like that". This feature just makes it crystal clear and let the device count on the hdr_len being filled up by the exact length of header. Also note the spec already has following note about hdr_len: "Due to various bugs in implementations, this field is not useful as a guarantee of the transport header size." Without this feature the device needs to parse the header in core data path handling. Accurate information helps the device to eliminate such header parsing and directly use the hardware accelerators for GSO operation. virtio_net_hdr_from_skb() fills up hdr_len to skb_headlen(skb). The driver already complies to fill the correct value. Introduce the feature and advertise it. Note that virtio spec also includes following note for device implementation: "Caution should be taken by the implementation so as to prevent a malicious driver from attacking the device by setting an incorrect hdr_len." There is a plan to support this feature in our emulated device. A device of SolidRun offers this feature bit. They claim this feature will save the device a few cycles for every GSO packet. Link: https://docs.oasis-open.org/virtio/virtio/v1.2/cs01/virtio-v1.2-cs01.html#x1-230006x3 Signed-off-by: Jiri Pirko Reviewed-by: Parav Pandit Reviewed-by: Alvaro Karsz Acked-by: Michael S. Tsirkin Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20230309094559.917857-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- include/uapi/linux/virtio_net.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index b4062bed186a..12c1c9699935 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -61,6 +61,7 @@ #define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */ #define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ +#define VIRTIO_NET_F_GUEST_HDRLEN 59 /* Guest provides the exact hdr_len value. */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ #define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device -- cgit v1.2.3 From 27d7fdf06fdb84455ff585b58c8034e2fab42583 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 13 Mar 2023 14:56:27 -0600 Subject: bpf: use canonical ftrace path The canonical location for the tracefs filesystem is at /sys/kernel/tracing. But, from Documentation/trace/ftrace.rst: Before 4.1, all ftrace tracing control files were within the debugfs file system, which is typically located at /sys/kernel/debug/tracing. For backward compatibility, when mounting the debugfs file system, the tracefs file system will be automatically mounted at: /sys/kernel/debug/tracing Many comments and samples in the bpf code still refer to this older debugfs path, so let's update them to avoid confusion. There are a few spots where the bpf code explicitly checks both tracefs and debugfs (tools/bpf/bpftool/tracelog.c and tools/lib/api/fs/fs.c) and I've left those alone so that the tools can continue to work with both paths. Signed-off-by: Ross Zwisler Acked-by: Michael S. Tsirkin Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20230313205628.1058720-2-zwisler@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d8c534e05b0a..13129df937cd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1647,17 +1647,17 @@ union bpf_attr { * Description * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) - * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * to file *\/sys/kernel/tracing/trace* from TraceFS, if * available. It can take up to three additional **u64** * arguments (as an eBPF helpers, the total number of arguments is * limited to five). * * Each time the helper is called, it appends a line to the trace. - * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is - * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. + * Lines are discarded while *\/sys/kernel/tracing/trace* is + * open, use *\/sys/kernel/tracing/trace_pipe* to avoid this. * The format of the trace is customizable, and the exact output * one will get depends on the options set in - * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *\/sys/kernel/tracing/trace_options* (see also the * *README* file under the same directory). However, it usually * defaults to something like: * -- cgit v1.2.3 From d805456c712f93ba8a012430f2a93bec133b6ff4 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Mar 2023 23:06:36 -0800 Subject: platform/x86: ISST: Enumerate TPMI SST and create framework Enumerate TPMI SST driver and create basic framework to add more features. The basic user space interface is still same as the legacy using /dev/isst_interface. Users of "intel-speed-select" utility should be able to use same commands as prior gens without being aware of new underlying hardware interface. TPMI SST driver enumerates on device "intel_vsec.tpmi-sst". Since there can be multiple instances and there is one common SST core, split implementation into two parts: A common core part and an enumeration part. The enumeration driver is loaded for each device instance and register with the TPMI SST core driver. On very first enumeration the TPMI SST core driver register with SST core driver to get IOCTL callbacks. The api_version is incremented for IOCTL ISST_IF_GET_PLATFORM_INFO, so that user space can issue new IOCTLs. Each TPMI package contains multiple power domains. Each power domain has its own set of SST controls. For each domain map the MMIO memory and update per domain struct tpmi_per_power_domain_info. This information will be used to implement other SST interfaces. Implement first IOCTL commands to get number of TPMI SST instances and instance mask as some of the power domains may not have any SST controls. Signed-off-by: Srinivas Pandruvada Reviewed-by: Hans de Goede Reviewed-by: Zhang Rui Tested-by: Pragya Tanwar Link: https://lore.kernel.org/r/20230308070642.1727167-3-srinivas.pandruvada@linux.intel.com Signed-off-by: Hans de Goede --- include/uapi/linux/isst_if.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/isst_if.h b/include/uapi/linux/isst_if.h index ba078f8e9add..bf32d959f6e8 100644 --- a/include/uapi/linux/isst_if.h +++ b/include/uapi/linux/isst_if.h @@ -163,10 +163,28 @@ struct isst_if_msr_cmds { struct isst_if_msr_cmd msr_cmd[1]; }; +/** + * struct isst_tpmi_instance_count - Get number of TPMI instances per socket + * @socket_id: Socket/package id + * @count: Number of instances + * @valid_mask: Mask of instances as there can be holes + * + * Structure used to get TPMI instances information using + * IOCTL ISST_IF_COUNT_TPMI_INSTANCES. + */ +struct isst_tpmi_instance_count { + __u8 socket_id; + __u8 count; + __u16 valid_mask; +}; + #define ISST_IF_MAGIC 0xFE #define ISST_IF_GET_PLATFORM_INFO _IOR(ISST_IF_MAGIC, 0, struct isst_if_platform_info *) #define ISST_IF_GET_PHY_ID _IOWR(ISST_IF_MAGIC, 1, struct isst_if_cpu_map *) #define ISST_IF_IO_CMD _IOW(ISST_IF_MAGIC, 2, struct isst_if_io_regs *) #define ISST_IF_MBOX_COMMAND _IOWR(ISST_IF_MAGIC, 3, struct isst_if_mbox_cmds *) #define ISST_IF_MSR_COMMAND _IOWR(ISST_IF_MAGIC, 4, struct isst_if_msr_cmds *) + +#define ISST_IF_COUNT_TPMI_INSTANCES _IOR(ISST_IF_MAGIC, 5, struct isst_tpmi_instance_count *) + #endif -- cgit v1.2.3 From 12a7d2cb811dd8a884dea088a2701fcb8d00136e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Mar 2023 23:06:38 -0800 Subject: platform/x86: ISST: Add SST-CP support via TPMI Intel Speed Select Technology Core Power (SST-CP) is an interface that allows users to define per core priority. This defines a mechanism to distribute power among cores when there is a power constrained scenario. This defines a class of service (CLOS) configuration. Three new IOCTLs are added: ISST_IF_CORE_POWER_STATE : Enable/Disable SST-CP ISST_IF_CLOS_PARAM : Configure CLOS parameters ISST_IF_CLOS_ASSOC : Associate CPUs to a CLOS To associate CPUs to CLOS, either Linux CPU numbering or PUNIT numbering scheme can be used, using parameter punit_cpu_map (1: for PUNIT numbering 0 for Linux CPU number). There is no change to IOCTL to get PUNIT CPU number for a CPU. Introduce get_instance() function, which is used by majority of IOCTLs processing to convert a socket and power domain to tpmi_per_power_domain_info * instance. This instance has all the MMIO offsets stored to read a particular field. Once an instance is identified, read or write from correct MMIO offset for a given field as defined in the specification. For details on SST CP operations using intel-speed-selet utility, refer to: Documentation/admin-guide/pm/intel-speed-select.rst under the kernel documentation Signed-off-by: Srinivas Pandruvada Reviewed-by: Zhang Rui Tested-by: Pragya Tanwar Link: https://lore.kernel.org/r/20230308070642.1727167-5-srinivas.pandruvada@linux.intel.com Signed-off-by: Hans de Goede --- include/uapi/linux/isst_if.h | 79 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/isst_if.h b/include/uapi/linux/isst_if.h index bf32d959f6e8..32687d8023ef 100644 --- a/include/uapi/linux/isst_if.h +++ b/include/uapi/linux/isst_if.h @@ -163,6 +163,82 @@ struct isst_if_msr_cmds { struct isst_if_msr_cmd msr_cmd[1]; }; +/** + * struct isst_core_power - Structure to get/set core_power feature + * @get_set: 0: Get, 1: Set + * @socket_id: Socket/package id + * @power_domain: Power Domain id + * @enable: Feature enable status + * @priority_type: Priority type for the feature (ordered/proportional) + * + * Structure to get/set core_power feature state using IOCTL + * ISST_IF_CORE_POWER_STATE. + */ +struct isst_core_power { + __u8 get_set; + __u8 socket_id; + __u8 power_domain_id; + __u8 enable; + __u8 supported; + __u8 priority_type; +}; + +/** + * struct isst_clos_param - Structure to get/set clos praram + * @get_set: 0: Get, 1: Set + * @socket_id: Socket/package id + * @power_domain: Power Domain id + * clos: Clos ID for the parameters + * min_freq_mhz: Minimum frequency in MHz + * max_freq_mhz: Maximum frequency in MHz + * prop_prio: Proportional priority from 0-15 + * + * Structure to get/set per clos property using IOCTL + * ISST_IF_CLOS_PARAM. + */ +struct isst_clos_param { + __u8 get_set; + __u8 socket_id; + __u8 power_domain_id; + __u8 clos; + __u16 min_freq_mhz; + __u16 max_freq_mhz; + __u8 prop_prio; +}; + +/** + * struct isst_if_clos_assoc - Structure to assign clos to a CPU + * @socket_id: Socket/package id + * @power_domain: Power Domain id + * @logical_cpu: CPU number + * @clos: Clos ID to assign to the logical CPU + * + * Structure to get/set core_power feature. + */ +struct isst_if_clos_assoc { + __u8 socket_id; + __u8 power_domain_id; + __u16 logical_cpu; + __u16 clos; +}; + +/** + * struct isst_if_clos_assoc_cmds - Structure to assign clos to CPUs + * @cmd_count: Number of cmds (cpus) in this request + * @get_set: Request is for get or set + * @punit_cpu_map: Set to 1 if the CPU number is punit numbering not + * Linux CPU number + * + * Structure used to get/set associate CPUs to clos using IOCTL + * ISST_IF_CLOS_ASSOC. + */ +struct isst_if_clos_assoc_cmds { + __u16 cmd_count; + __u16 get_set; + __u16 punit_cpu_map; + struct isst_if_clos_assoc assoc_info[1]; +}; + /** * struct isst_tpmi_instance_count - Get number of TPMI instances per socket * @socket_id: Socket/package id @@ -186,5 +262,8 @@ struct isst_tpmi_instance_count { #define ISST_IF_MSR_COMMAND _IOWR(ISST_IF_MAGIC, 4, struct isst_if_msr_cmds *) #define ISST_IF_COUNT_TPMI_INSTANCES _IOR(ISST_IF_MAGIC, 5, struct isst_tpmi_instance_count *) +#define ISST_IF_CORE_POWER_STATE _IOWR(ISST_IF_MAGIC, 6, struct isst_core_power *) +#define ISST_IF_CLOS_PARAM _IOWR(ISST_IF_MAGIC, 7, struct isst_clos_param *) +#define ISST_IF_CLOS_ASSOC _IOWR(ISST_IF_MAGIC, 8, struct isst_if_clos_assoc_cmds *) #endif -- cgit v1.2.3 From ea009e4769fa3bd05d4c111c3b6865eb3a9be829 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Mar 2023 23:06:39 -0800 Subject: platform/x86: ISST: Add SST-PP support via TPMI This Intel Speed Select Technology - Performance Profile (SST-PP) feature introduces a mechanism that allows multiple optimized performance profiles per system. Each profile defines a set of CPUs that need to be online and rest offline to sustain a guaranteed base frequency. Five new IOCTLs are added: ISST_IF_PERF_LEVELS : Get number of performance levels ISST_IF_PERF_SET_LEVEL : Set to a new performance level ISST_IF_PERF_SET_FEATURE : Activate SST-BF/SST-TF for a performance level ISST_IF_GET_PERF_LEVEL_INFO : Get parameters for a performance level ISST_IF_GET_PERF_LEVEL_CPU_MASK : Get CPU mask for a performance level Once an instance is identified, read or write from correct MMIO offset for a given field as defined in the specification. For details on SST PP operations using intel-speed-selet utility, refer to: Documentation/admin-guide/pm/intel-speed-select.rst under the kernel documentation Signed-off-by: Srinivas Pandruvada Reviewed-by: Zhang Rui Tested-by: Pragya Tanwar Link: https://lore.kernel.org/r/20230308070642.1727167-6-srinivas.pandruvada@linux.intel.com Signed-off-by: Hans de Goede --- include/uapi/linux/isst_if.h | 180 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/isst_if.h b/include/uapi/linux/isst_if.h index 32687d8023ef..c4b350ea5cbe 100644 --- a/include/uapi/linux/isst_if.h +++ b/include/uapi/linux/isst_if.h @@ -254,6 +254,178 @@ struct isst_tpmi_instance_count { __u16 valid_mask; }; +/** + * struct isst_perf_level_info - Structure to get information on SST-PP levels + * @socket_id: Socket/package id + * @power_domain: Power Domain id + * @logical_cpu: CPU number + * @clos: Clos ID to assign to the logical CPU + * @max_level: Maximum performance level supported by the platform + * @feature_rev: The feature revision for SST-PP supported by the platform + * @level_mask: Mask of supported performance levels + * @current_level: Current performance level + * @feature_state: SST-BF and SST-TF (enabled/disabled) status at current level + * @locked: SST-PP performance level change is locked/unlocked + * @enabled: SST-PP feature is enabled or not + * @sst-tf_support: SST-TF support status at this level + * @sst-bf_support: SST-BF support status at this level + * + * Structure to get SST-PP details using IOCTL ISST_IF_PERF_LEVELS. + */ +struct isst_perf_level_info { + __u8 socket_id; + __u8 power_domain_id; + __u8 max_level; + __u8 feature_rev; + __u8 level_mask; + __u8 current_level; + __u8 feature_state; + __u8 locked; + __u8 enabled; + __u8 sst_tf_support; + __u8 sst_bf_support; +}; + +/** + * struct isst_perf_level_control - Structure to set SST-PP level + * @socket_id: Socket/package id + * @power_domain: Power Domain id + * @level: level to set + * + * Structure used change SST-PP level using IOCTL ISST_IF_PERF_SET_LEVEL. + */ +struct isst_perf_level_control { + __u8 socket_id; + __u8 power_domain_id; + __u8 level; +}; + +/** + * struct isst_perf_feature_control - Structure to activate SST-BF/SST-TF + * @socket_id: Socket/package id + * @power_domain: Power Domain id + * @feature: bit 0 = SST-BF state, bit 1 = SST-TF state + * + * Structure used to enable SST-BF/SST-TF using IOCTL ISST_IF_PERF_SET_FEATURE. + */ +struct isst_perf_feature_control { + __u8 socket_id; + __u8 power_domain_id; + __u8 feature; +}; + +#define TRL_MAX_BUCKETS 8 +#define TRL_MAX_LEVELS 6 + +/** + * struct isst_perf_level_data_info - Structure to get SST-PP level details + * @socket_id: Socket/package id + * @power_domain: Power Domain id + * @level: SST-PP level for which caller wants to get information + * @tdp_ratio: TDP Ratio + * @base_freq_mhz: Base frequency in MHz + * @base_freq_avx2_mhz: AVX2 Base frequency in MHz + * @base_freq_avx512_mhz: AVX512 base frequency in MHz + * @base_freq_amx_mhz: AMX base frequency in MHz + * @thermal_design_power_w: Thermal design (TDP) power + * @tjunction_max_c: Max junction temperature + * @max_memory_freq_mhz: Max memory frequency in MHz + * @cooling_type: Type of cooling is used + * @p0_freq_mhz: core maximum frequency + * @p1_freq_mhz: Core TDP frequency + * @pn_freq_mhz: Core maximum efficiency frequency + * @pm_freq_mhz: Core minimum frequency + * @p0_fabric_freq_mhz: Fabric (Uncore) maximum frequency + * @p1_fabric_freq_mhz: Fabric (Uncore) TDP frequency + * @pn_fabric_freq_mhz: Fabric (Uncore) minimum efficiency frequency + * @pm_fabric_freq_mhz: Fabric (Uncore) minimum frequency + * @max_buckets: Maximum trl buckets + * @max_trl_levels: Maximum trl levels + * @bucket_core_counts[TRL_MAX_BUCKETS]: Number of cores per bucket + * @trl_freq_mhz[TRL_MAX_LEVELS][TRL_MAX_BUCKETS]: maximum frequency + * for a bucket and trl level + * + * Structure used to get information on frequencies and TDP for a SST-PP + * level using ISST_IF_GET_PERF_LEVEL_INFO. + */ +struct isst_perf_level_data_info { + __u8 socket_id; + __u8 power_domain_id; + __u16 level; + __u16 tdp_ratio; + __u16 base_freq_mhz; + __u16 base_freq_avx2_mhz; + __u16 base_freq_avx512_mhz; + __u16 base_freq_amx_mhz; + __u16 thermal_design_power_w; + __u16 tjunction_max_c; + __u16 max_memory_freq_mhz; + __u16 cooling_type; + __u16 p0_freq_mhz; + __u16 p1_freq_mhz; + __u16 pn_freq_mhz; + __u16 pm_freq_mhz; + __u16 p0_fabric_freq_mhz; + __u16 p1_fabric_freq_mhz; + __u16 pn_fabric_freq_mhz; + __u16 pm_fabric_freq_mhz; + __u16 max_buckets; + __u16 max_trl_levels; + __u16 bucket_core_counts[TRL_MAX_BUCKETS]; + __u16 trl_freq_mhz[TRL_MAX_LEVELS][TRL_MAX_BUCKETS]; +}; + +/** + * struct isst_perf_level_cpu_mask - Structure to get SST-PP level CPU mask + * @socket_id: Socket/package id + * @power_domain: Power Domain id + * @level: SST-PP level for which caller wants to get information + * @punit_cpu_map: Set to 1 if the CPU number is punit numbering not + * Linux CPU number. If 0 CPU buffer is copied to user space + * supplied cpu_buffer of size cpu_buffer_size. Punit + * cpu mask is copied to "mask" field. + * @mask: cpu mask for this PP level (punit CPU numbering) + * @cpu_buffer_size: size of cpu_buffer also used to return the copied CPU + * buffer size. + * @cpu_buffer: Buffer to copy CPU mask when punit_cpu_map is 0 + * + * Structure used to get cpumask for a SST-PP level using + * IOCTL ISST_IF_GET_PERF_LEVEL_CPU_MASK. Also used to get CPU mask for + * IOCTL ISST_IF_GET_BASE_FREQ_CPU_MASK for SST-BF. + */ +struct isst_perf_level_cpu_mask { + __u8 socket_id; + __u8 power_domain_id; + __u8 level; + __u8 punit_cpu_map; + __u64 mask; + __u16 cpu_buffer_size; + __s8 cpu_buffer[1]; +}; + +/** + * struct isst_base_freq_info - Structure to get SST-BF frequencies + * @socket_id: Socket/package id + * @power_domain: Power Domain id + * @level: SST-PP level for which caller wants to get information + * @high_base_freq_mhz: High priority CPU base frequency + * @low_base_freq_mhz: Low priority CPU base frequency + * @tjunction_max_c: Max junction temperature + * @thermal_design_power_w: Thermal design power in watts + * + * Structure used to get SST-BF information using + * IOCTL ISST_IF_GET_BASE_FREQ_INFO. + */ +struct isst_base_freq_info { + __u8 socket_id; + __u8 power_domain_id; + __u16 level; + __u16 high_base_freq_mhz; + __u16 low_base_freq_mhz; + __u16 tjunction_max_c; + __u16 thermal_design_power_w; +}; + #define ISST_IF_MAGIC 0xFE #define ISST_IF_GET_PLATFORM_INFO _IOR(ISST_IF_MAGIC, 0, struct isst_if_platform_info *) #define ISST_IF_GET_PHY_ID _IOWR(ISST_IF_MAGIC, 1, struct isst_if_cpu_map *) @@ -266,4 +438,12 @@ struct isst_tpmi_instance_count { #define ISST_IF_CLOS_PARAM _IOWR(ISST_IF_MAGIC, 7, struct isst_clos_param *) #define ISST_IF_CLOS_ASSOC _IOWR(ISST_IF_MAGIC, 8, struct isst_if_clos_assoc_cmds *) +#define ISST_IF_PERF_LEVELS _IOWR(ISST_IF_MAGIC, 9, struct isst_perf_level_info *) +#define ISST_IF_PERF_SET_LEVEL _IOW(ISST_IF_MAGIC, 10, struct isst_perf_level_control *) +#define ISST_IF_PERF_SET_FEATURE _IOW(ISST_IF_MAGIC, 11, struct isst_perf_feature_control *) +#define ISST_IF_GET_PERF_LEVEL_INFO _IOR(ISST_IF_MAGIC, 12, struct isst_perf_level_data_info *) +#define ISST_IF_GET_PERF_LEVEL_CPU_MASK _IOR(ISST_IF_MAGIC, 13, struct isst_perf_level_cpu_mask *) +#define ISST_IF_GET_BASE_FREQ_INFO _IOR(ISST_IF_MAGIC, 14, struct isst_base_freq_info *) +#define ISST_IF_GET_BASE_FREQ_CPU_MASK _IOR(ISST_IF_MAGIC, 15, struct isst_perf_level_cpu_mask *) + #endif -- cgit v1.2.3 From f8e0077a9d526cac7abbc682d83e98f834ffa909 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Mar 2023 23:06:41 -0800 Subject: platform/x86: ISST: Add SST-TF support via TPMI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The support of Intel Speed Select Technology - Turbo Frequency (SST-TF) feature enables the ability to set different “All core turbo ratio limits” to cores based on the priority. By using this feature, some cores can be configured to get higher turbo frequency by designating them as high priority at the cost of lower or no turbo frequency on the low priority cores. One new IOCTLs are added: ISST_IF_GET_TURBO_FREQ_INFO : Get information about turbo frequency buckets Once an instance is identified, read or write from correct MMIO offset for a given field as defined in the specification. For details on SST-TF operations using intel-speed-selet utility, refer to: Documentation/admin-guide/pm/intel-speed-select.rst under the kernel documentation Signed-off-by: Srinivas Pandruvada Reviewed-by: Zhang Rui Tested-by: Pragya Tanwar Link: https://lore.kernel.org/r/20230308070642.1727167-8-srinivas.pandruvada@linux.intel.com Signed-off-by: Hans de Goede --- include/uapi/linux/isst_if.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/isst_if.h b/include/uapi/linux/isst_if.h index c4b350ea5cbe..0df1a1c3caf4 100644 --- a/include/uapi/linux/isst_if.h +++ b/include/uapi/linux/isst_if.h @@ -426,6 +426,31 @@ struct isst_base_freq_info { __u16 thermal_design_power_w; }; +/** + * struct isst_turbo_freq_info - Structure to get SST-TF frequencies + * @socket_id: Socket/package id + * @power_domain: Power Domain id + * @level: SST-PP level for which caller wants to get information + * @max_clip_freqs: Maximum number of low priority core clipping frequencies + * @lp_clip_freq_mhz: Clip frequencies per trl level + * @bucket_core_counts: Maximum number of cores for a bucket + * @trl_freq_mhz: Frequencies per trl level for each bucket + * + * Structure used to get SST-TF information using + * IOCTL ISST_IF_GET_TURBO_FREQ_INFO. + */ +struct isst_turbo_freq_info { + __u8 socket_id; + __u8 power_domain_id; + __u16 level; + __u16 max_clip_freqs; + __u16 max_buckets; + __u16 max_trl_levels; + __u16 lp_clip_freq_mhz[TRL_MAX_LEVELS]; + __u16 bucket_core_counts[TRL_MAX_BUCKETS]; + __u16 trl_freq_mhz[TRL_MAX_LEVELS][TRL_MAX_BUCKETS]; +}; + #define ISST_IF_MAGIC 0xFE #define ISST_IF_GET_PLATFORM_INFO _IOR(ISST_IF_MAGIC, 0, struct isst_if_platform_info *) #define ISST_IF_GET_PHY_ID _IOWR(ISST_IF_MAGIC, 1, struct isst_if_cpu_map *) @@ -445,5 +470,6 @@ struct isst_base_freq_info { #define ISST_IF_GET_PERF_LEVEL_CPU_MASK _IOR(ISST_IF_MAGIC, 13, struct isst_perf_level_cpu_mask *) #define ISST_IF_GET_BASE_FREQ_INFO _IOR(ISST_IF_MAGIC, 14, struct isst_base_freq_info *) #define ISST_IF_GET_BASE_FREQ_CPU_MASK _IOR(ISST_IF_MAGIC, 15, struct isst_perf_level_cpu_mask *) +#define ISST_IF_GET_TURBO_FREQ_INFO _IOR(ISST_IF_MAGIC, 16, struct isst_turbo_freq_info *) #endif -- cgit v1.2.3 From c5edd753a0bd6243a597f5199c227a50457ee179 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 8 Feb 2023 15:01:02 +0100 Subject: KVM: x86: Remove the KVM_GET_NR_MMU_PAGES ioctl The KVM_GET_NR_MMU_PAGES ioctl is quite questionable on 64-bit hosts since it fails to return the full 64 bits of the value that can be set with the corresponding KVM_SET_NR_MMU_PAGES call. Its "long" return value is truncated into an "int" in the kvm_arch_vm_ioctl() function. Since this ioctl also never has been used by userspace applications (QEMU, Google's internal VMM, kvmtool and CrosVM have been checked), it's likely the best if we remove this badly designed ioctl before anybody really tries to use it. Signed-off-by: Thomas Huth Reviewed-by: Sean Christopherson Message-Id: <20230208140105.655814-4-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d77aef872a0a..4003a166328c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1451,7 +1451,7 @@ struct kvm_vfio_spapr_tce { #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) -#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) +#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) /* deprecated */ #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) -- cgit v1.2.3 From a3a48de5eade770e911d35291217bdd69ce04ef1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 15:11:51 +0200 Subject: vxlan: mdb: Add MDB control path support Implement MDB control path support, enabling the creation, deletion, replacement and dumping of MDB entries in a similar fashion to the bridge driver. Unlike the bridge driver, each entry stores a list of remote VTEPs to which matched packets need to be replicated to and not a list of bridge ports. The motivating use case is the installation of MDB entries by a user space control plane in response to received EVPN routes. As such, only allow permanent MDB entries to be installed and do not implement snooping functionality, avoiding a lot of unnecessary complexity. Since entries can only be modified by user space under RTNL, use RTNL as the write lock. Use RCU to ensure that MDB entries and remotes are not freed while being accessed from the data path during transmission. In terms of uAPI, reuse the existing MDB netlink interface, but add a few new attributes to request and response messages: * IP address of the destination VXLAN tunnel endpoint where the multicast receivers reside. * UDP destination port number to use to connect to the remote VXLAN tunnel endpoint. * VXLAN VNI Network Identifier to use to connect to the remote VXLAN tunnel endpoint. Required when Ingress Replication (IR) is used and the remote VTEP is not a member of originating broadcast domain (VLAN/VNI) [1]. * Source VNI Network Identifier the MDB entry belongs to. Used only when the VXLAN device is in external mode. * Interface index of the outgoing interface to reach the remote VXLAN tunnel endpoint. This is required when the underlay destination IP is multicast (P2MP), as the multicast routing tables are not consulted. All the new attributes are added under the 'MDBA_SET_ENTRY_ATTRS' nest which is strictly validated by the bridge driver, thereby automatically rejecting the new attributes. [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-3.2.2 Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index d60c456710b3..c9d624f528c5 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -633,6 +633,11 @@ enum { MDBA_MDB_EATTR_GROUP_MODE, MDBA_MDB_EATTR_SOURCE, MDBA_MDB_EATTR_RTPROT, + MDBA_MDB_EATTR_DST, + MDBA_MDB_EATTR_DST_PORT, + MDBA_MDB_EATTR_VNI, + MDBA_MDB_EATTR_IFINDEX, + MDBA_MDB_EATTR_SRC_VNI, __MDBA_MDB_EATTR_MAX }; #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1) @@ -728,6 +733,11 @@ enum { MDBE_ATTR_SRC_LIST, MDBE_ATTR_GROUP_MODE, MDBE_ATTR_RTPROT, + MDBE_ATTR_DST, + MDBE_ATTR_DST_PORT, + MDBE_ATTR_VNI, + MDBE_ATTR_IFINDEX, + MDBE_ATTR_SRC_VNI, __MDBE_ATTR_MAX, }; #define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1) -- cgit v1.2.3 From fc650d2eba1087589bbb6ede866756df13b9a9dc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 2 Mar 2023 13:57:29 +0100 Subject: media: videodev.h: drop V4L2_FBUF_CAP_LIST/BITMAP_CLIPPING These two capabilities are no longer supported, so no longer define them when compiling the kernel. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 17a9b975177a..b5b3d1fddea2 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1224,8 +1224,10 @@ struct v4l2_framebuffer { /* Flags for the 'capability' field. Read only */ #define V4L2_FBUF_CAP_EXTERNOVERLAY 0x0001 #define V4L2_FBUF_CAP_CHROMAKEY 0x0002 +#ifndef __KERNEL__ #define V4L2_FBUF_CAP_LIST_CLIPPING 0x0004 #define V4L2_FBUF_CAP_BITMAP_CLIPPING 0x0008 +#endif #define V4L2_FBUF_CAP_LOCAL_ALPHA 0x0010 #define V4L2_FBUF_CAP_GLOBAL_ALPHA 0x0020 #define V4L2_FBUF_CAP_LOCAL_INV_ALPHA 0x0040 -- cgit v1.2.3 From efb339a83368ab25de1a18c0fdff85e01c13a1ea Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Tue, 7 Mar 2023 20:24:39 +0100 Subject: crypto: ccp - Name -1 return value as SEV_RET_NO_FW_CALL The PSP can return a "firmware error" code of -1 in circumstances where the PSP has not actually been called. To make this protocol unambiguous, name the value SEV_RET_NO_FW_CALL. [ bp: Massage a bit. ] Signed-off-by: Peter Gonda Signed-off-by: Dionna Glaze Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20221207010210.2563293-2-dionnaglaze@google.com --- include/uapi/linux/psp-sev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index 91b4c63d5cbf..1c9da485318f 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -36,6 +36,13 @@ enum { * SEV Firmware status code */ typedef enum { + /* + * This error code is not in the SEV spec. Its purpose is to convey that + * there was an error that prevented the SEV firmware from being called. + * The SEV API error codes are 16 bits, so the -1 value will not overlap + * with possible values from the specification. + */ + SEV_RET_NO_FW_CALL = -1, SEV_RET_SUCCESS = 0, SEV_RET_INVALID_PLATFORM_STATE, SEV_RET_INVALID_GUEST_STATE, -- cgit v1.2.3 From 0144e3b85d7b42e8a4cda991c0e81f131897457a Mon Sep 17 00:00:00 2001 From: Dionna Glaze Date: Tue, 7 Mar 2023 20:24:49 +0100 Subject: x86/sev: Change snp_guest_issue_request()'s fw_err argument The GHCB specification declares that the firmware error value for a guest request will be stored in the lower 32 bits of EXIT_INFO_2. The upper 32 bits are for the VMM's own error code. The fw_err argument to snp_guest_issue_request() is thus a misnomer, and callers will need access to all 64 bits. The type of unsigned long also causes problems, since sw_exit_info2 is u64 (unsigned long long) vs the argument's unsigned long*. Change this type for issuing the guest request. Pass the ioctl command struct's error field directly instead of in a local variable, since an incomplete guest request may not set the error code, and uninitialized stack memory would be written back to user space. The firmware might not even be called, so bookend the call with the no firmware call error and clear the error. Since the "fw_err" field is really exitinfo2 split into the upper bits' vmm error code and lower bits' firmware error code, convert the 64 bit value to a union. [ bp: - Massage commit message - adjust code - Fix a build issue as Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202303070609.vX6wp2Af-lkp@intel.com - print exitinfo2 in hex Tom: - Correct -EIO exit case. ] Signed-off-by: Dionna Glaze Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230214164638.1189804-5-dionnaglaze@google.com Link: https://lore.kernel.org/r/20230307192449.24732-12-bp@alien8.de --- include/uapi/linux/sev-guest.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h index 256aaeff7e65..2aa39112cf8d 100644 --- a/include/uapi/linux/sev-guest.h +++ b/include/uapi/linux/sev-guest.h @@ -52,8 +52,14 @@ struct snp_guest_request_ioctl { __u64 req_data; __u64 resp_data; - /* firmware error code on failure (see psp-sev.h) */ - __u64 fw_err; + /* bits[63:32]: VMM error code, bits[31:0] firmware error code (see psp-sev.h) */ + union { + __u64 exitinfo2; + struct { + __u32 fw_error; + __u32 vmm_error; + }; + }; }; struct snp_ext_report_req { @@ -77,4 +83,12 @@ struct snp_ext_report_req { /* Get SNP extended report as defined in the GHCB specification version 2. */ #define SNP_GET_EXT_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x2, struct snp_guest_request_ioctl) +/* Guest message request EXIT_INFO_2 constants */ +#define SNP_GUEST_FW_ERR_MASK GENMASK_ULL(31, 0) +#define SNP_GUEST_VMM_ERR_SHIFT 32 +#define SNP_GUEST_VMM_ERR(x) (((u64)x) << SNP_GUEST_VMM_ERR_SHIFT) + +#define SNP_GUEST_VMM_ERR_INVALID_LEN 1 +#define SNP_GUEST_VMM_ERR_BUSY 2 + #endif /* __UAPI_LINUX_SEV_GUEST_H_ */ -- cgit v1.2.3 From 8e40c3b6e1538401d2cf8d43087ebe1db8026af9 Mon Sep 17 00:00:00 2001 From: Manikanta Pubbisetty Date: Wed, 8 Mar 2023 16:15:56 +0530 Subject: wifi: nl80211: Update the documentation of NL80211_SCAN_FLAG_COLOCATED_6GHZ Currently when NL80211_SCAN_FLAG_COLOCATED_6GHZ is set in the scan flags, in addition to the co-located APs, PSC channels in the 6 GHz band would also be scanned if the user space has asked for it. In other words, the scan would happen on PSC channels & co-located 6 GHz channels that were reported in the RNR IE. Update the documentation of NL80211_SCAN_FLAG_COLOCATED_6GHZ flag to reflect the above said behavior. Signed-off-by: Manikanta Pubbisetty Link: https://lore.kernel.org/r/20230308104556.9399-1-quic_mpubbise@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9a0ac0363f1f..14e958a32b84 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -6544,7 +6544,9 @@ enum nl80211_timeout_reason { * channels on which APs are expected to be found. Note that when not set, * the scan logic would scan all 6GHz channels, but since transmission of * probe requests on non PSC channels is limited, it is highly likely that - * these channels would passively be scanned. + * these channels would passively be scanned. Also note that when the flag + * is set, in addition to the colocated APs, PSC channels would also be + * scanned if the user space has asked for it. */ enum nl80211_scan_flags { NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, -- cgit v1.2.3 From 05f0adefd48a2ccbd66b3793d220beb9dcaf6988 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Tue, 7 Mar 2023 23:46:06 +0100 Subject: ata: parport_pc: add 16-bit and 8-bit fast EPP transfer flags PARPORT_EPP_FAST flag currently uses 32-bit I/O port access for data read/write (insl/outsl). Add PARPORT_EPP_FAST_16 and PARPORT_EPP_FAST_8 that use insw/outsw and insb/outsb (and PARPORT_EPP_FAST_32 as alias for PARPORT_EPP_FAST). Signed-off-by: Ondrej Zary Signed-off-by: Damien Le Moal --- include/uapi/linux/parport.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/parport.h b/include/uapi/linux/parport.h index f41388f88dc3..fe93e41fc205 100644 --- a/include/uapi/linux/parport.h +++ b/include/uapi/linux/parport.h @@ -90,6 +90,9 @@ typedef enum { /* Flags for block transfer operations. */ #define PARPORT_EPP_FAST (1<<0) /* Unreliable counts. */ #define PARPORT_W91284PIC (1<<1) /* have a Warp9 w91284pic in the device */ +#define PARPORT_EPP_FAST_32 PARPORT_EPP_FAST /* 32-bit EPP transfers */ +#define PARPORT_EPP_FAST_16 (1<<2) /* 16-bit EPP transfers */ +#define PARPORT_EPP_FAST_8 (1<<3) /* 8-bit EPP transfers */ /* The rest is for the kernel only */ #endif /* _UAPI_PARPORT_H_ */ -- cgit v1.2.3 From 68b04864ca425d1894c96b8141d4fba1181f11cb Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 22 Mar 2023 20:24:00 -0700 Subject: bpf: Create links for BPF struct_ops maps. Make bpf_link support struct_ops. Previously, struct_ops were always used alone without any associated links. Upon updating its value, a struct_ops would be activated automatically. Yet other BPF program types required to make a bpf_link with their instances before they could become active. Now, however, you can create an inactive struct_ops, and create a link to activate it later. With bpf_links, struct_ops has a behavior similar to other BPF program types. You can pin/unpin them from their links and the struct_ops will be deactivated when its link is removed while previously need someone to delete the value for it to be deactivated. bpf_links are responsible for registering their associated struct_ops. You can only use a struct_ops that has the BPF_F_LINK flag set to create a bpf_link, while a structs without this flag behaves in the same manner as before and is registered upon updating its value. The BPF_LINK_TYPE_STRUCT_OPS serves a dual purpose. Not only is it used to craft the links for BPF struct_ops programs, but also to create links for BPF struct_ops them-self. Since the links of BPF struct_ops programs are only used to create trampolines internally, they are never seen in other contexts. Thus, they can be reused for struct_ops themself. To maintain a reference to the map supporting this link, we add bpf_struct_ops_link as an additional type. The pointer of the map is RCU and won't be necessary until later in the patchset. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20230323032405.3735486-4-kuifeng@meta.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 13129df937cd..42f40ee083bf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1033,6 +1033,7 @@ enum bpf_attach_type { BPF_PERF_EVENT, BPF_TRACE_KPROBE_MULTI, BPF_LSM_CGROUP, + BPF_STRUCT_OPS, __MAX_BPF_ATTACH_TYPE }; @@ -1266,6 +1267,9 @@ enum { /* Create a map that is suitable to be an inner map with dynamic max entries */ BPF_F_INNER_MAP = (1U << 12), + +/* Create a map that will be registered/unregesitered by the backed bpf_link */ + BPF_F_LINK = (1U << 13), }; /* Flags for BPF_PROG_QUERY. */ @@ -1507,7 +1511,10 @@ union bpf_attr { } task_fd_query; struct { /* struct used by BPF_LINK_CREATE command */ - __u32 prog_fd; /* eBPF program to attach */ + union { + __u32 prog_fd; /* eBPF program to attach */ + __u32 map_fd; /* struct_ops to attach */ + }; union { __u32 target_fd; /* object to attach to */ __u32 target_ifindex; /* target ifindex */ @@ -6379,6 +6386,9 @@ struct bpf_link_info { struct { __u32 ifindex; } xdp; + struct { + __u32 map_id; + } struct_ops; }; } __attribute__((aligned(8))); -- cgit v1.2.3 From aef56f2e918bf8fc8de25f0b36e8c2aba44116ec Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 22 Mar 2023 20:24:02 -0700 Subject: bpf: Update the struct_ops of a bpf_link. By improving the BPF_LINK_UPDATE command of bpf(), it should allow you to conveniently switch between different struct_ops on a single bpf_link. This would enable smoother transitions from one struct_ops to another. The struct_ops maps passing along with BPF_LINK_UPDATE should have the BPF_F_LINK flag. Signed-off-by: Kui-Feng Lee Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230323032405.3735486-6-kuifeng@meta.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 42f40ee083bf..e3d3b5160d26 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1555,12 +1555,23 @@ union bpf_attr { struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ - /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ - /* expected link's program fd; is specified only if - * BPF_F_REPLACE flag is set in flags */ - __u32 old_prog_fd; + union { + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags. + */ + __u32 old_prog_fd; + /* expected link's map fd; is specified only + * if BPF_F_REPLACE flag is set. + */ + __u32 old_map_fd; + }; } link_update; struct { -- cgit v1.2.3 From 1fb1ea0d9cb8359ae9d6f67f22666c74d5b9f47d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 10 Mar 2023 19:07:47 +0200 Subject: mei: Move uuid.h to the MEI namespace There is only a single user of the UUID uAPI, let's make it part of that user. The way it's done is to prevent compilation time breakage for the user space that does #include In the future MEI user space tools can switch over to use mei_uuid.h. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230310170747.22782-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/mei.h | 2 +- include/uapi/linux/mei_uuid.h | 29 +++++++++++++++++++++++++++++ include/uapi/linux/uuid.h | 31 +------------------------------ 3 files changed, 31 insertions(+), 31 deletions(-) create mode 100644 include/uapi/linux/mei_uuid.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mei.h b/include/uapi/linux/mei.h index 4f3638489d01..6e57743628c0 100644 --- a/include/uapi/linux/mei.h +++ b/include/uapi/linux/mei.h @@ -7,7 +7,7 @@ #ifndef _LINUX_MEI_H #define _LINUX_MEI_H -#include +#include /* * This IOCTL is used to associate the current file descriptor with a diff --git a/include/uapi/linux/mei_uuid.h b/include/uapi/linux/mei_uuid.h new file mode 100644 index 000000000000..676ebe12d623 --- /dev/null +++ b/include/uapi/linux/mei_uuid.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * MEI UUID definition + * + * Copyright (C) 2010, Intel Corp. + * Huang Ying + */ + +#ifndef _UAPI_LINUX_MEI_UUID_H_ +#define _UAPI_LINUX_MEI_UUID_H_ + +#include + +typedef struct { + __u8 b[16]; +} uuid_le; + +#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((uuid_le) \ +{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + +#define NULL_UUID_LE \ + UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00) + +#endif /* _UAPI_LINUX_MEI_UUID_H_ */ diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h index 96ac684a4b2f..8443738f4bb2 100644 --- a/include/uapi/linux/uuid.h +++ b/include/uapi/linux/uuid.h @@ -1,30 +1 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* DO NOT USE in new code! This is solely for MEI due to legacy reasons */ -/* - * MEI UUID definition - * - * Copyright (C) 2010, Intel Corp. - * Huang Ying - */ - -#ifndef _UAPI_LINUX_UUID_H_ -#define _UAPI_LINUX_UUID_H_ - -#include - -typedef struct { - __u8 b[16]; -} uuid_le; - -#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ -((uuid_le) \ -{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ - (b) & 0xff, ((b) >> 8) & 0xff, \ - (c) & 0xff, ((c) >> 8) & 0xff, \ - (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) - -#define NULL_UUID_LE \ - UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ - 0x00, 0x00, 0x00, 0x00) - -#endif /* _UAPI_LINUX_UUID_H_ */ +#include -- cgit v1.2.3 From dbbb27e183b1568d5a907ace1cd144b0709ea52a Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Thu, 23 Mar 2023 04:38:00 -0700 Subject: cfg80211: support RNR for EMA AP As per IEEE Std 802.11ax-2021, 11.1.3.8.3 Discovery of a nontransmitted BSSID profile, an EMA AP that transmits a Beacon frame carrying a partial list of nontransmitted BSSID profiles should include in the frame a Reduced Neighbor Report element carrying information for at least the nontransmitted BSSIDs that are not present in the Multiple BSSID element carried in that frame. Add new nested attribute NL80211_ATTR_EMA_RNR_ELEMS to support the above. Number of RNR elements must be more than or equal to the number of MBSSID elements. This attribute can be used only when EMA is enabled. Userspace is responsible for splitting the RNR into multiple elements such that each element excludes the non-transmitting profiles already included in the MBSSID element (%NL80211_ATTR_MBSSID_ELEMS) at the same index. Each EMA beacon will be generated by adding MBSSID and RNR elements at the same index. If the userspace provides more RNR elements than the number of MBSSID elements then these will be added in every EMA beacon. Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/20230323113801.6903-2-quic_alokad@quicinc.com [Johannes: validate elements] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 14e958a32b84..cf4fb981e131 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2794,6 +2794,17 @@ enum nl80211_commands { * @NL80211_ATTR_HW_TIMESTAMP_ENABLED: Indicates whether HW timestamping should * be enabled or not (flag attribute). * + * @NL80211_ATTR_EMA_RNR_ELEMS: Optional nested attribute for + * reduced neighbor report (RNR) elements. This attribute can be used + * only when NL80211_MBSSID_CONFIG_ATTR_EMA is enabled. + * Userspace is responsible for splitting the RNR into multiple + * elements such that each element excludes the non-transmitting + * profiles already included in the MBSSID element + * (%NL80211_ATTR_MBSSID_ELEMS) at the same index. Each EMA beacon + * will be generated by adding MBSSID and RNR elements at the same + * index. If the userspace includes more RNR elements than number of + * MBSSID elements then these will be added in every EMA beacon. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3328,6 +3339,8 @@ enum nl80211_attrs { NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS, NL80211_ATTR_HW_TIMESTAMP_ENABLED, + NL80211_ATTR_EMA_RNR_ELEMS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 5c8c74ef20e7973c270498dbbf96170c9f92dae3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 17 Mar 2023 10:59:48 -0600 Subject: scsi: target: uapi: Replace fake flex-array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero-length arrays as fake flexible arrays are deprecated and we are moving towards adopting C99 flexible-array members instead. Address the following warning found with GCC-13 and -fstrict-flex-arrays=3 enabled: CC drivers/target/target_core_user.o drivers/target/target_core_user.c: In function ‘queue_cmd_ring’: drivers/target/target_core_user.c:1096:15: warning: array subscript 0 is outside array bounds of ‘struct iovec[0]’ [-Warray-bounds=] 1096 | iov = &entry->req.iov[0]; | ^~~~~~~~~~~~~~~~~~ In file included from drivers/target/target_core_user.c:31: ./include/uapi/linux/target_core_user.h:122:38: note: while referencing ‘iov’ 122 | struct iovec iov[0]; | ^~~ This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -fstrict-flex-arrays=3 [1]. Link: https://github.com/KSPP/linux/issues/21 Link: https://github.com/KSPP/linux/issues/270 Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1] Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/ZBSchMvTdl7VObKI@work Reviewed-by: Bodo Stroesser Reviewed-by: Kees Cook Signed-off-by: Martin K. Petersen --- include/uapi/linux/target_core_user.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index fbd8ca67e107..f925a77f19ed 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -119,7 +119,7 @@ struct tcmu_cmd_entry { __u64 cdb_off; __u64 __pad1; __u64 __pad2; - struct iovec iov[0]; + __DECLARE_FLEX_ARRAY(struct iovec, iov); } req; struct { __u8 scsi_status; -- cgit v1.2.3 From 233eb4e786b57ea686b51c13a04cc2839fd682fc Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Thu, 23 Mar 2023 18:36:05 +0200 Subject: ethtool: Add support for configuring tx_push_buf_len This attribute, which is part of ethtool's ring param configuration allows the user to specify the maximum number of the packet's payload that can be written directly to the device. Example usage: # ethtool -G [interface] tx-push-buf-len [number of bytes] Co-developed-by: Jakub Kicinski Signed-off-by: Shay Agroskin Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index d39ce21381c5..1ebf8d455f07 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -357,6 +357,8 @@ enum { ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ ETHTOOL_A_RINGS_RX_PUSH, /* u8 */ + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, /* u32 */ + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, /* u32 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, -- cgit v1.2.3 From 954d1fa1ac93aa8a66f7d9a9ba545cf7f020d348 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 28 Mar 2023 10:57:59 +0800 Subject: macvlan: Add netlink attribute for broadcast cutoff Make the broadcast cutoff configurable through netlink. Note that macvlan is weird because there is no central device for us to configure (the lowerdev could be anything). So all the options are duplicated over what could be thousands of child devices. IFLA_MACVLAN_BC_QUEUE_LEN took the approach of taking the maximum of all child device settings. This is unnecessary as we could simply store the option in the port device and take the last child device that gets updated as the value to use. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 57ceb788250f..8d679688efe0 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -635,6 +635,7 @@ enum { IFLA_MACVLAN_MACADDR_COUNT, IFLA_MACVLAN_BC_QUEUE_LEN, IFLA_MACVLAN_BC_QUEUE_LEN_USED, + IFLA_MACVLAN_BC_CUTOFF, __IFLA_MACVLAN_MAX, }; -- cgit v1.2.3 From 4ca589661d964840d0d5de4b3baabbef78f453e3 Mon Sep 17 00:00:00 2001 From: Daniel Starke Date: Wed, 15 Mar 2023 11:53:52 +0100 Subject: tty: n_gsm: add ioctl for DLC specific parameter configuration Parameter negotiation has been introduced with commit 92f1f0c3290d ("tty: n_gsm: add parameter negotiation support") However, means to set individual parameters per DLCI are not yet implemented. Furthermore, it is currently not possible to keep a DLCI half open until the user application sets the right parameters for it. This is required to allow a user application to set its specific parameters before the underlying link is established. Otherwise, the link is opened and re-established right afterwards if the user application sets incompatible parameters. This may be an unexpected behavior for the peer. Add parameter 'wait_config' to 'gsm_config' to support setups where the DLCI specific user application sets its specific parameters after open() and before the link gets fully established. Setting this to zero disables the user application specific DLCI configuration option. Add the ioctls 'GSMIOC_GETCONF_DLCI' and 'GSMIOC_SETCONF_DLCI' for the ldisc and virtual ttys. This gets/sets the DLCI specific parameters and may trigger a reconnect of the DLCI if incompatible values have been set. Only the parameters for the DLCI associated with the virtual tty can be set or retrieved if called on these. Add remark within the documentation to introduce the new ioctls. Link: https://lore.kernel.org/oe-kbuild-all/202302281856.S9Lz4gHB-lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20230315105354.6234-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/gsmmux.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/gsmmux.h b/include/uapi/linux/gsmmux.h index a703780aa095..eb67884e5f38 100644 --- a/include/uapi/linux/gsmmux.h +++ b/include/uapi/linux/gsmmux.h @@ -43,10 +43,25 @@ struct gsm_config_ext { __u32 keep_alive; /* Control channel keep-alive in 1/100th of a * second (0 to disable) */ - __u32 reserved[7]; /* For future use, must be initialized to zero */ + __u32 wait_config; /* Wait for DLCI config before opening virtual link? */ + __u32 reserved[6]; /* For future use, must be initialized to zero */ }; #define GSMIOC_GETCONF_EXT _IOR('G', 5, struct gsm_config_ext) #define GSMIOC_SETCONF_EXT _IOW('G', 6, struct gsm_config_ext) +/* Set channel accordingly before calling GSMIOC_GETCONF_DLCI. */ +struct gsm_dlci_config { + __u32 channel; /* DLCI (0 for the associated DLCI) */ + __u32 adaption; /* Convergence layer type */ + __u32 mtu; /* Maximum transfer unit */ + __u32 priority; /* Priority (0 for default value) */ + __u32 i; /* Frame type (1 = UIH, 2 = UI) */ + __u32 k; /* Window size (0 for default value) */ + __u32 reserved[8]; /* For future use, must be initialized to zero */ +}; + +#define GSMIOC_GETCONF_DLCI _IOWR('G', 7, struct gsm_dlci_config) +#define GSMIOC_SETCONF_DLCI _IOW('G', 8, struct gsm_dlci_config) + #endif -- cgit v1.2.3 From e5a26a4048eeb9558e5c84f340a989c78db4adf4 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Tue, 28 Mar 2023 16:52:08 -0700 Subject: tracing/user_events: Split header into uapi and kernel The UAPI parts need to be split out from the kernel parts of user_events now that other parts of the kernel will reference it. Do so by moving the existing include/linux/user_events.h into include/uapi/linux/user_events.h. Link: https://lkml.kernel.org/r/20230328235219.203-2-beaub@linux.microsoft.com Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- include/uapi/linux/user_events.h | 48 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 include/uapi/linux/user_events.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h new file mode 100644 index 000000000000..03f92366068d --- /dev/null +++ b/include/uapi/linux/user_events.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2021-2022, Microsoft Corporation. + * + * Authors: + * Beau Belgrave + */ +#ifndef _UAPI_LINUX_USER_EVENTS_H +#define _UAPI_LINUX_USER_EVENTS_H + +#include +#include + +#define USER_EVENTS_SYSTEM "user_events" +#define USER_EVENTS_PREFIX "u:" + +/* Create dynamic location entry within a 32-bit value */ +#define DYN_LOC(offset, size) ((size) << 16 | (offset)) + +/* + * Describes an event registration and stores the results of the registration. + * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum + * must set the size and name_args before invocation. + */ +struct user_reg { + + /* Input: Size of the user_reg structure being used */ + __u32 size; + + /* Input: Pointer to string with event name, description and flags */ + __u64 name_args; + + /* Output: Bitwise index of the event within the status page */ + __u32 status_bit; + + /* Output: Index of the event to use when writing data */ + __u32 write_index; +} __attribute__((__packed__)); + +#define DIAG_IOC_MAGIC '*' + +/* Request to register a user_event */ +#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg *) + +/* Request to delete a user_event */ +#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char *) + +#endif /* _UAPI_LINUX_USER_EVENTS_H */ -- cgit v1.2.3 From 7235759084a4f8524a46bd2638885ff3b34ce279 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Tue, 28 Mar 2023 16:52:10 -0700 Subject: tracing/user_events: Use remote writes for event enablement As part of the discussions for user_events aligned with user space tracers, it was determined that user programs should register a aligned value to set or clear a bit when an event becomes enabled. Currently a shared page is being used that requires mmap(). Remove the shared page implementation and move to a user registered address implementation. In this new model during the event registration from user programs 3 new values are specified. The first is the address to update when the event is either enabled or disabled. The second is the bit to set/clear to reflect the event being enabled. The third is the size of the value at the specified address. This allows for a local 32/64-bit value in user programs to support both kernel and user tracers. As an example, setting bit 31 for kernel tracers when the event becomes enabled allows for user tracers to use the other bits for ref counts or other flags. The kernel side updates the bit atomically, user programs need to also update these values atomically. User provided addresses must be aligned on a natural boundary, this allows for single page checking and prevents odd behaviors such as a enable value straddling 2 pages instead of a single page. Currently page faults are only logged, future patches will handle these. Link: https://lkml.kernel.org/r/20230328235219.203-4-beaub@linux.microsoft.com Suggested-by: Mathieu Desnoyers Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- include/uapi/linux/user_events.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h index 03f92366068d..22521bc622db 100644 --- a/include/uapi/linux/user_events.h +++ b/include/uapi/linux/user_events.h @@ -27,12 +27,21 @@ struct user_reg { /* Input: Size of the user_reg structure being used */ __u32 size; + /* Input: Bit in enable address to use */ + __u8 enable_bit; + + /* Input: Enable size in bytes at address */ + __u8 enable_size; + + /* Input: Flags for future use, set to 0 */ + __u16 flags; + + /* Input: Address to update when enabled */ + __u64 enable_addr; + /* Input: Pointer to string with event name, description and flags */ __u64 name_args; - /* Output: Bitwise index of the event within the status page */ - __u32 status_bit; - /* Output: Index of the event to use when writing data */ __u32 write_index; } __attribute__((__packed__)); -- cgit v1.2.3 From dcb8177c13953872c9e5ce4a99b63a87a3c2f683 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Tue, 28 Mar 2023 16:52:12 -0700 Subject: tracing/user_events: Add ioctl for disabling addresses Enablements are now tracked by the lifetime of the task/mm. User processes need to be able to disable their addresses if tracing is requested to be turned off. Before unmapping the page would suffice. However, we now need a stronger contract. Add an ioctl to enable this. A new flag bit is added, freeing, to user_event_enabler to ensure that if the event is attempted to be removed while a fault is being handled that the remove is delayed until after the fault is reattempted. Link: https://lkml.kernel.org/r/20230328235219.203-6-beaub@linux.microsoft.com Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- include/uapi/linux/user_events.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h index 22521bc622db..3e7275e3234a 100644 --- a/include/uapi/linux/user_events.h +++ b/include/uapi/linux/user_events.h @@ -46,6 +46,27 @@ struct user_reg { __u32 write_index; } __attribute__((__packed__)); +/* + * Describes an event unregister, callers must set the size, address and bit. + * This structure is passed to the DIAG_IOCSUNREG ioctl to disable bit updates. + */ +struct user_unreg { + /* Input: Size of the user_unreg structure being used */ + __u32 size; + + /* Input: Bit to unregister */ + __u8 disable_bit; + + /* Input: Reserved, set to 0 */ + __u8 __reserved; + + /* Input: Reserved, set to 0 */ + __u16 __reserved2; + + /* Input: Address to unregister */ + __u64 disable_addr; +} __attribute__((__packed__)); + #define DIAG_IOC_MAGIC '*' /* Request to register a user_event */ @@ -54,4 +75,7 @@ struct user_reg { /* Request to delete a user_event */ #define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char *) +/* Requests to unregister a user_event */ +#define DIAG_IOCSUNREG _IOW(DIAG_IOC_MAGIC, 2, struct user_unreg*) + #endif /* _UAPI_LINUX_USER_EVENTS_H */ -- cgit v1.2.3 From a4c40c1349e32f9510707ed09e0961626980d8cb Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Tue, 28 Mar 2023 16:52:19 -0700 Subject: tracing/user_events: Align structs with tabs for readability Add tabs to make struct members easier to read and unify the style of the code. Link: https://lkml.kernel.org/r/20230328235219.203-13-beaub@linux.microsoft.com Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- include/uapi/linux/user_events.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h index 3e7275e3234a..2984aae4a2b4 100644 --- a/include/uapi/linux/user_events.h +++ b/include/uapi/linux/user_events.h @@ -25,25 +25,25 @@ struct user_reg { /* Input: Size of the user_reg structure being used */ - __u32 size; + __u32 size; /* Input: Bit in enable address to use */ - __u8 enable_bit; + __u8 enable_bit; /* Input: Enable size in bytes at address */ - __u8 enable_size; + __u8 enable_size; /* Input: Flags for future use, set to 0 */ - __u16 flags; + __u16 flags; /* Input: Address to update when enabled */ - __u64 enable_addr; + __u64 enable_addr; /* Input: Pointer to string with event name, description and flags */ - __u64 name_args; + __u64 name_args; /* Output: Index of the event to use when writing data */ - __u32 write_index; + __u32 write_index; } __attribute__((__packed__)); /* @@ -52,19 +52,19 @@ struct user_reg { */ struct user_unreg { /* Input: Size of the user_unreg structure being used */ - __u32 size; + __u32 size; /* Input: Bit to unregister */ - __u8 disable_bit; + __u8 disable_bit; /* Input: Reserved, set to 0 */ - __u8 __reserved; + __u8 __reserved; /* Input: Reserved, set to 0 */ - __u16 __reserved2; + __u16 __reserved2; /* Input: Address to unregister */ - __u64 disable_addr; + __u64 disable_addr; } __attribute__((__packed__)); #define DIAG_IOC_MAGIC '*' -- cgit v1.2.3 From 9a8aac92eba90b3b7c71d0531db535f5588388f5 Mon Sep 17 00:00:00 2001 From: Kieran Frewen Date: Fri, 24 Feb 2023 10:29:17 +1300 Subject: wifi: nl80211: support advertising S1G capabilities Include S1G capabilities in netlink band info messages. Signed-off-by: Kieran Frewen Co-developed-by: Gilad Itzkovitch Signed-off-by: Gilad Itzkovitch Link: https://lore.kernel.org/r/20230223212917.4010246-1-gilad.itzkovitch@virscient.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index cf4fb981e131..c59fec406da5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4061,6 +4061,10 @@ enum nl80211_band_iftype_attr { * @NL80211_BAND_ATTR_EDMG_BW_CONFIG: Channel BW Configuration subfield encodes * the allowed channel bandwidth configurations. * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13. + * @NL80211_BAND_ATTR_S1G_MCS_NSS_SET: S1G capabilities, supported S1G-MCS and NSS + * set subfield, as in the S1G information IE, 5 bytes + * @NL80211_BAND_ATTR_S1G_CAPA: S1G capabilities information subfield as in the + * S1G information IE, 10 bytes * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined * @__NL80211_BAND_ATTR_AFTER_LAST: internal use */ @@ -4081,6 +4085,9 @@ enum nl80211_band_attr { NL80211_BAND_ATTR_EDMG_CHANNELS, NL80211_BAND_ATTR_EDMG_BW_CONFIG, + NL80211_BAND_ATTR_S1G_MCS_NSS_SET, + NL80211_BAND_ATTR_S1G_CAPA, + /* keep last */ __NL80211_BAND_ATTR_AFTER_LAST, NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1 -- cgit v1.2.3 From 30ec7997d175cd689fc61bfc4059f4d35b11858c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:47 +0100 Subject: KVM: arm64: timers: Allow userspace to set the global counter offset And this is the moment you have all been waiting for: setting the counter offset from userspace. We expose a brand new capability that reports the ability to set the offset for both the virtual and physical sides. In keeping with the architecture, the offset is expressed as a delta that is substracted from the physical counter value. Once this new API is used, there is no going back, and the counters cannot be written to to set the offsets implicitly (the writes are instead ignored). Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-8-maz@kernel.org --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d77aef872a0a..6a7e1a0ecf04 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1184,6 +1184,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225 #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226 +#define KVM_CAP_COUNTER_OFFSET 227 #ifdef KVM_CAP_IRQ_ROUTING @@ -1543,6 +1544,8 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) #define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) +/* Available with KVM_CAP_COUNTER_OFFSET */ +#define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit v1.2.3 From 06961c487a33a222fd3d84998dc6398ed0449373 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 1 Mar 2023 12:48:43 -0500 Subject: dm: split discards further if target sets max_discard_granularity The block core (bio_split_discard) will already split discards based on the 'discard_granularity' and 'max_discard_sectors' queue_limits. But the DM thin target also needs to ensure that it doesn't receive a discard that spans a 'max_discard_sectors' boundary. Introduce a dm_target 'max_discard_granularity' flag that if set will cause DM core to split discard bios relative to 'max_discard_sectors'. This treats 'discard_granularity' as a "min_discard_granularity" and 'max_discard_sectors' as a "max_discard_granularity". Requested-by: Joe Thornber Signed-off-by: Mike Snitzer --- include/uapi/linux/dm-ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 7edf335778ba..1990b5700f69 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -286,9 +286,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 47 +#define DM_VERSION_MINOR 48 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2022-07-28)" +#define DM_VERSION_EXTRA "-ioctl (2023-03-01)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 00168b415a60cec7558608efb4fc50f2a73daae2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 23 Mar 2023 14:41:16 -0600 Subject: uapi: net: ipv6: Replace fake flex-array with flex-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero-length arrays as fake flexible arrays are deprecated and we are moving towards adopting C99 flexible-array members instead. Address the following warning found with GCC-13 and -fstrict-flex-arrays=3 enabled: net/ipv6/exthdrs.c: In function ‘fl6_update_dst’: net/ipv6/exthdrs.c:1393:28: warning: array subscript 0 is outside array bounds of ‘struct in6_addr[0]’ [-Warray-bounds=] 1393 | fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; | ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from ./include/linux/ipv6.h:5, from ./include/linux/icmpv6.h:6, from net/ipv6/exthdrs.c:27: ./include/uapi/linux/ipv6.h:84:33: note: while referencing ‘addr’ 84 | struct in6_addr addr[0]; | ^~~~ net/ipv6/exthdrs.c: In function ‘ipv6_push_rthdr0.isra’: net/ipv6/exthdrs.c:1125:19: warning: array subscript is outside array bounds of ‘struct in6_addr[0]’ [-Warray-bounds=] 1125 | phdr->addr[hops - 1] = **addr_p; | ~~~~~~~~~~^~~~~~~~~~ ./include/uapi/linux/ipv6.h:84:33: note: while referencing ‘addr’ 84 | struct in6_addr addr[0]; | ^~~~ This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -fstrict-flex-arrays=3 [1]. Link: https://github.com/KSPP/linux/issues/21 Link: https://github.com/KSPP/linux/issues/276 Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1] Signed-off-by: Gustavo A. R. Silva --- include/uapi/linux/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 53326dfc59ec..ac56605fe9bc 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -81,7 +81,7 @@ struct ipv6_opt_hdr { struct rt0_hdr { struct ipv6_rt_hdr rt_hdr; __u32 reserved; - struct in6_addr addr[0]; + struct in6_addr addr[]; #define rt0_type rt_hdr.type }; -- cgit v1.2.3 From 28c1b6df436819a7ed8a781835766e45139771a3 Mon Sep 17 00:00:00 2001 From: Eric Sage Date: Mon, 27 Mar 2023 13:44:49 -0400 Subject: netfilter: nfnetlink_queue: enable classid socket info retrieval This enables associating a socket with a v1 net_cls cgroup. Useful for applying a per-cgroup policy when processing packets in userspace. Signed-off-by: Eric Sage Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nfnetlink_queue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index ef7c97f21a15..efcb7c044a74 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -62,6 +62,7 @@ enum nfqnl_attr_type { NFQA_VLAN, /* nested attribute: packet vlan info */ NFQA_L2HDR, /* full L2 header */ NFQA_PRIORITY, /* skb->priority */ + NFQA_CGROUP_CLASSID, /* __u32 cgroup classid */ __NFQA_MAX }; -- cgit v1.2.3 From a25b8b7136ad43760bd876af62b6e59abd30496c Mon Sep 17 00:00:00 2001 From: Matthieu De Beule Date: Wed, 29 Mar 2023 12:52:18 +0000 Subject: netfilter: Correct documentation errors in nf_tables.h NFTA_RANGE_OP incorrectly says nft_cmp_ops instead of nft_range_ops. NFTA_LOG_GROUP and NFTA_LOG_QTHRESHOLD claim NLA_U32 instead of NLA_U16 NFTA_EXTHDR_SREG isn't documented as a register Signed-off-by: Matthieu De Beule Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 9c6f02c26054..c4d4d8e42dc8 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -685,7 +685,7 @@ enum nft_range_ops { * enum nft_range_attributes - nf_tables range expression netlink attributes * * @NFTA_RANGE_SREG: source register of data to compare (NLA_U32: nft_registers) - * @NFTA_RANGE_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_RANGE_OP: cmp operation (NLA_U32: nft_range_ops) * @NFTA_RANGE_FROM_DATA: data range from (NLA_NESTED: nft_data_attributes) * @NFTA_RANGE_TO_DATA: data range to (NLA_NESTED: nft_data_attributes) */ @@ -878,7 +878,7 @@ enum nft_exthdr_op { * @NFTA_EXTHDR_LEN: extension header length (NLA_U32) * @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32) * @NFTA_EXTHDR_OP: option match type (NLA_U32) - * @NFTA_EXTHDR_SREG: option match type (NLA_U32) + * @NFTA_EXTHDR_SREG: source register (NLA_U32: nft_registers) */ enum nft_exthdr_attributes { NFTA_EXTHDR_UNSPEC, @@ -1262,10 +1262,10 @@ enum nft_last_attributes { /** * enum nft_log_attributes - nf_tables log expression netlink attributes * - * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32) + * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U16) * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING) * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32) - * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32) + * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U16) * @NFTA_LOG_LEVEL: log level (NLA_U32) * @NFTA_LOG_FLAGS: logging flags (NLA_U32) */ -- cgit v1.2.3 From 2384127e98db52a6ac2577924ad9cae25f3e7472 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 29 Mar 2023 11:54:52 +0200 Subject: net/sched: act_tunnel_key: add support for "don't fragment" extend "act_tunnel_key" to allow specifying TUNNEL_DONT_FRAGMENT. Suggested-by: Ilya Maximets Reviewed-by: Pedro Tammela Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Signed-off-by: Jakub Kicinski --- include/uapi/linux/tc_act/tc_tunnel_key.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h index 49ad4033951b..37c6f612f161 100644 --- a/include/uapi/linux/tc_act/tc_tunnel_key.h +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -34,6 +34,7 @@ enum { */ TCA_TUNNEL_KEY_ENC_TOS, /* u8 */ TCA_TUNNEL_KEY_ENC_TTL, /* u8 */ + TCA_TUNNEL_KEY_NO_FRAG, /* flag */ __TCA_TUNNEL_KEY_MAX, }; -- cgit v1.2.3 From 9e410fe3dc9a938bc47f71dff254be7419bd40d2 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 3 Mar 2023 13:34:11 -0800 Subject: dmaengine: idxd: Add descriptor definitions for 16 bytes of pattern in memory fill operation The memory fill operation (0x04) can fill in memory with either 8 bytes or 16 bytes of pattern. To fill in memory with 16 bytes of pattern, the first 8 bytes are provided in pattern lower in bytes 16-23 and the next 8 bytes are in pattern upper in bytes 40-47 in the descriptor. Currently only 8 bytes of pattern is enabled. Add descriptor definitions for pattern lower and pattern upper so that user can use 16 bytes of pattern to fill memory. Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230303213413.3357431-2-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 1d553bedbdb5..c43d7df5fc15 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -180,6 +180,7 @@ struct dsa_hw_desc { uint64_t rdback_addr; uint64_t pattern; uint64_t desc_list_addr; + uint64_t pattern_lower; }; union { uint64_t dst_addr; @@ -244,6 +245,9 @@ struct dsa_hw_desc { uint16_t dest_app_tag_seed; }; + /* Fill */ + uint64_t pattern_upper; + uint8_t op_specific[24]; }; } __attribute__((packed)); -- cgit v1.2.3 From 12bbc2c2605516e781cd86e3cde9fe1f889b72cc Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 3 Mar 2023 13:34:12 -0800 Subject: dmaengine: idxd: Add descriptor definitions for DIX generate operation The Data Integrity Extension (DIX) generate operation (0x17) computes the Data Integrity Field (DIF) on the source data and writes only the computed DIF for each source block to the PI destination address. Add descriptor definitions for this operation so that user can use DSA to accelerate DIX generate operation. Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230303213413.3357431-3-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index c43d7df5fc15..4c12e93a6aa6 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -78,6 +78,7 @@ enum dsa_opcode { DSA_OPCODE_DIF_INS, DSA_OPCODE_DIF_STRP, DSA_OPCODE_DIF_UPDT, + DSA_OPCODE_DIX_GEN = 0x17, DSA_OPCODE_CFLUSH = 0x20, }; @@ -248,6 +249,17 @@ struct dsa_hw_desc { /* Fill */ uint64_t pattern_upper; + /* DIX generate */ + struct { + uint8_t dix_gen_res; + uint8_t dest_dif_flags; + uint8_t dif_flags; + uint8_t dix_gen_res2[13]; + uint32_t ref_tag_seed; + uint16_t app_tag_mask; + uint16_t app_tag_seed; + }; + uint8_t op_specific[24]; }; } __attribute__((packed)); @@ -326,6 +338,14 @@ struct dsa_completion_record { uint16_t dif_upd_dest_app_tag; }; + /* DIX generate */ + struct { + uint64_t dix_gen_res; + uint32_t dix_ref_tag; + uint16_t dix_app_tag_mask; + uint16_t dix_app_tag; + }; + uint8_t op_specific[16]; }; } __attribute__((packed)); -- cgit v1.2.3 From 6fec8938b7b4fe2b2c503fe87b2783a50bff0415 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 3 Mar 2023 13:34:13 -0800 Subject: dmaengine: idxd: Add descriptor definitions for translation fetch operation The translation fetch operation (0x0A) fetches address translations for the address range specified in the descriptor by issuing address translation (ATS) requests to the IOMMU. Add descriptor definitions for the operation so that user can use DSA to accelerate translation fetch. Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230303213413.3357431-4-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 4c12e93a6aa6..fc47635b57dc 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -72,6 +72,7 @@ enum dsa_opcode { DSA_OPCODE_CR_DELTA, DSA_OPCODE_AP_DELTA, DSA_OPCODE_DUALCAST, + DSA_OPCODE_TRANSL_FETCH, DSA_OPCODE_CRCGEN = 0x10, DSA_OPCODE_COPY_CRC, DSA_OPCODE_DIF_CHECK, @@ -182,6 +183,7 @@ struct dsa_hw_desc { uint64_t pattern; uint64_t desc_list_addr; uint64_t pattern_lower; + uint64_t transl_fetch_addr; }; union { uint64_t dst_addr; @@ -192,6 +194,7 @@ struct dsa_hw_desc { union { uint32_t xfer_size; uint32_t desc_count; + uint32_t region_size; }; uint16_t int_handle; uint16_t rsvd1; @@ -249,6 +252,12 @@ struct dsa_hw_desc { /* Fill */ uint64_t pattern_upper; + /* Translation fetch */ + struct { + uint64_t transl_fetch_res; + uint32_t region_stride; + }; + /* DIX generate */ struct { uint8_t dix_gen_res; -- cgit v1.2.3 From 81cf17cd3ab3e5441e876a8e9e9c38ae9920cecb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Mar 2023 11:01:45 -0600 Subject: io_uring/kbuf: rename struct io_uring_buf_reg 'pad' to'flags' In preparation for allowing flags to be set for registration, rename the padding and use it for that. Acked-by: Helge Deller Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 709de6d4feb2..c3f3ea997f3a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -640,7 +640,7 @@ struct io_uring_buf_reg { __u64 ring_addr; __u32 ring_entries; __u16 bgid; - __u16 pad; + __u16 flags; __u64 resv[3]; }; -- cgit v1.2.3 From c56e022c0a27142b7b59ae6bdf45f86bf4b298a1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Mar 2023 11:07:19 -0600 Subject: io_uring: add support for user mapped provided buffer ring The ring mapped provided buffer rings rely on the application allocating the memory for the ring, and then the kernel will map it. This generally works fine, but runs into issues on some architectures where we need to be able to ensure that the kernel and application virtual address for the ring play nicely together. This at least impacts architectures that set SHM_COLOUR, but potentially also anyone setting SHMLBA. To use this variant of ring provided buffers, the application need not allocate any memory for the ring. Instead the kernel will do so, and the allocation must subsequently call mmap(2) on the ring with the offset set to: IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT) to get a virtual address for the buffer ring. Normally the application would allocate a suitable piece of memory (and correctly aligned) and simply pass that in via io_uring_buf_reg.ring_addr and the kernel would map it. Outside of the setup differences, the kernel allocate + user mapped provided buffer ring works exactly the same. Acked-by: Helge Deller Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index c3f3ea997f3a..1d59c816a5b8 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -389,6 +389,9 @@ enum { #define IORING_OFF_SQ_RING 0ULL #define IORING_OFF_CQ_RING 0x8000000ULL #define IORING_OFF_SQES 0x10000000ULL +#define IORING_OFF_PBUF_RING 0x80000000ULL +#define IORING_OFF_PBUF_SHIFT 16 +#define IORING_OFF_MMAP_MASK 0xf8000000ULL /* * Filled with the offset for mmap(2) @@ -635,6 +638,20 @@ struct io_uring_buf_ring { }; }; +/* + * Flags for IORING_REGISTER_PBUF_RING. + * + * IOU_PBUF_RING_MMAP: If set, kernel will allocate the memory for the ring. + * The application must not set a ring_addr in struct + * io_uring_buf_reg, instead it must subsequently call + * mmap(2) with the offset set as: + * IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT) + * to get a virtual mapping for the ring. + */ +enum { + IOU_PBUF_RING_MMAP = 1, +}; + /* argument for IORING_(UN)REGISTER_PBUF_RING */ struct io_uring_buf_reg { __u64 ring_addr; -- cgit v1.2.3 From d322818ef4c752d79cd667474418691237aa9ccf Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 27 Mar 2023 16:34:48 +0100 Subject: io_uring: kill unused notif declarations There are two leftover structures from the notification registration mechanism that has never been released, kill them. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f05f65aebaf8b1b5bf28519a8fdb350e3e7c9ad0.1679924536.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1d59c816a5b8..f8d14d1c58d3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -571,19 +571,6 @@ struct io_uring_rsrc_update2 { __u32 resv2; }; -struct io_uring_notification_slot { - __u64 tag; - __u64 resv[3]; -}; - -struct io_uring_notification_register { - __u32 nr_slots; - __u32 resv; - __u64 resv2; - __u64 data; - __u64 resv3; -}; - /* Skip updating fd indexes set to this value in the fd table */ #define IORING_REGISTER_FILES_SKIP (-2) -- cgit v1.2.3 From f1ba4e674febf5c0e9f725a75ca43b7722b4e963 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Thu, 30 Mar 2023 17:49:52 -0400 Subject: virtio-blk: fix to match virtio spec The merged patch series to support zoned block devices in virtio-blk is not the most up to date version. The merged patch can be found at https://lore.kernel.org/linux-block/20221016034127.330942-3-dmitry.fomichev@wdc.com/ but the latest and reviewed version is https://lore.kernel.org/linux-block/20221110053952.3378990-3-dmitry.fomichev@wdc.com/ The reason is apparently that the correct mailing lists and maintainers were not copied. The differences between the two are mostly cleanups, but there is one change that is very important in terms of compatibility with the approved virtio-zbd specification. Before it was approved, the OASIS virtio spec had a change in VIRTIO_BLK_T_ZONE_APPEND request layout that is not reflected in the current virtio-blk driver code. In the running code, the status is the first byte of the in-header that is followed by some pad bytes and the u64 that carries the sector at which the data has been written to the zone back to the driver, aka the append sector. This layout turned out to be problematic for implementing in QEMU and the request status byte has been eventually made the last byte of the in-header. The current code doesn't expect that and this causes the append sector value always come as zero to the block layer. This needs to be fixed ASAP. Fixes: 95bfec41bd3d ("virtio-blk: add support for zoned block devices") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Stefan Hajnoczi Reviewed-by: Damien Le Moal Message-Id: <20230330214953.1088216-2-dmitry.fomichev@wdc.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_blk.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 5af2a0300bb9..3744e4da1b2a 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -140,11 +140,11 @@ struct virtio_blk_config { /* Zoned block device characteristics (if VIRTIO_BLK_F_ZONED) */ struct virtio_blk_zoned_characteristics { - __le32 zone_sectors; - __le32 max_open_zones; - __le32 max_active_zones; - __le32 max_append_sectors; - __le32 write_granularity; + __virtio32 zone_sectors; + __virtio32 max_open_zones; + __virtio32 max_active_zones; + __virtio32 max_append_sectors; + __virtio32 write_granularity; __u8 model; __u8 unused2[3]; } zoned; @@ -241,11 +241,11 @@ struct virtio_blk_outhdr { */ struct virtio_blk_zone_descriptor { /* Zone capacity */ - __le64 z_cap; + __virtio64 z_cap; /* The starting sector of the zone */ - __le64 z_start; + __virtio64 z_start; /* Zone write pointer position in sectors */ - __le64 z_wp; + __virtio64 z_wp; /* Zone type */ __u8 z_type; /* Zone state */ @@ -254,7 +254,7 @@ struct virtio_blk_zone_descriptor { }; struct virtio_blk_zone_report { - __le64 nr_zones; + __virtio64 nr_zones; __u8 reserved[56]; struct virtio_blk_zone_descriptor zones[]; }; -- cgit v1.2.3 From e65733b5c59a1ea20324a03494364958bef3fc68 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:38 +0000 Subject: KVM: x86: Redefine 'longmode' as a flag for KVM_EXIT_HYPERCALL The 'longmode' field is a bit annoying as it blows an entire __u32 to represent a boolean value. Since other architectures are looking to add support for KVM_EXIT_HYPERCALL, now is probably a good time to clean it up. Redefine the field (and the remaining padding) as a set of flags. Preserve the existing ABI by using bit 0 to indicate if the guest was in long mode and requiring that the remaining 31 bits must be zero. Cc: Paolo Bonzini Acked-by: Sean Christopherson Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-2-oliver.upton@linux.dev --- include/uapi/linux/kvm.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d77aef872a0a..dd42d7dfb86c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -341,8 +341,13 @@ struct kvm_run { __u64 nr; __u64 args[6]; __u64 ret; - __u32 longmode; - __u32 pad; + + union { +#ifndef __KERNEL__ + __u32 longmode; +#endif + __u64 flags; + }; } hypercall; /* KVM_EXIT_TPR_ACCESS */ struct { -- cgit v1.2.3 From 4c4dd04e75e8177311d17387326253674cb0558b Mon Sep 17 00:00:00 2001 From: Ondrej Kozina Date: Wed, 5 Apr 2023 13:12:23 +0200 Subject: sed-opal: Add command to read locking range parameters. It returns following attributes: locking range start locking range length read lock enabled write lock enabled lock state (RW, RO or LK) It can be retrieved by user authority provided the authority was added to locking range via prior IOC_OPAL_ADD_USR_TO_LR ioctl command. The command was extended to add user in ACE that allows to read attributes listed above. Signed-off-by: Ondrej Kozina Tested-by: Luca Boccassi Tested-by: Milan Broz Link: https://lore.kernel.org/r/20230405111223.272816-6-okozina@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/sed-opal.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index d7a1524023db..3905c8ffedbf 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -78,6 +78,16 @@ struct opal_user_lr_setup { struct opal_session_info session; }; +struct opal_lr_status { + struct opal_session_info session; + __u64 range_start; + __u64 range_length; + __u32 RLE; /* Read Lock enabled */ + __u32 WLE; /* Write Lock Enabled */ + __u32 l_state; + __u8 align[4]; +}; + struct opal_lock_unlock { struct opal_session_info session; __u32 l_state; @@ -168,5 +178,6 @@ struct opal_status { #define IOC_OPAL_WRITE_SHADOW_MBR _IOW('p', 234, struct opal_shadow_mbr) #define IOC_OPAL_GENERIC_TABLE_RW _IOW('p', 235, struct opal_read_write_table) #define IOC_OPAL_GET_STATUS _IOR('p', 236, struct opal_status) +#define IOC_OPAL_GET_LR_STATUS _IOW('p', 237, struct opal_lr_status) #endif /* _UAPI_SED_OPAL_H */ -- cgit v1.2.3 From 2bad466cc9d9b4c3b4b16eb9c03c919b59561316 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 9 Mar 2023 17:37:10 -0500 Subject: mm/uffd: UFFD_FEATURE_WP_UNPOPULATED Patch series "mm/uffd: Add feature bit UFFD_FEATURE_WP_UNPOPULATED", v4. The new feature bit makes anonymous memory acts the same as file memory on userfaultfd-wp in that it'll also wr-protect none ptes. It can be useful in two cases: (1) Uffd-wp app that needs to wr-protect none ptes like QEMU snapshot, so pre-fault can be replaced by enabling this flag and speed up protections (2) It helps to implement async uffd-wp mode that Muhammad is working on [1] It's debatable whether this is the most ideal solution because with the new feature bit set, wr-protect none pte needs to pre-populate the pgtables to the last level (PAGE_SIZE). But it seems fine so far to service either purpose above, so we can leave optimizations for later. The series brings pte markers to anonymous memory too. There's some change in the common mm code path in the 1st patch, great to have some eye looking at it, but hopefully they're still relatively straightforward. This patch (of 2): This is a new feature that controls how uffd-wp handles none ptes. When it's set, the kernel will handle anonymous memory the same way as file memory, by allowing the user to wr-protect unpopulated ptes. File memories handles none ptes consistently by allowing wr-protecting of none ptes because of the unawareness of page cache being exist or not. For anonymous it was not as persistent because we used to assume that we don't need protections on none ptes or known zero pages. One use case of such a feature bit was VM live snapshot, where if without wr-protecting empty ptes the snapshot can contain random rubbish in the holes of the anonymous memory, which can cause misbehave of the guest when the guest OS assumes the pages should be all zeros. QEMU worked it around by pre-populate the section with reads to fill in zero page entries before starting the whole snapshot process [1]. Recently there's another need raised on using userfaultfd wr-protect for detecting dirty pages (to replace soft-dirty in some cases) [2]. In that case if without being able to wr-protect none ptes by default, the dirty info can get lost, since we cannot treat every none pte to be dirty (the current design is identify a page dirty based on uffd-wp bit being cleared). In general, we want to be able to wr-protect empty ptes too even for anonymous. This patch implements UFFD_FEATURE_WP_UNPOPULATED so that it'll make uffd-wp handling on none ptes being consistent no matter what the memory type is underneath. It doesn't have any impact on file memories so far because we already have pte markers taking care of that. So it only affects anonymous. The feature bit is by default off, so the old behavior will be maintained. Sometimes it may be wanted because the wr-protect of none ptes will contain overheads not only during UFFDIO_WRITEPROTECT (by applying pte markers to anonymous), but also on creating the pgtables to store the pte markers. So there's potentially less chance of using thp on the first fault for a none pmd or larger than a pmd. The major implementation part is teaching the whole kernel to understand pte markers even for anonymously mapped ranges, meanwhile allowing the UFFDIO_WRITEPROTECT ioctl to apply pte markers for anonymous too when the new feature bit is set. Note that even if the patch subject starts with mm/uffd, there're a few small refactors to major mm path of handling anonymous page faults. But they should be straightforward. With WP_UNPOPUATED, application like QEMU can avoid pre-read faults all the memory before wr-protect during taking a live snapshot. Quotting from Muhammad's test result here [3] based on a simple program [4]: (1) With huge page disabled echo madvise > /sys/kernel/mm/transparent_hugepage/enabled ./uffd_wp_perf Test DEFAULT: 4 Test PRE-READ: 1111453 (pre-fault 1101011) Test MADVISE: 278276 (pre-fault 266378) Test WP-UNPOPULATE: 11712 (2) With Huge page enabled echo always > /sys/kernel/mm/transparent_hugepage/enabled ./uffd_wp_perf Test DEFAULT: 4 Test PRE-READ: 22521 (pre-fault 22348) Test MADVISE: 4909 (pre-fault 4743) Test WP-UNPOPULATE: 14448 There'll be a great perf boost for no-thp case, while for thp enabled with extreme case of all-thp-zero WP_UNPOPULATED can be slower than MADVISE, but that's low possibility in reality, also the overhead was not reduced but postponed until a follow up write on any huge zero thp, so potentially it is faster by making the follow up writes slower. [1] https://lore.kernel.org/all/20210401092226.102804-4-andrey.gruzdev@virtuozzo.com/ [2] https://lore.kernel.org/all/Y+v2HJ8+3i%2FKzDBu@x1n/ [3] https://lore.kernel.org/all/d0eb0a13-16dc-1ac1-653a-78b7273781e3@collabora.com/ [4] https://github.com/xzpeter/clibs/blob/master/uffd-test/uffd-wp-perf.c [peterx@redhat.com: comment changes, oneliner fix to khugepaged] Link: https://lkml.kernel.org/r/ZB2/8jPhD3fpx5U8@x1n Link: https://lkml.kernel.org/r/20230309223711.823547-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20230309223711.823547-2-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: Mike Rapoport Cc: Muhammad Usama Anjum Cc: Nadav Amit Cc: Paul Gofman Signed-off-by: Andrew Morton --- include/uapi/linux/userfaultfd.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 005e5e306266..90c958952bfc 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -38,7 +38,8 @@ UFFD_FEATURE_MINOR_HUGETLBFS | \ UFFD_FEATURE_MINOR_SHMEM | \ UFFD_FEATURE_EXACT_ADDRESS | \ - UFFD_FEATURE_WP_HUGETLBFS_SHMEM) + UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ + UFFD_FEATURE_WP_UNPOPULATED) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -203,6 +204,12 @@ struct uffdio_api { * * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd * write-protection mode is supported on both shmem and hugetlbfs. + * + * UFFD_FEATURE_WP_UNPOPULATED indicates that userfaultfd + * write-protection mode will always apply to unpopulated pages + * (i.e. empty ptes). This will be the default behavior for shmem + * & hugetlbfs, so this flag only affects anonymous memory behavior + * when userfault write-protection mode is registered. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -217,6 +224,7 @@ struct uffdio_api { #define UFFD_FEATURE_MINOR_SHMEM (1<<10) #define UFFD_FEATURE_EXACT_ADDRESS (1<<11) #define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12) +#define UFFD_FEATURE_WP_UNPOPULATED (1<<13) __u64 features; __u64 ioctls; -- cgit v1.2.3 From 0289184476c845968ad6ac9083c96cc0f75ca505 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Tue, 14 Mar 2023 15:12:50 -0700 Subject: mm: userfaultfd: add UFFDIO_CONTINUE_MODE_WP to install WP PTEs UFFDIO_COPY already has UFFDIO_COPY_MODE_WP, so when installing a new PTE to resolve a missing fault, one can install a write-protected one. This is useful when using UFFDIO_REGISTER_MODE_{MISSING,WP} in combination. This was motivated by testing HugeTLB HGM [1], and in particular its interaction with userfaultfd features. Existing userfaultfd code supports using WP and MINOR modes together (i.e. you can register an area with both enabled), but without this CONTINUE flag the combination is in practice unusable. So, add an analogous UFFDIO_CONTINUE_MODE_WP, which does the same thing as UFFDIO_COPY_MODE_WP, but for *minor* faults. Update the selftest to do some very basic exercising of the new flag. Update Documentation/ to describe how these flags are used (neither the COPY nor the new CONTINUE versions of this mode flag were described there before). [1]: https://patchwork.kernel.org/project/linux-mm/cover/20230218002819.1486479-1-jthoughton@google.com/ Link: https://lkml.kernel.org/r/20230314221250.682452-5-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Acked-by: Peter Xu Acked-by: Mike Rapoport (IBM) Cc: Al Viro Cc: Hugh Dickins Cc: Jan Kara Cc: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Muchun Song Cc: Nadav Amit Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/uapi/linux/userfaultfd.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 90c958952bfc..66dd4cd277bd 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -305,6 +305,13 @@ struct uffdio_writeprotect { struct uffdio_continue { struct uffdio_range range; #define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) + /* + * UFFDIO_CONTINUE_MODE_WP will map the page write protected on + * the fly. UFFDIO_CONTINUE_MODE_WP is available only if the + * write protected ioctl is implemented for the range + * according to the uffdio_register.ioctls. + */ +#define UFFDIO_CONTINUE_MODE_WP ((__u64)1<<1) __u64 mode; /* -- cgit v1.2.3 From ae77d1391445f1357d888990c07b5288a4cacac5 Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Thu, 12 Jan 2023 09:04:47 +0000 Subject: media: add Sorenson Spark video format Sorenson Spark is an implementation of H.263 for use in Flash Video and Adobe Flash files. Sorenson Spark is an incomplete implementation of H.263. It differs mostly in header structure and ranges of the coefficients. Signed-off-by: Ming Qian Reviewed-by: Nicolas Dufresne Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index b5b3d1fddea2..f943d58ec9df 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -750,6 +750,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_FWHT_STATELESS v4l2_fourcc('S', 'F', 'W', 'H') /* Stateless FWHT (vicodec) */ #define V4L2_PIX_FMT_H264_SLICE v4l2_fourcc('S', '2', '6', '4') /* H264 parsed slices */ #define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ +#define V4L2_PIX_FMT_SPK v4l2_fourcc('S', 'P', 'K', '0') /* Sorenson Spark */ /* Vendor-specific formats */ #define V4L2_PIX_FMT_CPIA1 v4l2_fourcc('C', 'P', 'I', 'A') /* cpia1 YUV */ -- cgit v1.2.3 From ec9aa62a1e4d151e9f14b7bda0b13438a901f904 Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Tue, 17 Jan 2023 02:31:54 +0000 Subject: media: add RealVideo format RV30 and RV40 RealVideo, or also spelled as Real Video, is a suite of proprietary video compression formats developed by RealNetworks - the specific format changes with the version. RealVideo codecs are identified by four-character codes. RV30 and RV40 are RealNetworks' proprietary H.264-based codecs. Reviewed-by: Nicolas Dufresne Signed-off-by: Ming Qian Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index f943d58ec9df..d452481a5316 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -751,6 +751,8 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_H264_SLICE v4l2_fourcc('S', '2', '6', '4') /* H264 parsed slices */ #define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ #define V4L2_PIX_FMT_SPK v4l2_fourcc('S', 'P', 'K', '0') /* Sorenson Spark */ +#define V4L2_PIX_FMT_RV30 v4l2_fourcc('R', 'V', '3', '0') /* RealVideo 8 */ +#define V4L2_PIX_FMT_RV40 v4l2_fourcc('R', 'V', '4', '0') /* RealVideo 9 & 10 */ /* Vendor-specific formats */ #define V4L2_PIX_FMT_CPIA1 v4l2_fourcc('C', 'P', 'I', 'A') /* cpia1 YUV */ -- cgit v1.2.3 From 47a71c1f9af0a334c9dfa97633c41de4feda4287 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Apr 2023 16:41:58 -0700 Subject: bpf: Add log_true_size output field to return necessary log buffer size Add output-only log_true_size and btf_log_true_size field to BPF_PROG_LOAD and BPF_BTF_LOAD commands, respectively. It will return the size of log buffer necessary to fit in all the log contents at specified log_level. This is very useful for BPF loader libraries like libbpf to be able to size log buffer correctly, but could be used by users directly, if necessary, as well. This patch plumbs all this through the code, taking into account actual bpf_attr size provided by user to determine if these new fields are expected by users. And if they are, set them from kernel on return. We refactory btf_parse() function to accommodate this, moving attr and uattr handling inside it. The rest is very straightforward code, which is split from the logging accounting changes in the previous patch to make it simpler to review logic vs UAPI changes. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20230406234205.323208-13-andrii@kernel.org --- include/uapi/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e3d3b5160d26..3823100b7934 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1407,6 +1407,11 @@ union bpf_attr { __aligned_u64 fd_array; /* array of FDs */ __aligned_u64 core_relos; __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ + /* output: actual total log contents size (including termintaing zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + */ + __u32 log_true_size; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1492,6 +1497,11 @@ union bpf_attr { __u32 btf_size; __u32 btf_log_size; __u32 btf_log_level; + /* output: actual total log contents size (including termintaing zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + */ + __u32 btf_log_true_size; }; struct { -- cgit v1.2.3 From 244da66cda359227d80ccb41dbcb99da40eae186 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:30 -0700 Subject: dmaengine: idxd: setup event log configuration Add setup of event log feature for supported device. Event log addresses error reporting that was lacking in gen 1 DSA devices where a second error event does not get reported when a first event is pending software handling. The event log allows a circular buffer that the device can push error events to. It is up to the user to create a large enough event log ring in order to capture the expected events. The evl size can be set in the device sysfs attribute. By default 64 entries are supported as minimal when event log is enabled. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-4-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index fc47635b57dc..5d05bf12f2bd 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -30,6 +30,7 @@ enum idxd_scmd_stat { IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, IDXD_SCMD_WQ_IRQ_ERR = 0x80100000, IDXD_SCMD_WQ_USER_NO_IOMMU = 0x80110000, + IDXD_SCMD_DEV_EVL_ERR = 0x80120000, }; #define IDXD_SCMD_SOFTERR_MASK 0x80000000 -- cgit v1.2.3 From 2f431ba908d2ef05da478d10925207728f1ff483 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:31 -0700 Subject: dmaengine: idxd: add interrupt handling for event log An event log interrupt is raised in the misc interrupt INTCAUSE register when an event is written by the hardware. Add basic event log processing support to the interrupt handler. The event log is a ring where the hardware owns the tail and the software owns the head. The hardware will advance the tail index when an additional event has been pushed to memory. The software will process the log entry and then advances the head. The log is full when (tail + 1) % log_size = head. The hardware will stop writing when the log is full. The user is expected to create a log size large enough to handle all the expected events. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-5-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 5d05bf12f2bd..0bc8eea18586 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -170,6 +170,7 @@ enum iax_completion_status { #define DSA_COMP_STATUS_MASK 0x7f #define DSA_COMP_STATUS_WRITE 0x80 +#define DSA_COMP_STATUS(status) ((status) & DSA_COMP_STATUS_MASK) struct dsa_hw_desc { uint32_t pasid:20; -- cgit v1.2.3 From 5fbe6503b52f5665560584f62adab5db70ac910e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:32 -0700 Subject: dmanegine: idxd: add debugfs for event log dump Add debugfs entry to dump the content of the event log for debugging. The function will dump all non-zero entries in the event log. It will note which entries are processed and which entries are still pending processing at the time of the dump. The entries may not always be in chronological order due to the log is a circular buffer. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-6-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 0bc8eea18586..e86199d09a91 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -311,7 +311,8 @@ struct dsa_completion_record { uint8_t result; uint8_t dif_status; }; - uint16_t rsvd; + uint8_t fault_info; + uint8_t rsvd; uint32_t bytes_completed; uint64_t fault_addr; union { @@ -368,7 +369,8 @@ struct dsa_raw_completion_record { struct iax_completion_record { volatile uint8_t status; uint8_t error_code; - uint16_t rsvd; + uint8_t fault_info; + uint8_t rsvd; uint32_t bytes_completed; uint64_t fault_addr; uint32_t invalid_flags; -- cgit v1.2.3 From c40bd7d9737bdcfb02d42765bc6c59b338151123 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:36 -0700 Subject: dmaengine: idxd: process user page faults for completion record DSA supports page fault handling through PRS. However, the DMA engine that's processing the descriptor is blocked until the PRS response is received. Other workqueues sharing the engine are also blocked. Page fault handing by the driver with PRS disabled can be used to mitigate the stalling. With PRS disabled while ATS remain enabled, DSA handles page faults on a completion record by reporting an event in the event log. In this instance, the descriptor is completed and the event log contains the completion record address and the contents of the completion record. Add support to the event log handling code to fault in the completion record and copy the content of the completion record to user memory. A bitmap is introduced to keep track of discarded event log entries. When the user process initiates ->release() of the char device, it no longer is interested in any remaining event log entries tied to the relevant wq and PASID. The driver will mark the event log entry index in the bitmap. Upon encountering the entries during processing, the event log handler will just clear the bitmap bit and skip the entry rather than attempt to process the event log entry. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-10-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index e86199d09a91..4b584d5afd87 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -135,6 +135,7 @@ enum dsa_completion_status { DSA_COMP_HW_ERR1, DSA_COMP_HW_ERR_DRB, DSA_COMP_TRANSLATION_FAIL, + DSA_COMP_DRAIN_EVL = 0x26, }; enum iax_completion_status { -- cgit v1.2.3 From 6926987185a3ae92c31b99ce1bfdfb04e95057c0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:37 -0700 Subject: dmaengine: idxd: add descs_completed field for completion record The descs_completed field for a completion record is part of a batch descriptor completion record. It takes the same location as bytes_completed in a normal descriptor field. Add to expose to user. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-11-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 4b584d5afd87..76ad71bf751e 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -314,7 +314,10 @@ struct dsa_completion_record { }; uint8_t fault_info; uint8_t rsvd; - uint32_t bytes_completed; + union { + uint32_t bytes_completed; + uint32_t descs_completed; + }; uint64_t fault_addr; union { /* common record */ -- cgit v1.2.3 From 2442b7473ad03671378d2d95651bd6bbe09a0943 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:38 -0700 Subject: dmaengine: idxd: process batch descriptor completion record faults Add event log processing for faulting of user batch descriptor completion record. When encountering an event log entry for a page fault on a completion record, the driver is expected to do the following: 1. If the "first error in batch" bit in event log entry error info is set, discard any previously recorded errors associated with the "batch identifier". 2. Fix the page fault according to the fault address in the event log. If successful, write the completion record to the fault address in user space. 3. If an error is encountered while writing the completion record and it is associated to a descriptor in the batch, the driver associates the error with the batch identifier of the event log entry and tracks it until the event log entry for the corresponding batch desc is encountered. While processing an event log entry for a batch descriptor with error indicating that one or more descs in the batch had event log entries, the driver will do the following before writing the batch completion record: 1. If the status field of the completion record is 0x1, the driver will change it to error code 0x5 (one or more operations in batch completed with status not successful) and changes the result field to 1. 2. If the status is error code 0x6 (page fault on batch descriptor list address), change the result field to 1. 3. If status is any other value, the completion record is not changed. 4. Clear the recorded error in preparation for next batch with same batch identifier. The result field is for user software to determine whether to set the "Batch Error" flag bit in the descriptor for continuation of partial batch descriptor completion. See DSA spec 2.0 for additional information. If no error has been recorded for the batch, the batch completion record is written to user space as is. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-12-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 76ad71bf751e..606b52e88ce3 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -136,6 +136,7 @@ enum dsa_completion_status { DSA_COMP_HW_ERR_DRB, DSA_COMP_TRANSLATION_FAIL, DSA_COMP_DRAIN_EVL = 0x26, + DSA_COMP_BATCH_EVL_ERR, }; enum iax_completion_status { -- cgit v1.2.3 From f62af20bed2d9e824f51cfc97ff01bc261f40e58 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 11 Apr 2023 21:01:54 +0300 Subject: net/sched: mqprio: allow per-TC user input of FP adminStatus IEEE 802.1Q-2018 clause 6.7.2 Frame preemption specifies that each packet priority can be assigned to a "frame preemption status" value of either "express" or "preemptible". Express priorities are transmitted by the local device through the eMAC, and preemptible priorities through the pMAC (the concepts of eMAC and pMAC come from the 802.3 MAC Merge layer). The FP adminStatus is defined per packet priority, but 802.1Q clause 12.30.1.1.1 framePreemptionAdminStatus also says that: | Priorities that all map to the same traffic class should be | constrained to use the same value of preemption status. It is impossible to ignore the cognitive dissonance in the standard here, because it practically means that the FP adminStatus only takes distinct values per traffic class, even though it is defined per priority. I can see no valid use case which is prevented by having the kernel take the FP adminStatus as input per traffic class (what we do here). In addition, this also enforces the above constraint by construction. User space network managers which wish to expose FP adminStatus per priority are free to do so; they must only observe the prio_tc_map of the netdev (which presumably is also under their control, when constructing the mqprio netlink attributes). The reason for configuring frame preemption as a property of the Qdisc layer is that the information about "preemptible TCs" is closest to the place which handles the num_tc and prio_tc_map of the netdev. If the UAPI would have been any other layer, it would be unclear what to do with the FP information when num_tc collapses to 0. A key assumption is that only mqprio/taprio change the num_tc and prio_tc_map of the netdev. Not sure if that's a great assumption to make. Having FP in tc-mqprio can be seen as an implementation of the use case defined in 802.1Q Annex S.2 "Preemption used in isolation". There will be a separate implementation of FP in tc-taprio, for the other use cases. Signed-off-by: Vladimir Oltean Reviewed-by: Ferenc Fejes Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_sched.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 000eec106856..b8d29be91b62 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -719,6 +719,11 @@ enum { #define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1) +enum { + TC_FP_EXPRESS = 1, + TC_FP_PREEMPTIBLE = 2, +}; + struct tc_mqprio_qopt { __u8 num_tc; __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; @@ -732,12 +737,23 @@ struct tc_mqprio_qopt { #define TC_MQPRIO_F_MIN_RATE 0x4 #define TC_MQPRIO_F_MAX_RATE 0x8 +enum { + TCA_MQPRIO_TC_ENTRY_UNSPEC, + TCA_MQPRIO_TC_ENTRY_INDEX, /* u32 */ + TCA_MQPRIO_TC_ENTRY_FP, /* u32 */ + + /* add new constants above here */ + __TCA_MQPRIO_TC_ENTRY_CNT, + TCA_MQPRIO_TC_ENTRY_MAX = (__TCA_MQPRIO_TC_ENTRY_CNT - 1) +}; + enum { TCA_MQPRIO_UNSPEC, TCA_MQPRIO_MODE, TCA_MQPRIO_SHAPER, TCA_MQPRIO_MIN_RATE64, TCA_MQPRIO_MAX_RATE64, + TCA_MQPRIO_TC_ENTRY, __TCA_MQPRIO_MAX, }; -- cgit v1.2.3 From a721c3e54b80e45cd9202e7fca29ef018bed9069 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 11 Apr 2023 21:01:55 +0300 Subject: net/sched: taprio: allow per-TC user input of FP adminStatus This is a duplication of the FP adminStatus logic introduced for tc-mqprio. Offloading is done through the tc_mqprio_qopt_offload structure embedded within tc_taprio_qopt_offload. So practically, if a device driver is written to treat the mqprio portion of taprio just like standalone mqprio, it gets unified handling of frame preemption. I would have reused more code with taprio, but this is mostly netlink attribute parsing, which is hard to transform into generic code without having something that stinks as a result. We have the same variables with the same semantics, just different nlattr type values (TCA_MQPRIO_TC_ENTRY=5 vs TCA_TAPRIO_ATTR_TC_ENTRY=12; TCA_MQPRIO_TC_ENTRY_FP=2 vs TCA_TAPRIO_TC_ENTRY_FP=3, etc) and consequently, different policies for the nest. Every time nla_parse_nested() is called, an on-stack table "tb" of nlattr pointers is allocated statically, up to the maximum understood nlattr type. That array size is hardcoded as a constant, but when transforming this into a common parsing function, it would become either a VLA (which the Linux kernel rightfully doesn't like) or a call to the allocator. Having FP adminStatus in tc-taprio can be seen as addressing the 802.1Q Annex S.3 "Scheduling and preemption used in combination, no HOLD/RELEASE" and S.4 "Scheduling and preemption used in combination with HOLD/RELEASE" use cases. HOLD and RELEASE events are emitted towards the underlying MAC Merge layer when the schedule hits a Set-And-Hold-MAC or a Set-And-Release-MAC gate operation. So within the tc-taprio UAPI space, one can distinguish between the 2 use cases by choosing whether to use the TC_TAPRIO_CMD_SET_AND_HOLD and TC_TAPRIO_CMD_SET_AND_RELEASE gate operations within the schedule, or just TC_TAPRIO_CMD_SET_GATES. A small part of the change is dedicated to refactoring the max_sdu nlattr parsing to put all logic under the "if" that tests for presence of that nlattr. Signed-off-by: Vladimir Oltean Reviewed-by: Ferenc Fejes Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index b8d29be91b62..51a7addc56c6 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1252,6 +1252,7 @@ enum { TCA_TAPRIO_TC_ENTRY_UNSPEC, TCA_TAPRIO_TC_ENTRY_INDEX, /* u32 */ TCA_TAPRIO_TC_ENTRY_MAX_SDU, /* u32 */ + TCA_TAPRIO_TC_ENTRY_FP, /* u32 */ /* add new constants above here */ __TCA_TAPRIO_TC_ENTRY_CNT, -- cgit v1.2.3 From f57fa2959244026ea5e885249e6c55fe3f133bd7 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 23 Mar 2023 13:58:35 +0000 Subject: media: v4l2-subdev: Add new ioctl for client capabilities Add new ioctls to set and get subdev client capabilities. Client in this context means the userspace application which opens the subdev device node. The client capabilities are stored in the file handle of the opened subdev device node, and the client must set the capabilities for each opened subdev. For now we only add a single flag, V4L2_SUBDEV_CLIENT_CAP_STREAMS, which indicates that the client is streams-aware. The reason for needing such a flag is as follows: Many structs passed via ioctls, e.g. struct v4l2_subdev_format, contain reserved fields (usually a single array field). These reserved fields can be used to extend the ioctl. The userspace is required to zero the reserved fields. We recently added a new 'stream' field to many of these structs, and the space for the field was taken from these reserved arrays. The assumption was that these new 'stream' fields are always initialized to zero if the userspace does not use them. This was a mistake, as, as mentioned above, the userspace is required to zero the _reserved_ fields. In other words, there is no requirement to zero this new stream field, and if the userspace doesn't use the field (which is the case for all userspace applications at the moment), the field may contain random data. This shows that the way the reserved fields are defined in v4l2 is, in my opinion, somewhat broken, but there is nothing to do about that. To fix this issue we need a way for the userspace to tell the kernel that the userspace has indeed set the 'stream' field, and it's fine for the kernel to access it. This is achieved with the new ioctl, which the userspace should usually use right after opening the subdev device node. Signed-off-by: Tomi Valkeinen Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-subdev.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h index 654d659de835..4a195b68f28f 100644 --- a/include/uapi/linux/v4l2-subdev.h +++ b/include/uapi/linux/v4l2-subdev.h @@ -233,6 +233,24 @@ struct v4l2_subdev_routing { __u32 reserved[6]; }; +/* + * The client is aware of streams. Setting this flag enables the use of 'stream' + * fields (referring to the stream number) with various ioctls. If this is not + * set (which is the default), the 'stream' fields will be forced to 0 by the + * kernel. + */ + #define V4L2_SUBDEV_CLIENT_CAP_STREAMS (1U << 0) + +/** + * struct v4l2_subdev_client_capability - Capabilities of the client accessing + * the subdev + * + * @capabilities: A bitmask of V4L2_SUBDEV_CLIENT_CAP_* flags. + */ +struct v4l2_subdev_client_capability { + __u64 capabilities; +}; + /* Backwards compatibility define --- to be removed */ #define v4l2_subdev_edid v4l2_edid @@ -250,6 +268,9 @@ struct v4l2_subdev_routing { #define VIDIOC_SUBDEV_S_SELECTION _IOWR('V', 62, struct v4l2_subdev_selection) #define VIDIOC_SUBDEV_G_ROUTING _IOWR('V', 38, struct v4l2_subdev_routing) #define VIDIOC_SUBDEV_S_ROUTING _IOWR('V', 39, struct v4l2_subdev_routing) +#define VIDIOC_SUBDEV_G_CLIENT_CAP _IOR('V', 101, struct v4l2_subdev_client_capability) +#define VIDIOC_SUBDEV_S_CLIENT_CAP _IOWR('V', 102, struct v4l2_subdev_client_capability) + /* The following ioctls are identical to the ioctls in videodev2.h */ #define VIDIOC_SUBDEV_G_STD _IOR('V', 23, v4l2_std_id) #define VIDIOC_SUBDEV_S_STD _IOW('V', 24, v4l2_std_id) -- cgit v1.2.3 From aa1080404200694aace5989f99664ca75e73b03d Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Wed, 22 Mar 2023 05:13:04 +0000 Subject: media: Add P012 and P012M video format P012 is a YUV format with 12-bits per component with interleaved UV, like NV12, expanded to 16 bits. Data in the 12 high bits, zeros in the 4 low bits, arranged in little endian order. And P012M has two non contiguous planes. Signed-off-by: Ming Qian Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index d452481a5316..1f88e00327ea 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -637,12 +637,14 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_NV24 v4l2_fourcc('N', 'V', '2', '4') /* 24 Y/CbCr 4:4:4 */ #define V4L2_PIX_FMT_NV42 v4l2_fourcc('N', 'V', '4', '2') /* 24 Y/CrCb 4:4:4 */ #define V4L2_PIX_FMT_P010 v4l2_fourcc('P', '0', '1', '0') /* 24 Y/CbCr 4:2:0 10-bit per component */ +#define V4L2_PIX_FMT_P012 v4l2_fourcc('P', '0', '1', '2') /* 24 Y/CbCr 4:2:0 12-bit per component */ /* two non contiguous planes - one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12M v4l2_fourcc('N', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 */ #define V4L2_PIX_FMT_NV21M v4l2_fourcc('N', 'M', '2', '1') /* 21 Y/CrCb 4:2:0 */ #define V4L2_PIX_FMT_NV16M v4l2_fourcc('N', 'M', '1', '6') /* 16 Y/CbCr 4:2:2 */ #define V4L2_PIX_FMT_NV61M v4l2_fourcc('N', 'M', '6', '1') /* 16 Y/CrCb 4:2:2 */ +#define V4L2_PIX_FMT_P012M v4l2_fourcc('P', 'M', '1', '2') /* 24 Y/CbCr 4:2:0 12-bit per component */ /* three planes - Y Cb, Cr */ #define V4L2_PIX_FMT_YUV410 v4l2_fourcc('Y', 'U', 'V', '9') /* 9 YUV 4:1:0 */ -- cgit v1.2.3 From a490ea68444084ec0368c019e11ee4a7e5c8bb13 Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Wed, 22 Mar 2023 05:13:05 +0000 Subject: media: Add Y012 video format Y012 is a luma-only formats with 12-bits per pixel, expanded to 16bits. Data in the 12 high bits, zeros in the 4 low bits, arranged in little endian order. Signed-off-by: Ming Qian Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 1f88e00327ea..2eb3e66a3427 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -586,6 +586,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y6 v4l2_fourcc('Y', '0', '6', ' ') /* 6 Greyscale */ #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ') /* 10 Greyscale */ #define V4L2_PIX_FMT_Y12 v4l2_fourcc('Y', '1', '2', ' ') /* 12 Greyscale */ +#define V4L2_PIX_FMT_Y012 v4l2_fourcc('Y', '0', '1', '2') /* 12 Greyscale */ #define V4L2_PIX_FMT_Y14 v4l2_fourcc('Y', '1', '4', ' ') /* 14 Greyscale */ #define V4L2_PIX_FMT_Y16 v4l2_fourcc('Y', '1', '6', ' ') /* 16 Greyscale */ #define V4L2_PIX_FMT_Y16_BE v4l2_fourcc_be('Y', '1', '6', ' ') /* 16 Greyscale BE */ -- cgit v1.2.3 From 99c954967762976b15265ea383354095e1ed1efa Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Wed, 22 Mar 2023 05:13:07 +0000 Subject: media: Add YUV48_12 video format YUV48_12 is a YUV format with 12-bits per component like YUV24, expanded to 16bits. Data in the 12 high bits, zeros in the 4 low bits, arranged in little endian order. [hverkuil: replaced a . by ,] Signed-off-by: Ming Qian Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 2eb3e66a3427..25ca4bf83ba4 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -621,6 +621,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YUVA32 v4l2_fourcc('Y', 'U', 'V', 'A') /* 32 YUVA-8-8-8-8 */ #define V4L2_PIX_FMT_YUVX32 v4l2_fourcc('Y', 'U', 'V', 'X') /* 32 YUVX-8-8-8-8 */ #define V4L2_PIX_FMT_M420 v4l2_fourcc('M', '4', '2', '0') /* 12 YUV 4:2:0 2 lines y, 1 line uv interleaved */ +#define V4L2_PIX_FMT_YUV48_12 v4l2_fourcc('Y', '3', '1', '2') /* 48 YUV 4:4:4 12-bit per component */ /* * YCbCr packed format. For each Y2xx format, xx bits of valid data occupy the MSBs -- cgit v1.2.3 From da0b7a400e4f39726c3c383f377fb51dbd8b0c71 Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Wed, 22 Mar 2023 05:13:08 +0000 Subject: media: Add BGR48_12 video format BGR48_12 is a reversed RGB format with 12 bits per component like BGR24, expanded to 16bits. Data in the 12 high bits, zeros in the 4 low bits, arranged in little endian order. Signed-off-by: Ming Qian Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 25ca4bf83ba4..42358d04075d 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -580,6 +580,9 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_RGBA1010102 v4l2_fourcc('R', 'A', '3', '0') /* 32 RGBA-10-10-10-2 */ #define V4L2_PIX_FMT_ARGB2101010 v4l2_fourcc('A', 'R', '3', '0') /* 32 ARGB-2-10-10-10 */ +/* RGB formats (6 or 8 bytes per pixel) */ +#define V4L2_PIX_FMT_BGR48_12 v4l2_fourcc('B', '3', '1', '2') /* 48 BGR 12-bit per component */ + /* Grey formats */ #define V4L2_PIX_FMT_GREY v4l2_fourcc('G', 'R', 'E', 'Y') /* 8 Greyscale */ #define V4L2_PIX_FMT_Y4 v4l2_fourcc('Y', '0', '4', ' ') /* 4 Greyscale */ -- cgit v1.2.3 From 302b988ca03d83da0a7e006a57efda646c30f978 Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Wed, 22 Mar 2023 05:13:09 +0000 Subject: media: Add ABGR64_12 video format ABGR64_12 is a reversed RGB format with alpha channel last, 12 bits per component like ABGR32, expanded to 16bits. Data in the 12 high bits, zeros in the 4 low bits, arranged in little endian order. Signed-off-by: Ming Qian Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 42358d04075d..aee75eb9e686 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -582,6 +582,7 @@ struct v4l2_pix_format { /* RGB formats (6 or 8 bytes per pixel) */ #define V4L2_PIX_FMT_BGR48_12 v4l2_fourcc('B', '3', '1', '2') /* 48 BGR 12-bit per component */ +#define V4L2_PIX_FMT_ABGR64_12 v4l2_fourcc('B', '4', '1', '2') /* 64 BGRA 12-bit per component */ /* Grey formats */ #define V4L2_PIX_FMT_GREY v4l2_fourcc('G', 'R', 'E', 'Y') /* 8 Greyscale */ -- cgit v1.2.3 From d54730b50bae1f3119bd686d551d66f0fcc387ca Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Sat, 15 Apr 2023 13:18:04 -0700 Subject: bpf: Introduce opaque bpf_refcount struct and add btf_record plumbing A 'struct bpf_refcount' is added to the set of opaque uapi/bpf.h types meant for use in BPF programs. Similarly to other opaque types like bpf_spin_lock and bpf_rbtree_node, the verifier needs to know where in user-defined struct types a bpf_refcount can be located, so necessary btf_record plumbing is added to enable this. bpf_refcount is sized to hold a refcount_t. Similarly to bpf_spin_lock, the offset of a bpf_refcount is cached in btf_record as refcount_off in addition to being in the field array. Caching refcount_off makes sense for this field because further patches in the series will modify functions that take local kptrs (e.g. bpf_obj_drop) to change their behavior if the type they're operating on is refcounted. So enabling fast "is this type refcounted?" checks is desirable. No such verifier behavior changes are introduced in this patch, just logic to recognize 'struct bpf_refcount' in btf_record. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20230415201811.343116-3-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3823100b7934..4b20a7269bee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6985,6 +6985,10 @@ struct bpf_rb_node { __u64 :64; } __attribute__((aligned(8))); +struct bpf_refcount { + __u32 :32; +} __attribute__((aligned(4))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. -- cgit v1.2.3 From 3f67987cdc09778e75098f9f5168832f8f8e1f1c Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Fri, 7 Apr 2023 13:18:33 -0400 Subject: ptrace: Provide set/get interface for syscall user dispatch The syscall user dispatch configuration can only be set by the task itself, but lacks a ptrace set/get interface which makes it impossible to implement checkpoint/restore for it. Add the required ptrace requests and the get/set functions in the syscall user dispatch code to make that possible. Signed-off-by: Gregory Price Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20230407171834.3558-4-gregory.price@memverge.com --- include/uapi/linux/ptrace.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index 195ae64a8c87..72c038fc71d0 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -112,6 +112,36 @@ struct ptrace_rseq_configuration { __u32 pad; }; +#define PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG 0x4210 +#define PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG 0x4211 + +/* + * struct ptrace_sud_config - Per-task configuration for Syscall User Dispatch + * @mode: One of PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF + * @selector: Tracees user virtual address of SUD selector + * @offset: SUD exclusion area (virtual address) + * @len: Length of SUD exclusion area + * + * Used to get/set the syscall user dispatch configuration for a tracee. + * Selector is optional (may be NULL), and if invalid will produce + * a SIGSEGV in the tracee upon first access. + * + * If mode is PR_SYS_DISPATCH_ON, syscall dispatch will be enabled. If + * PR_SYS_DISPATCH_OFF, syscall dispatch will be disabled and all other + * parameters must be 0. The value in *selector (if not null), also determines + * whether syscall dispatch will occur. + * + * The Syscall User Dispatch Exclusion area described by offset/len is the + * virtual address space from which syscalls will not produce a user + * dispatch. + */ +struct ptrace_sud_config { + __u64 mode; + __u64 selector; + __u64 offset; + __u64 len; +}; + /* * These values are stored in task->ptrace_message * by ptrace_stop to describe the current syscall-stop. -- cgit v1.2.3 From 604e6681e114d05a2e384c4d1e8ef81918037ef5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 6 Apr 2023 13:00:34 +0800 Subject: btrfs: scrub: reject unsupported scrub flags Since the introduction of scrub interface, the only flag that we support is BTRFS_SCRUB_READONLY. Thus there is no sanity checks, if there are some undefined flags passed in, we just ignore them. This is problematic if we want to introduce new scrub flags, as we have no way to determine if such flags are supported. Address the problem by introducing a check for the flags, and if unsupported flags are set, return -EOPNOTSUPP to inform the user space. This check should be backported for all supported kernels before any new scrub flags are introduced. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index ada0a489bf2b..dbb8b96da50d 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -187,6 +187,7 @@ struct btrfs_scrub_progress { }; #define BTRFS_SCRUB_READONLY 1 +#define BTRFS_SCRUB_SUPPORTED_FLAGS (BTRFS_SCRUB_READONLY) struct btrfs_ioctl_scrub_args { __u64 devid; /* in */ __u64 start; /* in */ -- cgit v1.2.3 From ddc65971bb677aa9f6a4c21f76d3133e106f88eb Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Tue, 4 Apr 2023 21:31:48 +0900 Subject: prctl: add PR_GET_AUXV to copy auxv to userspace If a library wants to get information from auxv (for instance, AT_HWCAP/AT_HWCAP2), it has a few options, none of them perfectly reliable or ideal: - Be main or the pre-main startup code, and grub through the stack above main. Doesn't work for a library. - Call libc getauxval. Not ideal for libraries that are trying to be libc-independent and/or don't otherwise require anything from other libraries. - Open and read /proc/self/auxv. Doesn't work for libraries that may run in arbitrarily constrained environments that may not have /proc mounted (e.g. libraries that might be used by an init program or a container setup tool). - Assume you're on the main thread and still on the original stack, and try to walk the stack upwards, hoping to find auxv. Extremely bad idea. - Ask the caller to pass auxv in for you. Not ideal for a user-friendly library, and then your caller may have the same problem. Add a prctl that copies current->mm->saved_auxv to a userspace buffer. Link: https://lkml.kernel.org/r/d81864a7f7f43bca6afa2a09fc2e850e4050ab42.1680611394.git.josh@joshtriplett.org Signed-off-by: Josh Triplett Signed-off-by: Andrew Morton --- include/uapi/linux/prctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 1312a137f7fb..b99c0be72577 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -290,4 +290,6 @@ struct prctl_mm_map { #define PR_SET_VMA 0x53564d41 # define PR_SET_VMA_ANON_NAME 0 +#define PR_GET_AUXV 0x41555856 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From a3b2aeac9d154e5e15ddbf19de934c0c606b6acd Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Sat, 8 Apr 2023 17:28:35 +0800 Subject: delayacct: track delays from IRQ/SOFTIRQ Delay accounting does not track the delay of IRQ/SOFTIRQ. While IRQ/SOFTIRQ could have obvious impact on some workloads productivity, such as when workloads are running on system which is busy handling network IRQ/SOFTIRQ. Get the delay of IRQ/SOFTIRQ could help users to reduce such delay. Such as setting interrupt affinity or task affinity, using kernel thread for NAPI etc. This is inspired by "sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure"[1]. Also fix some code indent problems of older code. And update tools/accounting/getdelays.c: / # ./getdelays -p 156 -di print delayacct stats ON printing IO accounting PID 156 CPU count real total virtual total delay total delay average 15 15836008 16218149 275700790 18.380ms IO count delay total delay average 0 0 0.000ms SWAP count delay total delay average 0 0 0.000ms RECLAIM count delay total delay average 0 0 0.000ms THRASHING count delay total delay average 0 0 0.000ms COMPACT count delay total delay average 0 0 0.000ms WPCOPY count delay total delay average 36 7586118 0.211ms IRQ count delay total delay average 42 929161 0.022ms [1] commit 52b1364ba0b1("sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure") Link: https://lkml.kernel.org/r/202304081728353557233@zte.com.cn Signed-off-by: Yang Yang Cc: Jiang Xuexin Cc: wangyong Cc: junhua huang Cc: Balbir Singh Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Juri Lelli Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/uapi/linux/taskstats.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index a7f5b11a8f1b..b50b2eb257a0 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 13 +#define TASKSTATS_VERSION 14 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -198,6 +198,10 @@ struct taskstats { /* v13: Delay waiting for write-protect copy */ __u64 wpcopy_count; __u64 wpcopy_delay_total; + + /* v14: Delay waiting for IRQ/SOFTIRQ */ + __u64 irq_count; + __u64 irq_delay_total; }; -- cgit v1.2.3 From 31088f6f7906253ef4577f6a9b84e2d42447dba0 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Tue, 11 Apr 2023 10:27:47 +0100 Subject: uapi/linux/const.h: prefer ISO-friendly __typeof__ typeof is (still) a GNU extension, which means that it cannot be used when building ISO C (e.g. -std=c99). It should therefore be avoided in uapi headers in favour of the ISO-friendly __typeof__. Unfortunately this issue could not be detected by CONFIG_UAPI_HEADER_TEST=y as the __ALIGN_KERNEL() macro is not expanded in any uapi header. This matters from a userspace perspective, not a kernel one. uapi headers and their contents are expected to be usable in a variety of situations, and in particular when building ISO C applications (with -std=c99 or similar). This particular problem can be reproduced by trying to use the __ALIGN_KERNEL macro directly in application code, say: #include int align(int x, int a) { return __KERNEL_ALIGN(x, a); } and trying to build that with -std=c99. Link: https://lkml.kernel.org/r/20230411092747.3759032-1-kevin.brodsky@arm.com Fixes: a79ff731a1b2 ("netfilter: xtables: make XT_ALIGN() usable in exported headers by exporting __ALIGN_KERNEL()") Signed-off-by: Kevin Brodsky Reported-by: Ruben Ayrapetyan Tested-by: Ruben Ayrapetyan Reviewed-by: Petr Vorel Tested-by: Petr Vorel Reviewed-by: Masahiro Yamada Cc: Sam Ravnborg Signed-off-by: Andrew Morton --- include/uapi/linux/const.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index af2a44c08683..a429381e7ca5 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -28,7 +28,7 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) -#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) -- cgit v1.2.3 From ea97f6c8558e83cb457c3b5f53351e4fd8519ab1 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 18 Apr 2023 15:58:18 -0700 Subject: io_uring: add support for multishot timeouts A multishot timeout submission will repeatedly generate completions with the IORING_CQE_F_MORE cflag set. Depending on the value of the `off' field in the submission, these timeouts can either repeat indefinitely until cancelled (`off' = 0) or for a fixed number of times (`off' > 0). Only noseq timeouts (i.e. not dependent on the number of I/O completions) are supported. An indefinite timer will be cancelled if the CQ ever overflows. Signed-off-by: David Wei Link: https://lore.kernel.org/r/20230418225817.1905027-1-davidhwei@meta.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f8d14d1c58d3..0716cb17e436 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -250,6 +250,7 @@ enum io_uring_op { #define IORING_TIMEOUT_REALTIME (1U << 3) #define IORING_LINK_TIMEOUT_UPDATE (1U << 4) #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) +#define IORING_TIMEOUT_MULTISHOT (1U << 6) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) /* -- cgit v1.2.3 From 2d786e66c9662d84cbeab981ce3a371d2fb5a4bb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 18 Apr 2023 21:18:10 +0800 Subject: block: ublk: switch to ioctl command encoding All ublk commands(control, IO) should have taken ioctl command encoding from the beginning, because ioctl command encoding defines each code uniquely, so driver can figure out wrong command sent from userspace easily; 2) it might help security subsystem for audit uring cmd[1]. Unfortunately we didn't do that way, and it could be one lesson for ublk driver. So switch to ioctl command encoding now, we still support commands encoded in old way, but they become legacy definition. Any new command should take ioctl encoding. See ublksrv code for switching to ioctl command encoding in [2]. [1] https://lore.kernel.org/io-uring/CAHC9VhSVzujW9LOj5Km80AjU0EfAuukoLrxO6BEfnXeK_s6bAg@mail.gmail.com/ [2] https://github.com/ming1/ubdsrv/commits/ioctl_cmd_encoding Cc: Christoph Hellwig Cc: Ken Kurematsu Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230418131810.855959-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index f6238ccc7800..640bf687b94a 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -8,6 +8,9 @@ /* * Admin commands, issued by ublk server, and handled by ublk driver. + * + * Legacy command definition, don't use in new application, and don't + * add new such definition any more */ #define UBLK_CMD_GET_QUEUE_AFFINITY 0x01 #define UBLK_CMD_GET_DEV_INFO 0x02 @@ -21,6 +24,30 @@ #define UBLK_CMD_END_USER_RECOVERY 0x11 #define UBLK_CMD_GET_DEV_INFO2 0x12 +/* Any new ctrl command should encode by __IO*() */ +#define UBLK_U_CMD_GET_QUEUE_AFFINITY \ + _IOR('u', UBLK_CMD_GET_QUEUE_AFFINITY, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_GET_DEV_INFO \ + _IOR('u', UBLK_CMD_GET_DEV_INFO, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_ADD_DEV \ + _IOWR('u', UBLK_CMD_ADD_DEV, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_DEL_DEV \ + _IOWR('u', UBLK_CMD_DEL_DEV, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_START_DEV \ + _IOWR('u', UBLK_CMD_START_DEV, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_STOP_DEV \ + _IOWR('u', UBLK_CMD_STOP_DEV, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_SET_PARAMS \ + _IOWR('u', UBLK_CMD_SET_PARAMS, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_GET_PARAMS \ + _IOR('u', UBLK_CMD_GET_PARAMS, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_START_USER_RECOVERY \ + _IOWR('u', UBLK_CMD_START_USER_RECOVERY, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_END_USER_RECOVERY \ + _IOWR('u', UBLK_CMD_END_USER_RECOVERY, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_GET_DEV_INFO2 \ + _IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd) + /* * IO commands, issued by ublk server, and handled by ublk driver. * @@ -41,10 +68,23 @@ * It is only used if ublksrv set UBLK_F_NEED_GET_DATA flag * while starting a ublk device. */ + +/* + * Legacy IO command definition, don't use in new application, and don't + * add new such definition any more + */ #define UBLK_IO_FETCH_REQ 0x20 #define UBLK_IO_COMMIT_AND_FETCH_REQ 0x21 #define UBLK_IO_NEED_GET_DATA 0x22 +/* Any new IO command should encode by __IOWR() */ +#define UBLK_U_IO_FETCH_REQ \ + _IOWR('u', UBLK_IO_FETCH_REQ, struct ublksrv_io_cmd) +#define UBLK_U_IO_COMMIT_AND_FETCH_REQ \ + _IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd) +#define UBLK_U_IO_NEED_GET_DATA \ + _IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd) + /* only ABORT means that no re-fetch */ #define UBLK_IO_RES_OK 0 #define UBLK_IO_RES_NEED_GET_DATA 1 @@ -102,6 +142,9 @@ */ #define UBLK_F_UNPRIVILEGED_DEV (1UL << 5) +/* use ioctl encoding for uring command */ +#define UBLK_F_CMD_IOCTL_ENCODE (1UL << 6) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit v1.2.3 From 9e05a2599a37295eb2dc5c03441daa6741abed4b Mon Sep 17 00:00:00 2001 From: Ondrej Kozina Date: Tue, 11 Apr 2023 11:09:31 +0200 Subject: sed-opal: geometry feature reporting command Locking range start and locking range length attributes may be require to satisfy restrictions exposed by OPAL2 geometry feature reporting. Geometry reporting feature is described in TCG OPAL SSC, section 3.1.1.4 (ALIGN, LogicalBlockSize, AlignmentGranularity and LowestAlignedLBA). 4.3.5.2.1.1 RangeStart Behavior: [ StartAlignment = (RangeStart modulo AlignmentGranularity) - LowestAlignedLBA ] When processing a Set method or CreateRow method on the Locking table for a non-Global Range row, if: a) the AlignmentRequired (ALIGN above) column in the LockingInfo table is TRUE; b) RangeStart is non-zero; and c) StartAlignment is non-zero, then the method SHALL fail and return an error status code INVALID_PARAMETER. 4.3.5.2.1.2 RangeLength Behavior: If RangeStart is zero, then [ LengthAlignment = (RangeLength modulo AlignmentGranularity) - LowestAlignedLBA ] If RangeStart is non-zero, then [ LengthAlignment = (RangeLength modulo AlignmentGranularity) ] When processing a Set method or CreateRow method on the Locking table for a non-Global Range row, if: a) the AlignmentRequired (ALIGN above) column in the LockingInfo table is TRUE; b) RangeLength is non-zero; and c) LengthAlignment is non-zero, then the method SHALL fail and return an error status code INVALID_PARAMETER In userspace we stuck to logical block size reported by general block device (via sysfs or ioctl), but we can not read 'AlignmentGranularity' or 'LowestAlignedLBA' anywhere else and we need to get those values from sed-opal interface otherwise we will not be able to report or avoid locking range setup INVALID_PARAMETER errors above. Signed-off-by: Ondrej Kozina Reviewed-by: Christoph Hellwig Reviewed-by: Christian Brauner Tested-by: Milan Broz Link: https://lore.kernel.org/r/20230411090931.9193-2-okozina@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/sed-opal.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index 3905c8ffedbf..dc2efd345133 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -161,6 +161,18 @@ struct opal_status { __u32 reserved; }; +/* + * Geometry Reporting per TCG Storage OPAL SSC + * section 3.1.1.4 + */ +struct opal_geometry { + __u8 align; + __u32 logical_block_size; + __u64 alignment_granularity; + __u64 lowest_aligned_lba; + __u8 __align[3]; +}; + #define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock) #define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock) #define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key) @@ -179,5 +191,6 @@ struct opal_status { #define IOC_OPAL_GENERIC_TABLE_RW _IOW('p', 235, struct opal_read_write_table) #define IOC_OPAL_GET_STATUS _IOR('p', 236, struct opal_status) #define IOC_OPAL_GET_LR_STATUS _IOW('p', 237, struct opal_lr_status) +#define IOC_OPAL_GET_GEOMETRY _IOR('p', 238, struct opal_geometry) #endif /* _UAPI_SED_OPAL_H */ -- cgit v1.2.3 From 3b3009ea8abb713b022d94fba95ec270cf6e7eae Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 17 Apr 2023 10:32:26 -0400 Subject: net/handshake: Create a NETLINK service for handling handshake requests When a kernel consumer needs a transport layer security session, it first needs a handshake to negotiate and establish a session. This negotiation can be done in user space via one of the several existing library implementations, or it can be done in the kernel. No in-kernel handshake implementations yet exist. In their absence, we add a netlink service that can: a. Notify a user space daemon that a handshake is needed. b. Once notified, the daemon calls the kernel back via this netlink service to get the handshake parameters, including an open socket on which to establish the session. c. Once the handshake is complete, the daemon reports the session status and other information via a second netlink operation. This operation marks that it is safe for the kernel to use the open socket and the security session established there. The notification service uses a multicast group. Each handshake mechanism (eg, tlshd) adopts its own group number so that the handshake services are completely independent of one another. The kernel can then tell via netlink_has_listeners() whether a handshake service is active and prepared to handle a handshake request. A new netlink operation, ACCEPT, acts like accept(2) in that it instantiates a file descriptor in the user space daemon's fd table. If this operation is successful, the reply carries the fd number, which can be treated as an open and ready file descriptor. While user space is performing the handshake, the kernel keeps its muddy paws off the open socket. A second new netlink operation, DONE, indicates that the user space daemon is finished with the socket and it is safe for the kernel to use again. The operation also indicates whether a session was established successfully. Signed-off-by: Chuck Lever Signed-off-by: Jakub Kicinski --- include/uapi/linux/handshake.h | 71 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 include/uapi/linux/handshake.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h new file mode 100644 index 000000000000..7f66ff489b87 --- /dev/null +++ b/include/uapi/linux/handshake.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/handshake.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_HANDSHAKE_H +#define _UAPI_LINUX_HANDSHAKE_H + +#define HANDSHAKE_FAMILY_NAME "handshake" +#define HANDSHAKE_FAMILY_VERSION 1 + +enum handshake_handler_class { + HANDSHAKE_HANDLER_CLASS_NONE, + HANDSHAKE_HANDLER_CLASS_MAX, +}; + +enum handshake_msg_type { + HANDSHAKE_MSG_TYPE_UNSPEC, + HANDSHAKE_MSG_TYPE_CLIENTHELLO, + HANDSHAKE_MSG_TYPE_SERVERHELLO, +}; + +enum handshake_auth { + HANDSHAKE_AUTH_UNSPEC, + HANDSHAKE_AUTH_UNAUTH, + HANDSHAKE_AUTH_PSK, + HANDSHAKE_AUTH_X509, +}; + +enum { + HANDSHAKE_A_X509_CERT = 1, + HANDSHAKE_A_X509_PRIVKEY, + + __HANDSHAKE_A_X509_MAX, + HANDSHAKE_A_X509_MAX = (__HANDSHAKE_A_X509_MAX - 1) +}; + +enum { + HANDSHAKE_A_ACCEPT_SOCKFD = 1, + HANDSHAKE_A_ACCEPT_HANDLER_CLASS, + HANDSHAKE_A_ACCEPT_MESSAGE_TYPE, + HANDSHAKE_A_ACCEPT_TIMEOUT, + HANDSHAKE_A_ACCEPT_AUTH_MODE, + HANDSHAKE_A_ACCEPT_PEER_IDENTITY, + HANDSHAKE_A_ACCEPT_CERTIFICATE, + + __HANDSHAKE_A_ACCEPT_MAX, + HANDSHAKE_A_ACCEPT_MAX = (__HANDSHAKE_A_ACCEPT_MAX - 1) +}; + +enum { + HANDSHAKE_A_DONE_STATUS = 1, + HANDSHAKE_A_DONE_SOCKFD, + HANDSHAKE_A_DONE_REMOTE_AUTH, + + __HANDSHAKE_A_DONE_MAX, + HANDSHAKE_A_DONE_MAX = (__HANDSHAKE_A_DONE_MAX - 1) +}; + +enum { + HANDSHAKE_CMD_READY = 1, + HANDSHAKE_CMD_ACCEPT, + HANDSHAKE_CMD_DONE, + + __HANDSHAKE_CMD_MAX, + HANDSHAKE_CMD_MAX = (__HANDSHAKE_CMD_MAX - 1) +}; + +#define HANDSHAKE_MCGRP_NONE "none" + +#endif /* _UAPI_LINUX_HANDSHAKE_H */ -- cgit v1.2.3 From 2fd5532044a89d2403b543520b4902e196f7d165 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 17 Apr 2023 10:32:33 -0400 Subject: net/handshake: Add a kernel API for requesting a TLSv1.3 handshake To enable kernel consumers of TLS to request a TLS handshake, add support to net/handshake/ to request a handshake upcall. This patch also acts as a template for adding handshake upcall support for other kernel transport layer security providers. Signed-off-by: Chuck Lever Signed-off-by: Jakub Kicinski --- include/uapi/linux/handshake.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h index 7f66ff489b87..1de4d0b95325 100644 --- a/include/uapi/linux/handshake.h +++ b/include/uapi/linux/handshake.h @@ -11,6 +11,7 @@ enum handshake_handler_class { HANDSHAKE_HANDLER_CLASS_NONE, + HANDSHAKE_HANDLER_CLASS_TLSHD, HANDSHAKE_HANDLER_CLASS_MAX, }; @@ -67,5 +68,6 @@ enum { }; #define HANDSHAKE_MCGRP_NONE "none" +#define HANDSHAKE_MCGRP_TLSHD "tlshd" #endif /* _UAPI_LINUX_HANDSHAKE_H */ -- cgit v1.2.3 From 519fe1bae7e20fc4e7f179d50b6102b49980e85d Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 2 Apr 2023 11:37:42 +0900 Subject: ext4: Add a uapi header for ext4 userspace APIs Create a uapi header include/uapi/linux/ext4.h, move the ioctls and associated data structures to the uapi header, and include it from fs/ext4/ext4.h. Signed-off-by: Josh Triplett Link: https://lore.kernel.org/r/680175260970d977d16b5cc7e7606483ec99eb63.1680402881.git.josh@joshtriplett.org Signed-off-by: Theodore Ts'o --- include/uapi/linux/ext4.h | 117 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 include/uapi/linux/ext4.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ext4.h b/include/uapi/linux/ext4.h new file mode 100644 index 000000000000..1c4c2dd29112 --- /dev/null +++ b/include/uapi/linux/ext4.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_EXT4_H +#define _UAPI_LINUX_EXT4_H +#include +#include +#include +#include + +/* + * ext4-specific ioctl commands + */ +#define EXT4_IOC_GETVERSION _IOR('f', 3, long) +#define EXT4_IOC_SETVERSION _IOW('f', 4, long) +#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION +#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION +#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) +#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) +#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) +#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) +#define EXT4_IOC_MIGRATE _IO('f', 9) + /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */ + /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ +#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) +#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) +#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) +#define EXT4_IOC_SWAP_BOOT _IO('f', 17) +#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18) +/* ioctl codes 19--39 are reserved for fscrypt */ +#define EXT4_IOC_CLEAR_ES_CACHE _IO('f', 40) +#define EXT4_IOC_GETSTATE _IOW('f', 41, __u32) +#define EXT4_IOC_GET_ES_CACHE _IOWR('f', 42, struct fiemap) +#define EXT4_IOC_CHECKPOINT _IOW('f', 43, __u32) +#define EXT4_IOC_GETFSUUID _IOR('f', 44, struct fsuuid) +#define EXT4_IOC_SETFSUUID _IOW('f', 44, struct fsuuid) + +#define EXT4_IOC_SHUTDOWN _IOR('X', 125, __u32) + +/* + * ioctl commands in 32 bit emulation + */ +#define EXT4_IOC32_GETVERSION _IOR('f', 3, int) +#define EXT4_IOC32_SETVERSION _IOW('f', 4, int) +#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) +#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) +#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) +#define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input) +#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION +#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION + +/* + * Flags returned by EXT4_IOC_GETSTATE + * + * We only expose to userspace a subset of the state flags in + * i_state_flags + */ +#define EXT4_STATE_FLAG_EXT_PRECACHED 0x00000001 +#define EXT4_STATE_FLAG_NEW 0x00000002 +#define EXT4_STATE_FLAG_NEWENTRY 0x00000004 +#define EXT4_STATE_FLAG_DA_ALLOC_CLOSE 0x00000008 + +/* + * Flags for ioctl EXT4_IOC_CHECKPOINT + */ +#define EXT4_IOC_CHECKPOINT_FLAG_DISCARD 0x1 +#define EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT 0x2 +#define EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN 0x4 +#define EXT4_IOC_CHECKPOINT_FLAG_VALID (EXT4_IOC_CHECKPOINT_FLAG_DISCARD | \ + EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT | \ + EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN) + +/* + * Structure for EXT4_IOC_GETFSUUID/EXT4_IOC_SETFSUUID + */ +struct fsuuid { + __u32 fsu_len; + __u32 fsu_flags; + __u8 fsu_uuid[]; +}; + +/* + * Structure for EXT4_IOC_MOVE_EXT + */ +struct move_extent { + __u32 reserved; /* should be zero */ + __u32 donor_fd; /* donor file descriptor */ + __u64 orig_start; /* logical start offset in block for orig */ + __u64 donor_start; /* logical start offset in block for donor */ + __u64 len; /* block length to be moved */ + __u64 moved_len; /* moved block length */ +}; + +/* + * Flags used by EXT4_IOC_SHUTDOWN + */ +#define EXT4_GOING_FLAGS_DEFAULT 0x0 /* going down */ +#define EXT4_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ +#define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ + +/* Used to pass group descriptor data when online resize is done */ +struct ext4_new_group_input { + __u32 group; /* Group number for this data */ + __u64 block_bitmap; /* Absolute block number of block bitmap */ + __u64 inode_bitmap; /* Absolute block number of inode bitmap */ + __u64 inode_table; /* Absolute block number of inode table start */ + __u32 blocks_count; /* Total number of blocks in this group */ + __u16 reserved_blocks; /* Number of reserved blocks in this group */ + __u16 unused; +}; + +/* + * Returned by EXT4_IOC_GET_ES_CACHE as an additional possible flag. + * It indicates that the entry in extent status cache is for a hole. + */ +#define EXT4_FIEMAP_EXTENT_HOLE 0x08000000 + +#endif /* _UAPI_LINUX_EXT4_H */ -- cgit v1.2.3 From af8ececda185078c096852edb4e1d7a2349e6856 Mon Sep 17 00:00:00 2001 From: Viktor Prutyanov Date: Thu, 13 Apr 2023 11:18:54 +0300 Subject: virtio: add VIRTIO_F_NOTIFICATION_DATA feature support According to VirtIO spec v1.2, VIRTIO_F_NOTIFICATION_DATA feature indicates that the driver passes extra data along with the queue notifications. In a split queue case, the extra data is 16-bit available index. In a packed queue case, the extra data is 1-bit wrap counter and 15-bit available index. Add support for this feature for MMIO, channel I/O and modern PCI transports. Signed-off-by: Viktor Prutyanov Acked-by: Jason Wang Reviewed-by: Xuan Zhuo Message-Id: <20230413081855.36643-2-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_config.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 3c05162bc988..2c712c654165 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -99,6 +99,12 @@ */ #define VIRTIO_F_SR_IOV 37 +/* + * This feature indicates that the driver passes extra data (besides + * identifying the virtqueue) in its device notifications. + */ +#define VIRTIO_F_NOTIFICATION_DATA 38 + /* * This feature indicates that the driver can reset a queue individually. */ -- cgit v1.2.3 From 83f6d600796c65ab34b08dbddb5795099dfda4d1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 19 Apr 2023 18:34:58 +0300 Subject: bridge: vlan: Allow setting VLAN neighbor suppression state Add a new VLAN attribute that allows user space to set the neighbor suppression state of the port VLAN. Example: # bridge -d -j -p vlan show dev swp1 vid 10 | jq '.[]["vlans"][]["neigh_suppress"]' false # bridge vlan set vid 10 dev swp1 neigh_suppress on # bridge -d -j -p vlan show dev swp1 vid 10 | jq '.[]["vlans"][]["neigh_suppress"]' true # bridge vlan set vid 10 dev swp1 neigh_suppress off # bridge -d -j -p vlan show dev swp1 vid 10 | jq '.[]["vlans"][]["neigh_suppress"]' false # bridge vlan set vid 10 dev br0 neigh_suppress on Error: bridge: Can't set neigh_suppress for non-port vlans. Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index c9d624f528c5..f95326fce6bb 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -525,6 +525,7 @@ enum { BRIDGE_VLANDB_ENTRY_MCAST_ROUTER, BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS, BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS, + BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS, __BRIDGE_VLANDB_ENTRY_MAX, }; #define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1) -- cgit v1.2.3 From 160656d7201d861a1f2a0bf279a765e8cda2317a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 19 Apr 2023 18:34:59 +0300 Subject: bridge: Allow setting per-{Port, VLAN} neighbor suppression state Add a new bridge port attribute that allows user space to enable per-{Port, VLAN} neighbor suppression. Example: # bridge -d -j -p link show dev swp1 | jq '.[]["neigh_vlan_suppress"]' false # bridge link set dev swp1 neigh_vlan_suppress on # bridge -d -j -p link show dev swp1 | jq '.[]["neigh_vlan_suppress"]' true # bridge link set dev swp1 neigh_vlan_suppress off # bridge -d -j -p link show dev swp1 | jq '.[]["neigh_vlan_suppress"]' false Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8d679688efe0..4ac1000b0ef2 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -569,6 +569,7 @@ enum { IFLA_BRPORT_MAB, IFLA_BRPORT_MCAST_N_GROUPS, IFLA_BRPORT_MCAST_MAX_GROUPS, + IFLA_BRPORT_NEIGH_VLAN_SUPPRESS, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) -- cgit v1.2.3 From dfc39d4026fb2432363c0f77543c4cf3adca4c7b Mon Sep 17 00:00:00 2001 From: Jianfeng Tan Date: Wed, 19 Apr 2023 15:24:16 +0800 Subject: net/packet: support mergeable feature of virtio Packet sockets, like tap, can be used as the backend for kernel vhost. In packet sockets, virtio net header size is currently hardcoded to be the size of struct virtio_net_hdr, which is 10 bytes; however, it is not always the case: some virtio features, such as mrg_rxbuf, need virtio net header to be 12-byte long. Mergeable buffers, as a virtio feature, is worthy of supporting: packets that are larger than one-mbuf size will be dropped in vhost worker's handle_rx if mrg_rxbuf feature is not used, but large packets cannot be avoided and increasing mbuf's size is not economical. With this virtio feature enabled by virtio-user, packet sockets with hardcoded 10-byte virtio net header will parse mac head incorrectly in packet_snd by taking the last two bytes of virtio net header as part of mac header. This incorrect mac header parsing will cause packet to be dropped due to invalid ether head checking in later under-layer device packet receiving. By adding extra field vnet_hdr_sz with utilizing holes in struct packet_sock to record currently used virtio net header size and supporting extra sockopt PACKET_VNET_HDR_SZ to set specified vnet_hdr_sz, packet sockets can know the exact length of virtio net header that virtio user gives. In packet_snd, tpacket_snd and packet_recvmsg, instead of using hardcoded virtio net header size, it can get the exact vnet_hdr_sz from corresponding packet_sock, and parse mac header correctly based on this information to avoid the packets being mistakenly dropped. Signed-off-by: Jianfeng Tan Co-developed-by: Anqi Shen Signed-off-by: Anqi Shen Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 78c981d6a9d4..9efc42382fdb 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -59,6 +59,7 @@ struct sockaddr_ll { #define PACKET_ROLLOVER_STATS 21 #define PACKET_FANOUT_DATA 22 #define PACKET_IGNORE_OUTGOING 23 +#define PACKET_VNET_HDR_SZ 24 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 -- cgit v1.2.3 From 84601d6ee68ae820dec97450934797046d62db4b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 21 Apr 2023 19:02:54 +0200 Subject: bpf: add bpf_link support for BPF_NETFILTER programs Add bpf_link support skeleton. To keep this reviewable, no bpf program can be invoked yet, if a program is attached only a c-stub is called and not the actual bpf program. Defaults to 'y' if both netfilter and bpf syscall are enabled in kconfig. Uapi example usage: union bpf_attr attr = { }; attr.link_create.prog_fd = progfd; attr.link_create.attach_type = 0; /* unused */ attr.link_create.netfilter.pf = PF_INET; attr.link_create.netfilter.hooknum = NF_INET_LOCAL_IN; attr.link_create.netfilter.priority = -128; err = bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); ... this would attach progfd to ipv4:input hook. Such hook gets removed automatically if the calling program exits. BPF_NETFILTER program invocation is added in followup change. NF_HOOK_OP_BPF enum will eventually be read from nfnetlink_hook, it allows to tell userspace which program is attached at the given hook when user runs 'nft hook list' command rather than just the priority and not-very-helpful 'this hook runs a bpf prog but I can't tell which one'. Will also be used to disallow registration of two bpf programs with same priority in a followup patch. v4: arm32 cmpxchg only supports 32bit operand s/prio/priority/ v3: restrict prog attachment to ip/ip6 for now, lets lift restrictions if more use cases pop up (arptables, ebtables, netdev ingress/egress etc). Signed-off-by: Florian Westphal Link: https://lore.kernel.org/r/20230421170300.24115-2-fw@strlen.de Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4b20a7269bee..1bb11a6ee667 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -986,6 +986,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ + BPF_PROG_TYPE_NETFILTER, }; enum bpf_attach_type { @@ -1050,6 +1051,7 @@ enum bpf_link_type { BPF_LINK_TYPE_PERF_EVENT = 7, BPF_LINK_TYPE_KPROBE_MULTI = 8, BPF_LINK_TYPE_STRUCT_OPS = 9, + BPF_LINK_TYPE_NETFILTER = 10, MAX_BPF_LINK_TYPE, }; @@ -1560,6 +1562,12 @@ union bpf_attr { */ __u64 cookie; } tracing; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; }; } link_create; @@ -6410,6 +6418,12 @@ struct bpf_link_info { struct { __u32 map_id; } struct_ops; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; }; } __attribute__((aligned(8))); -- cgit v1.2.3 From 506a74db7e019a277e987fa65654bdd953859d5b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 21 Apr 2023 19:02:56 +0200 Subject: netfilter: nfnetlink hook: dump bpf prog id This allows userspace ("nft list hooks") to show which bpf program is attached to which hook. Without this, user only knows bpf prog is attached at prio x, y, z at INPUT and FORWARD, but can't tell which program is where. v4: kdoc fixups (Simon Horman) Link: https://lore.kernel.org/bpf/ZEELzpNCnYJuZyod@corigine.com/ Signed-off-by: Florian Westphal Link: https://lore.kernel.org/r/20230421170300.24115-4-fw@strlen.de Signed-off-by: Alexei Starovoitov --- include/uapi/linux/netfilter/nfnetlink_hook.h | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_hook.h b/include/uapi/linux/netfilter/nfnetlink_hook.h index bbcd285b22e1..84a561a74b98 100644 --- a/include/uapi/linux/netfilter/nfnetlink_hook.h +++ b/include/uapi/linux/netfilter/nfnetlink_hook.h @@ -32,8 +32,12 @@ enum nfnl_hook_attributes { /** * enum nfnl_hook_chain_info_attributes - chain description * - * NFNLA_HOOK_INFO_DESC: nft chain and table name (enum nft_table_attributes) (NLA_NESTED) - * NFNLA_HOOK_INFO_TYPE: chain type (enum nfnl_hook_chaintype) (NLA_U32) + * @NFNLA_HOOK_INFO_DESC: nft chain and table name (NLA_NESTED) + * @NFNLA_HOOK_INFO_TYPE: chain type (enum nfnl_hook_chaintype) (NLA_U32) + * + * NFNLA_HOOK_INFO_DESC depends on NFNLA_HOOK_INFO_TYPE value: + * NFNL_HOOK_TYPE_NFTABLES: enum nft_table_attributes + * NFNL_HOOK_TYPE_BPF: enum nfnl_hook_bpf_attributes */ enum nfnl_hook_chain_info_attributes { NFNLA_HOOK_INFO_UNSPEC, @@ -55,10 +59,24 @@ enum nfnl_hook_chain_desc_attributes { /** * enum nfnl_hook_chaintype - chain type * - * @NFNL_HOOK_TYPE_NFTABLES nf_tables base chain + * @NFNL_HOOK_TYPE_NFTABLES: nf_tables base chain + * @NFNL_HOOK_TYPE_BPF: bpf program */ enum nfnl_hook_chaintype { NFNL_HOOK_TYPE_NFTABLES = 0x1, + NFNL_HOOK_TYPE_BPF, +}; + +/** + * enum nfnl_hook_bpf_attributes - bpf prog description + * + * @NFNLA_HOOK_BPF_ID: bpf program id (NLA_U32) + */ +enum nfnl_hook_bpf_attributes { + NFNLA_HOOK_BPF_UNSPEC, + NFNLA_HOOK_BPF_ID, + __NFNLA_HOOK_BPF_MAX, }; +#define NFNLA_HOOK_BPF_MAX (__NFNLA_HOOK_BPF_MAX - 1) #endif /* _NFNL_HOOK_H */ -- cgit v1.2.3 From d7597f59d1d33e9efbffa7060deb9ee5bd119e62 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Mon, 17 Apr 2023 22:13:40 -0700 Subject: mm: add new api to enable ksm per process Patch series "mm: process/cgroup ksm support", v9. So far KSM can only be enabled by calling madvise for memory regions. To be able to use KSM for more workloads, KSM needs to have the ability to be enabled / disabled at the process / cgroup level. Use case 1: The madvise call is not available in the programming language. An example for this are programs with forked workloads using a garbage collected language without pointers. In such a language madvise cannot be made available. In addition the addresses of objects get moved around as they are garbage collected. KSM sharing needs to be enabled "from the outside" for these type of workloads. Use case 2: The same interpreter can also be used for workloads where KSM brings no benefit or even has overhead. We'd like to be able to enable KSM on a workload by workload basis. Use case 3: With the madvise call sharing opportunities are only enabled for the current process: it is a workload-local decision. A considerable number of sharing opportunities may exist across multiple workloads or jobs (if they are part of the same security domain). Only a higler level entity like a job scheduler or container can know for certain if its running one or more instances of a job. That job scheduler however doesn't have the necessary internal workload knowledge to make targeted madvise calls. Security concerns: In previous discussions security concerns have been brought up. The problem is that an individual workload does not have the knowledge about what else is running on a machine. Therefore it has to be very conservative in what memory areas can be shared or not. However, if the system is dedicated to running multiple jobs within the same security domain, its the job scheduler that has the knowledge that sharing can be safely enabled and is even desirable. Performance: Experiments with using UKSM have shown a capacity increase of around 20%. Here are the metrics from an instagram workload (taken from a machine with 64GB main memory): full_scans: 445 general_profit: 20158298048 max_page_sharing: 256 merge_across_nodes: 1 pages_shared: 129547 pages_sharing: 5119146 pages_to_scan: 4000 pages_unshared: 1760924 pages_volatile: 10761341 run: 1 sleep_millisecs: 20 stable_node_chains: 167 stable_node_chains_prune_millisecs: 2000 stable_node_dups: 2751 use_zero_pages: 0 zero_pages_sharing: 0 After the service is running for 30 minutes to an hour, 4 to 5 million shared pages are common for this workload when using KSM. Detailed changes: 1. New options for prctl system command This patch series adds two new options to the prctl system call. The first one allows to enable KSM at the process level and the second one to query the setting. The setting will be inherited by child processes. With the above setting, KSM can be enabled for the seed process of a cgroup and all processes in the cgroup will inherit the setting. 2. Changes to KSM processing When KSM is enabled at the process level, the KSM code will iterate over all the VMA's and enable KSM for the eligible VMA's. When forking a process that has KSM enabled, the setting will be inherited by the new child process. 3. Add general_profit metric The general_profit metric of KSM is specified in the documentation, but not calculated. This adds the general profit metric to /sys/kernel/debug/mm/ksm. 4. Add more metrics to ksm_stat This adds the process profit metric to /proc//ksm_stat. 5. Add more tests to ksm_tests and ksm_functional_tests This adds an option to specify the merge type to the ksm_tests. This allows to test madvise and prctl KSM. It also adds a two new tests to ksm_functional_tests: one to test the new prctl options and the other one is a fork test to verify that the KSM process setting is inherited by client processes. This patch (of 3): So far KSM can only be enabled by calling madvise for memory regions. To be able to use KSM for more workloads, KSM needs to have the ability to be enabled / disabled at the process / cgroup level. 1. New options for prctl system command This patch series adds two new options to the prctl system call. The first one allows to enable KSM at the process level and the second one to query the setting. The setting will be inherited by child processes. With the above setting, KSM can be enabled for the seed process of a cgroup and all processes in the cgroup will inherit the setting. 2. Changes to KSM processing When KSM is enabled at the process level, the KSM code will iterate over all the VMA's and enable KSM for the eligible VMA's. When forking a process that has KSM enabled, the setting will be inherited by the new child process. 1) Introduce new MMF_VM_MERGE_ANY flag This introduces the new flag MMF_VM_MERGE_ANY flag. When this flag is set, kernel samepage merging (ksm) gets enabled for all vma's of a process. 2) Setting VM_MERGEABLE on VMA creation When a VMA is created, if the MMF_VM_MERGE_ANY flag is set, the VM_MERGEABLE flag will be set for this VMA. 3) support disabling of ksm for a process This adds the ability to disable ksm for a process if ksm has been enabled for the process with prctl. 4) add new prctl option to get and set ksm for a process This adds two new options to the prctl system call - enable ksm for all vmas of a process (if the vmas support it). - query if ksm has been enabled for a process. 3. Disabling MMF_VM_MERGE_ANY for storage keys in s390 In the s390 architecture when storage keys are used, the MMF_VM_MERGE_ANY will be disabled. Link: https://lkml.kernel.org/r/20230418051342.1919757-1-shr@devkernel.io Link: https://lkml.kernel.org/r/20230418051342.1919757-2-shr@devkernel.io Signed-off-by: Stefan Roesch Acked-by: David Hildenbrand Cc: David Hildenbrand Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: Bagas Sanjaya Signed-off-by: Andrew Morton --- include/uapi/linux/prctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index b99c0be72577..f23d9a16507f 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -292,4 +292,6 @@ struct prctl_mm_map { #define PR_GET_AUXV 0x41555856 +#define PR_SET_MEMORY_MERGE 67 +#define PR_GET_MEMORY_MERGE 68 #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 3db166d6cf0ea73dd2c887036aad2e95e0884d9b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 18 Apr 2023 10:39:01 -0700 Subject: cxl/mbox: Deprecate poison commands The CXL subsystem is adding formal mechanisms for managing device poison. Minimize the maintenance burden going forward, and maximize the investment in common tooling by deprecating direct user access to poison commands outside of CXL_MEM_RAW_COMMANDS debug scenarios. A new cxl_deprecated_commands[] list is created for querying which command ids defined in previous kernels are now deprecated. CXL Media and Poison Management commands, opcodes 0x43XX, defined in CXL 3.0 Spec, Table 8-93 are deprecated with one exception: Get Scan Media Capabilities. Keep Get Scan Media Capabilities as it simply provides information and has no impact on the device state. Effectively all of the commands defined in: commit 87815ee9d006 ("cxl/pci: Add media provisioning required commands") ...were defined prematurely and should have waited until the kernel implementation was decided. To my knowledge there are no shipping devices with poison support and no known tools that would regress with this change. Co-developed-by: Alison Schofield Signed-off-by: Alison Schofield Link: https://lore.kernel.org/r/652197e9bc8885e6448d989405b9e50ee9d6b0a6.1681838291.git.alison.schofield@intel.com Tested-by: Jonathan Cameron Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 86bbacf2a315..14bc6e742148 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -40,19 +40,22 @@ ___C(SET_ALERT_CONFIG, "Set Alert Configuration"), \ ___C(GET_SHUTDOWN_STATE, "Get Shutdown State"), \ ___C(SET_SHUTDOWN_STATE, "Set Shutdown State"), \ - ___C(GET_POISON, "Get Poison List"), \ - ___C(INJECT_POISON, "Inject Poison"), \ - ___C(CLEAR_POISON, "Clear Poison"), \ + ___DEPRECATED(GET_POISON, "Get Poison List"), \ + ___DEPRECATED(INJECT_POISON, "Inject Poison"), \ + ___DEPRECATED(CLEAR_POISON, "Clear Poison"), \ ___C(GET_SCAN_MEDIA_CAPS, "Get Scan Media Capabilities"), \ - ___C(SCAN_MEDIA, "Scan Media"), \ - ___C(GET_SCAN_MEDIA, "Get Scan Media Results"), \ + ___DEPRECATED(SCAN_MEDIA, "Scan Media"), \ + ___DEPRECATED(GET_SCAN_MEDIA, "Get Scan Media Results"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a +#define ___DEPRECATED(a, b) CXL_MEM_DEPRECATED_ID_##a enum { CXL_CMDS }; #undef ___C +#undef ___DEPRECATED #define ___C(a, b) { b } +#define ___DEPRECATED(a, b) { "Deprecated " b } static const struct { const char *name; } cxl_command_names[] __attribute__((__unused__)) = { CXL_CMDS }; @@ -68,6 +71,28 @@ static const struct { */ #undef ___C +#undef ___DEPRECATED +#define ___C(a, b) (0) +#define ___DEPRECATED(a, b) (1) + +static const __u8 cxl_deprecated_commands[] + __attribute__((__unused__)) = { CXL_CMDS }; + +/* + * Here's how this actually breaks out: + * cxl_deprecated_commands[] = { + * [CXL_MEM_COMMAND_ID_INVALID] = 0, + * [CXL_MEM_COMMAND_ID_IDENTIFY] = 0, + * ... + * [CXL_MEM_DEPRECATED_ID_GET_POISON] = 1, + * [CXL_MEM_DEPRECATED_ID_INJECT_POISON] = 1, + * [CXL_MEM_DEPRECATED_ID_CLEAR_POISON] = 1, + * ... + * }; + */ + +#undef ___C +#undef ___DEPRECATED /** * struct cxl_command_info - Command information returned from a query. -- cgit v1.2.3 From 9280c577431401544e63dfb489a830a42bee25eb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 20 Apr 2023 13:56:31 -0400 Subject: NFSD: Handle new xprtsec= export option Enable administrators to require clients to use transport layer security when accessing particular exports. Signed-off-by: Chuck Lever --- include/uapi/linux/nfsd/export.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h index 2124ba904779..a73ca3703abb 100644 --- a/include/uapi/linux/nfsd/export.h +++ b/include/uapi/linux/nfsd/export.h @@ -62,5 +62,18 @@ | NFSEXP_ALLSQUASH \ | NFSEXP_INSECURE_PORT) +/* + * Transport layer security policies that are permitted to access + * an export + */ +#define NFSEXP_XPRTSEC_NONE 0x0001 +#define NFSEXP_XPRTSEC_TLS 0x0002 +#define NFSEXP_XPRTSEC_MTLS 0x0004 + +#define NFSEXP_XPRTSEC_NUM (3) + +#define NFSEXP_XPRTSEC_ALL (NFSEXP_XPRTSEC_NONE | \ + NFSEXP_XPRTSEC_TLS | \ + NFSEXP_XPRTSEC_MTLS) #endif /* _UAPINFSD_EXPORT_H */ -- cgit v1.2.3 From daf376a366fd2d469d66ab83dfdc074777462bab Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 10 Apr 2023 13:06:08 -0500 Subject: uapi nbd: improve doc links to userspace spec The uapi header intentionally documents only the NBD server features that the kernel module will utilize as a client. But while it already had one mention of skipped bits due to userspace extensions, it did not actually direct the reader to the canonical source to learn about those extensions. While touching comments, fix an outdated reference that listed only READ and WRITE as commands. Signed-off-by: Eric Blake Reviewed-by: Ming Lei Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20230410180611.1051618-2-eblake@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/nbd.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index 20d6cc91435d..8797387caaf7 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -11,6 +11,8 @@ * Cleanup PARANOIA usage & code. * 2004/02/19 Paul Clements * Removed PARANOIA, plus various cleanup and comments + * 2023 Copyright Red Hat + * Link to userspace extensions. */ #ifndef _UAPILINUX_NBD_H @@ -30,12 +32,18 @@ #define NBD_SET_TIMEOUT _IO( 0xab, 9 ) #define NBD_SET_FLAGS _IO( 0xab, 10) +/* + * See also https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md + * for additional userspace extensions not yet utilized in the kernel module. + */ + enum { NBD_CMD_READ = 0, NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, NBD_CMD_FLUSH = 3, NBD_CMD_TRIM = 4 + /* userspace defines additional extension commands */ }; /* values for flags field, these are server interaction specific. */ @@ -64,14 +72,15 @@ enum { #define NBD_REQUEST_MAGIC 0x25609513 #define NBD_REPLY_MAGIC 0x67446698 /* Do *not* use magics: 0x12560953 0x96744668. */ +/* magic 0x668e33ef for structured reply not supported by kernel yet */ /* * This is the packet used for communication between client and * server. All data are in network byte order. */ struct nbd_request { - __be32 magic; - __be32 type; /* == READ || == WRITE */ + __be32 magic; /* NBD_REQUEST_MAGIC */ + __be32 type; /* See NBD_CMD_* */ char handle[8]; __be64 from; __be32 len; @@ -82,7 +91,7 @@ struct nbd_request { * it has completed an I/O request (or an error occurs). */ struct nbd_reply { - __be32 magic; + __be32 magic; /* NBD_REPLY_MAGIC */ __be32 error; /* 0 = ok, else error */ char handle[8]; /* handle you got from request */ }; -- cgit v1.2.3 From 2686eb845da7762ee98b17e578b0c081aafb77b9 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 10 Apr 2023 13:06:09 -0500 Subject: uapi nbd: add cookie alias to handle The uapi header declares a 'char handle[8]' per request; which is overloaded in English (are you referring to "handle" the verb, such as handling a signal or writing a callback handler, or "handle" the noun, the value used in a lookup table to correlate a response back to the request). Many user-space NBD implementations (both servers and clients) have instead used 'uint64_t cookie' or similar, as it is easier to directly assign an integer than to futz around with memcpy. In fact, upstream documentation is now encouraging this shift in terminology: https://github.com/NetworkBlockDevice/nbd/commit/ca4392eb2b Accomplish this by use of an anonymous union to provide the alias for anyone getting the definition from the uapi; this does not break existing clients, while exposing the nicer name for those who prefer it. Note that block/nbd.c still uses the term handle (in fact, it actually combines a 32-bit cookie and a 32-bit tag into the 64-bit handle), but that internal usage is not changed by the public uapi, since no compliant NBD server has any reason to inspect or alter the 64 bits sent over the socket. Signed-off-by: Eric Blake Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20230410180611.1051618-3-eblake@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/nbd.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index 8797387caaf7..80ce0ef43afd 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -12,7 +12,7 @@ * 2004/02/19 Paul Clements * Removed PARANOIA, plus various cleanup and comments * 2023 Copyright Red Hat - * Link to userspace extensions. + * Link to userspace extensions, favor cookie over handle. */ #ifndef _UAPILINUX_NBD_H @@ -81,7 +81,10 @@ enum { struct nbd_request { __be32 magic; /* NBD_REQUEST_MAGIC */ __be32 type; /* See NBD_CMD_* */ - char handle[8]; + union { + __be64 cookie; /* Opaque identifier for request */ + char handle[8]; /* older spelling of cookie */ + }; __be64 from; __be32 len; } __attribute__((packed)); @@ -93,6 +96,9 @@ struct nbd_request { struct nbd_reply { __be32 magic; /* NBD_REPLY_MAGIC */ __be32 error; /* 0 = ok, else error */ - char handle[8]; /* handle you got from request */ + union { + __be64 cookie; /* Opaque identifier from request */ + char handle[8]; /* older spelling of cookie */ + }; }; #endif /* _UAPILINUX_NBD_H */ -- cgit v1.2.3