From 51c1064e82e77b39a49889287ca50709303e2f26 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 22 Nov 2019 15:19:21 +0100 Subject: gpiolib: add new ioctl() for monitoring changes in line info Currently there is no way for user-space to be informed about changes in status of GPIO lines e.g. when someone else requests the line or its config changes. We can only periodically re-read the line-info. This is fine for simple one-off user-space tools, but any daemon that provides a centralized access to GPIO chips would benefit hugely from an event driven line info synchronization. This patch adds a new ioctl() that allows user-space processes to reuse the file descriptor associated with the character device for watching any changes in line properties. Every such event contains the updated line information. Currently the events are generated on three types of status changes: when a line is requested, when it's released and when its config is changed. The first two are self-explanatory. For the third one: this will only happen when another user-space process calls the new SET_CONFIG ioctl() as any changes that can happen from within the kernel (i.e. set_transitory() or set_debounce()) are of no interest to user-space. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- include/uapi/linux/gpio.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 799cf823d493..dca320764e4d 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -59,6 +59,34 @@ struct gpioline_info { /* Maximum number of requested handles */ #define GPIOHANDLES_MAX 64 +/* Possible line status change events */ +enum { + GPIOLINE_CHANGED_REQUESTED = 1, + GPIOLINE_CHANGED_RELEASED, + GPIOLINE_CHANGED_CONFIG, +}; + +/** + * struct gpioline_info_changed - Information about a change in status + * of a GPIO line + * @info: updated line information + * @timestamp: estimate of time of status change occurrence, in nanoseconds + * and GPIOLINE_CHANGED_CONFIG + * @event_type: one of GPIOLINE_CHANGED_REQUESTED, GPIOLINE_CHANGED_RELEASED + * + * Note: struct gpioline_info embedded here has 32-bit alignment on its own, + * but it works fine with 64-bit alignment too. With its 72 byte size, we can + * guarantee there are no implicit holes between it and subsequent members. + * The 20-byte padding at the end makes sure we don't add any implicit padding + * at the end of the structure on 64-bit architectures. + */ +struct gpioline_info_changed { + struct gpioline_info info; + __u64 timestamp; + __u32 event_type; + __u32 padding[5]; /* for future use */ +}; + /* Linerequest flags */ #define GPIOHANDLE_REQUEST_INPUT (1UL << 0) #define GPIOHANDLE_REQUEST_OUTPUT (1UL << 1) @@ -176,6 +204,8 @@ struct gpioevent_data { #define GPIO_GET_CHIPINFO_IOCTL _IOR(0xB4, 0x01, struct gpiochip_info) #define GPIO_GET_LINEINFO_IOCTL _IOWR(0xB4, 0x02, struct gpioline_info) +#define GPIO_GET_LINEINFO_WATCH_IOCTL _IOWR(0xB4, 0x0b, struct gpioline_info) +#define GPIO_GET_LINEINFO_UNWATCH_IOCTL _IOWR(0xB4, 0x0c, __u32) #define GPIO_GET_LINEHANDLE_IOCTL _IOWR(0xB4, 0x03, struct gpiohandle_request) #define GPIO_GET_LINEEVENT_IOCTL _IOWR(0xB4, 0x04, struct gpioevent_request) -- cgit v1.2.3 From ef2c41cf38a7559bbf91af42d5b6a4429db8fc68 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 5 Feb 2020 14:26:22 +0100 Subject: clone3: allow spawning processes into cgroups This adds support for creating a process in a different cgroup than its parent. Callers can limit and account processes and threads right from the moment they are spawned: - A service manager can directly spawn new services into dedicated cgroups. - A process can be directly created in a frozen cgroup and will be frozen as well. - The initial accounting jitter experienced by process supervisors and daemons is eliminated with this. - Threaded applications or even thread implementations can choose to create a specific cgroup layout where each thread is spawned directly into a dedicated cgroup. This feature is limited to the unified hierarchy. Callers need to pass a directory file descriptor for the target cgroup. The caller can choose to pass an O_PATH file descriptor. All usual migration restrictions apply, i.e. there can be no processes in inner nodes. In general, creating a process directly in a target cgroup adheres to all migration restrictions. One of the biggest advantages of this feature is that CLONE_INTO_GROUP does not need to grab the write side of the cgroup cgroup_threadgroup_rwsem. This global lock makes moving tasks/threads around super expensive. With clone3() this lock is avoided. Cc: Tejun Heo Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Johannes Weiner Cc: Li Zefan Cc: Peter Zijlstra Cc: cgroups@vger.kernel.org Signed-off-by: Christian Brauner Signed-off-by: Tejun Heo --- include/uapi/linux/sched.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 2e3bc22c6f20..3bac0a8ceab2 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -35,6 +35,7 @@ /* Flags for the clone3() syscall. */ #define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */ +#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */ /* * cloning flags intersect with CSIGNAL so can be used with unshare and clone3 @@ -81,6 +82,8 @@ * @set_tid_size: This defines the size of the array referenced * in @set_tid. This cannot be larger than the * kernel's limit of nested PID namespaces. + * @cgroup: If CLONE_INTO_CGROUP is specified set this to + * a file descriptor for the cgroup. * * The structure is versioned by size and thus extensible. * New struct members must go at the end of the struct and @@ -97,11 +100,13 @@ struct clone_args { __aligned_u64 tls; __aligned_u64 set_tid; __aligned_u64 set_tid_size; + __aligned_u64 cgroup; }; #endif #define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ #define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ +#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ /* * Scheduling policies -- cgit v1.2.3 From 202853595e53f981c86656c49fc1cc1e3620f558 Mon Sep 17 00:00:00 2001 From: Alexandru Gagniuc Date: Fri, 25 Oct 2019 15:00:45 -0400 Subject: PCI: pciehp: Disable in-band presence detect when possible The presence detect state (PDS) is normally a logical OR of in-band and out-of-band (OOB) presence detect. As of PCIe 4.0, there is the option to disable in-band presence so that the PDS bit always reflects the state of the out-of-band presence. The recommendation of the PCIe spec is to disable in-band presence whenever supported (PCIe r5.0, appendix I implementation note): Due to architectural issues, the in-band (Physical-Layer-based) portion of the PD mechanism is deprecated for use with async hot-plug. One issue is that in-band PD as architected does not detect adapter removal during certain LTSSM states, notably the L1 and Disabled States. Another issue is that when both in-band and OOB PD are being used together, the Presence Detect State bit and its associated interrupt mechanism always reflect the logical OR of the inband and OOB PD states, and with some hot-plug hardware configurations, it is important for software to detect and respond to in-band and OOB PD events independently. If OOB PD is being used and the associated DSP supports In-Band PD Disable, it is recommended that the In-Band PD Disable bit be Set, and the Presence Detect State bit and its associated interrupt mechanism be used exclusively for OOB PD. As a substitute for in-band PD with async hot-plug, the reference model uses either the DPC or the DLL Link Active mechanism. Link: https://lore.kernel.org/r/20191025190047.38130-2-stuart.w.hayes@gmail.com [bhelgaas: move PCI_EXP_SLTCAP2 read earlier & print PCI_EXP_SLTCAP2_IBPD value (suggested by Lukas)] Signed-off-by: Alexandru Gagniuc Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Reviewed-by: Lukas Wunner --- include/uapi/linux/pci_regs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 5437690483cd..f9701410d3b5 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -605,6 +605,7 @@ #define PCI_EXP_SLTCTL_PWR_OFF 0x0400 /* Power Off */ #define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ #define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ +#define PCI_EXP_SLTCTL_IBPD_DISABLE 0x4000 /* In-band PD disable */ #define PCI_EXP_SLTSTA 26 /* Slot Status */ #define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ #define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ @@ -680,6 +681,7 @@ #define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ #define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ +#define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ #define PCI_EXP_SLTSTA2 58 /* Slot Status 2 */ -- cgit v1.2.3 From 7c4a4d088283e02cc04252e88dd08b5cdf54e70f Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 10 Mar 2020 15:18:02 -0700 Subject: dmaengine: idxd: Merge definition of dsa_batch_desc into dsa_hw_desc We don't need a special structure just for batch descriptors. The layout matches the general form for other descriptors. Merge the desc_list_addr field into the union of other aliases for the the third quadword in the structure. Create a union to alias "xfer_size" with "desc_count". Signed-off-by: Tony Luck Acked-by: Dave Jiang Link: https://lore.kernel.org/r/158387868208.35922.5895104426944263789.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 849ef1515d04..1f412fbf561b 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -83,21 +83,6 @@ enum dsa_completion_status { #define DSA_COMP_STATUS_MASK 0x7f #define DSA_COMP_STATUS_WRITE 0x80 -struct dsa_batch_desc { - uint32_t pasid:20; - uint32_t rsvd:11; - uint32_t priv:1; - uint32_t flags:24; - uint32_t opcode:8; - uint64_t completion_addr; - uint64_t desc_list_addr; - uint64_t rsvd1; - uint32_t desc_count; - uint16_t interrupt_handle; - uint16_t rsvd2; - uint8_t rsvd3[24]; -} __attribute__((packed)); - struct dsa_hw_desc { uint32_t pasid:20; uint32_t rsvd:11; @@ -109,6 +94,7 @@ struct dsa_hw_desc { uint64_t src_addr; uint64_t rdback_addr; uint64_t pattern; + uint64_t desc_list_addr; }; union { uint64_t dst_addr; @@ -116,7 +102,10 @@ struct dsa_hw_desc { uint64_t src2_addr; uint64_t comp_pattern; }; - uint32_t xfer_size; + union { + uint32_t xfer_size; + uint32_t desc_count; + }; uint16_t int_handle; uint16_t rsvd1; union { -- cgit v1.2.3 From 22b436c9b5682e877d34425d05576db74a8647e1 Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Mon, 2 Mar 2020 13:50:01 +0200 Subject: virtio-net: Introduce extended RSC feature VIRTIO_NET_F_RSC_EXT feature bit indicates that the device is able to provide extended RSC information. When the feature is negotiatede and 'gso_type' field in received packet is not GSO_NONE, the device reports number of coalesced packets in 'csum_start' field and number of duplicated acks in 'csum_offset' field and sets VIRTIO_NET_HDR_F_RSC_INFO in 'flags' field. Signed-off-by: Yuri Benditovich Link: https://lore.kernel.org/r/20200302115003.14877-2-yuri.benditovich@daynix.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_net.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index a3715a3224c1..6466c5979a93 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -57,6 +57,7 @@ * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ +#define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ #define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device * with the same MAC. */ @@ -104,6 +105,7 @@ struct virtio_net_config { struct virtio_net_hdr_v1 { #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ +#define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc info in csum_ fields */ __u8 flags; #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ @@ -113,8 +115,26 @@ struct virtio_net_hdr_v1 { __u8 gso_type; __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ __virtio16 gso_size; /* Bytes to append to hdr_len per frame */ - __virtio16 csum_start; /* Position to start checksumming from */ - __virtio16 csum_offset; /* Offset after that to place checksum */ + union { + struct { + __virtio16 csum_start; + __virtio16 csum_offset; + }; + /* Checksum calculation */ + struct { + /* Position to start checksumming from */ + __virtio16 start; + /* Offset after that to place checksum */ + __virtio16 offset; + } csum; + /* Receive Segment Coalescing */ + struct { + /* Number of coalesced segments */ + __le16 segments; + /* Number of duplicated acks */ + __le16 dup_acks; + } rsc; + }; __virtio16 num_buffers; /* Number of merged rx buffers */ }; -- cgit v1.2.3 From fd58bf674564f1731ca8a61a5150d40383f3df60 Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Mon, 2 Mar 2020 13:50:02 +0200 Subject: virtio-net: Introduce RSS receive steering feature RSS (Receive-side scaling) defines hash calculation rules and decision on receive virtqueue according to the calculated hash, provided mask to apply and provided indirection table containing indices of receive virqueues. The driver sends the control command to enable multiqueue and provide parameters for receive steering. Signed-off-by: Yuri Benditovich Link: https://lore.kernel.org/r/20200302115003.14877-3-yuri.benditovich@daynix.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_net.h | 42 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 6466c5979a93..aec6fac3666a 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -57,6 +57,7 @@ * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ +#define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ #define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device * with the same MAC. @@ -70,6 +71,17 @@ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ #define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ +/* supported/enabled hash types */ +#define VIRTIO_NET_RSS_HASH_TYPE_IPv4 (1 << 0) +#define VIRTIO_NET_RSS_HASH_TYPE_TCPv4 (1 << 1) +#define VIRTIO_NET_RSS_HASH_TYPE_UDPv4 (1 << 2) +#define VIRTIO_NET_RSS_HASH_TYPE_IPv6 (1 << 3) +#define VIRTIO_NET_RSS_HASH_TYPE_TCPv6 (1 << 4) +#define VIRTIO_NET_RSS_HASH_TYPE_UDPv6 (1 << 5) +#define VIRTIO_NET_RSS_HASH_TYPE_IP_EX (1 << 6) +#define VIRTIO_NET_RSS_HASH_TYPE_TCP_EX (1 << 7) +#define VIRTIO_NET_RSS_HASH_TYPE_UDP_EX (1 << 8) + struct virtio_net_config { /* The config defining mac address (if VIRTIO_NET_F_MAC) */ __u8 mac[ETH_ALEN]; @@ -93,6 +105,12 @@ struct virtio_net_config { * Any other value stands for unknown. */ __u8 duplex; + /* maximum size of RSS key */ + __u8 rss_max_key_size; + /* maximum number of indirection table entries */ + __le16 rss_max_indirection_table_length; + /* bitmask of supported VIRTIO_NET_RSS_HASH_ types */ + __le32 supported_hash_types; } __attribute__((packed)); /* @@ -248,7 +266,9 @@ struct virtio_net_ctrl_mac { /* * Control Receive Flow Steering - * + */ +#define VIRTIO_NET_CTRL_MQ 4 +/* * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET * enables Receive Flow Steering, specifying the number of the transmit and * receive queues that will be used. After the command is consumed and acked by @@ -261,11 +281,29 @@ struct virtio_net_ctrl_mq { __virtio16 virtqueue_pairs; }; -#define VIRTIO_NET_CTRL_MQ 4 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 +/* + * The command VIRTIO_NET_CTRL_MQ_RSS_CONFIG has the same effect as + * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET does and additionally configures + * the receive steering to use a hash calculated for incoming packet + * to decide on receive virtqueue to place the packet. The command + * also provides parameters to calculate a hash and receive virtqueue. + */ +struct virtio_net_rss_config { + __le32 hash_types; + __le16 indirection_table_mask; + __le16 unclassified_queue; + __le16 indirection_table[1/* + indirection_table_mask */]; + __le16 max_tx_vq; + __u8 hash_key_length; + __u8 hash_key_data[/* hash_key_length */]; +}; + + #define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1 + /* * Control network offloads * -- cgit v1.2.3 From 3024e20958ee9e1554951df4d26aaf9f5cb7c210 Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Mon, 2 Mar 2020 13:50:03 +0200 Subject: virtio-net: Introduce hash report feature The feature VIRTIO_NET_F_HASH_REPORT extends the layout of the packet and requests the device to calculate hash on incoming packets and report it in the packet header. Signed-off-by: Yuri Benditovich Link: https://lore.kernel.org/r/20200302115003.14877-4-yuri.benditovich@daynix.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_net.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index aec6fac3666a..19d23e5baa4e 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -57,6 +57,7 @@ * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ +#define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ #define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device @@ -156,6 +157,23 @@ struct virtio_net_hdr_v1 { __virtio16 num_buffers; /* Number of merged rx buffers */ }; +struct virtio_net_hdr_v1_hash { + struct virtio_net_hdr_v1 hdr; + __le32 hash_value; +#define VIRTIO_NET_HASH_REPORT_NONE 0 +#define VIRTIO_NET_HASH_REPORT_IPv4 1 +#define VIRTIO_NET_HASH_REPORT_TCPv4 2 +#define VIRTIO_NET_HASH_REPORT_UDPv4 3 +#define VIRTIO_NET_HASH_REPORT_IPv6 4 +#define VIRTIO_NET_HASH_REPORT_TCPv6 5 +#define VIRTIO_NET_HASH_REPORT_UDPv6 6 +#define VIRTIO_NET_HASH_REPORT_IPv6_EX 7 +#define VIRTIO_NET_HASH_REPORT_TCPv6_EX 8 +#define VIRTIO_NET_HASH_REPORT_UDPv6_EX 9 + __le16 hash_report; + __le16 padding; +}; + #ifndef VIRTIO_NET_NO_LEGACY /* This header comes first in the scatter-gather list. * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must @@ -304,6 +322,24 @@ struct virtio_net_rss_config { #define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1 +/* + * The command VIRTIO_NET_CTRL_MQ_HASH_CONFIG requests the device + * to include in the virtio header of the packet the value of the + * calculated hash and the report type of hash. It also provides + * parameters for hash calculation. The command requires feature + * VIRTIO_NET_F_HASH_REPORT to be negotiated to extend the + * layout of virtio header as defined in virtio_net_hdr_v1_hash. + */ +struct virtio_net_hash_config { + __le32 hash_types; + /* for compatibility with virtio_net_rss_config */ + __le16 reserved[4]; + __u8 hash_key_length; + __u8 hash_key_data[/* hash_key_length */]; +}; + + #define VIRTIO_NET_CTRL_MQ_HASH_CONFIG 2 + /* * Control network offloads * -- cgit v1.2.3 From 6473ea760ca1707ade74de5b57e74189d14f8e10 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:09 +0200 Subject: fsnotify: tidy up FS_ and FAN_ constants Order by value, so the free value ranges are easier to find. Link: https://lore.kernel.org/r/20200319151022.31456-2-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index b9effa6f8503..2a1844edda47 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -25,9 +25,9 @@ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ -#define FAN_ONDIR 0x40000000 /* event occurred against dir */ +#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ -#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ +#define FAN_ONDIR 0x40000000 /* Event occurred against dir */ /* helper events */ #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ -- cgit v1.2.3 From 43eeeecc8ed5fa05652d68032a8bfb1308ee9baa Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 24 Mar 2020 09:28:27 -0600 Subject: vfio: Introduce VFIO_DEVICE_FEATURE ioctl and first user The VFIO_DEVICE_FEATURE ioctl is meant to be a general purpose, device agnostic ioctl for setting, retrieving, and probing device features. This implementation provides a 16-bit field for specifying a feature index, where the data porition of the ioctl is determined by the semantics for the given feature. Additional flag bits indicate the direction and nature of the operation; SET indicates user data is provided into the device feature, GET indicates the device feature is written out into user data. The PROBE flag augments determining whether the given feature is supported, and if provided, whether the given operation on the feature is supported. The first user of this ioctl is for setting the vfio-pci VF token, where the user provides a shared secret key (UUID) on a SR-IOV PF device, which users must provide when opening associated VF devices. Reviewed-by: Cornelia Huck Reviewed-by: Kevin Tian Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 9e843a147ead..015516bcfaa3 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -707,6 +707,43 @@ struct vfio_device_ioeventfd { #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) +/** + * VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17, + * struct vfio_device_feature) + * + * Get, set, or probe feature data of the device. The feature is selected + * using the FEATURE_MASK portion of the flags field. Support for a feature + * can be probed by setting both the FEATURE_MASK and PROBE bits. A probe + * may optionally include the GET and/or SET bits to determine read vs write + * access of the feature respectively. Probing a feature will return success + * if the feature is supported and all of the optionally indicated GET/SET + * methods are supported. The format of the data portion of the structure is + * specific to the given feature. The data portion is not required for + * probing. GET and SET are mutually exclusive, except for use with PROBE. + * + * Return 0 on success, -errno on failure. + */ +struct vfio_device_feature { + __u32 argsz; + __u32 flags; +#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */ +#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */ +#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */ +#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */ + __u8 data[]; +}; + +#define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) + +/* + * Provide support for setting a PCI VF Token, which is used as a shared + * secret between PF and VF drivers. This feature may only be set on a + * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing + * open VFs. Data provided when setting this feature is a 16-byte array + * (__u8 b[16]), representing a UUID. + */ +#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From 32f5f62d7991c62095ee62a060b35dec0a9ac404 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Tue, 3 Mar 2020 21:01:48 +0100 Subject: gpio: uapi: Improve phrasing around arrays representing empty strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Character arrays can be considered empty strings (if they are immediately terminated), but they cannot be NULL. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Bartosz Golaszewski --- include/uapi/linux/gpio.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index dca320764e4d..0206383c0383 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -18,7 +18,7 @@ * struct gpiochip_info - Information about a certain GPIO chip * @name: the Linux kernel name of this GPIO chip * @label: a functional name for this GPIO chip, such as a product - * number, may be NULL + * number, may be empty * @lines: number of GPIO lines on this chip */ struct gpiochip_info { @@ -44,10 +44,10 @@ struct gpiochip_info { * @flags: various flags for this line * @name: the name of this GPIO line, such as the output pin of the line on the * chip, a rail or a pin header name on a board, as specified by the gpio - * chip, may be NULL + * chip, may be empty * @consumer: a functional name for the consumer of this GPIO line as set by - * whatever is using it, will be NULL if there is no current user but may - * also be NULL if the consumer doesn't set this up + * whatever is using it, will be empty if there is no current user but may + * also be empty if the consumer doesn't set this up */ struct gpioline_info { __u32 line_offset; -- cgit v1.2.3 From 9e2ba2c34f1922ca1e0c7d31b30ace5842c2e7d1 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:19 +0200 Subject: fanotify: send FAN_DIR_MODIFY event flavor with dir inode and name Dirent events are going to be supported in two flavors: 1. Directory fid info + mask that includes the specific event types (e.g. FAN_CREATE) and an optional FAN_ONDIR flag. 2. Directory fid info + name + mask that includes only FAN_DIR_MODIFY. To request the second event flavor, user needs to set the event type FAN_DIR_MODIFY in the mark mask. The first flavor is supported since kernel v5.1 for groups initialized with flag FAN_REPORT_FID. It is intended to be used for watching directories in "batch mode" - the watcher is notified when directory is changed and re-scans the directory content in response. This event flavor is stored more compactly in the event queue, so it is optimal for workloads with frequent directory changes. The second event flavor is intended to be used for watching large directories, where the cost of re-scan of the directory on every change is considered too high. The watcher getting the event with the directory fid and entry name is expected to call fstatat(2) to query the content of the entry after the change. Legacy inotify events are reported with name and event mask (e.g. "foo", FAN_CREATE | FAN_ONDIR). That can lead users to the conclusion that there is *currently* an entry "foo" that is a sub-directory, when in fact "foo" may be negative or non-dir by the time user gets the event. To make it clear that the current state of the named entry is unknown, when reporting an event with name info, fanotify obfuscates the specific event types (e.g. create,delete,rename) and uses a common event type - FAN_DIR_MODIFY to describe the change. This should make it harder for users to make wrong assumptions and write buggy filesystem monitors. At this point, name info reporting is not yet implemented, so trying to set FAN_DIR_MODIFY in mark mask will return -EINVAL. Link: https://lore.kernel.org/r/20200319151022.31456-12-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 2a1844edda47..615fa2c87179 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -24,6 +24,7 @@ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +#define FAN_DIR_MODIFY 0x00080000 /* Directory entry was modified */ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ -- cgit v1.2.3 From 44d705b0370b1d581f46ff23e5d33e8b5ff8ec58 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:22 +0200 Subject: fanotify: report name info for FAN_DIR_MODIFY event Report event FAN_DIR_MODIFY with name in a variable length record similar to how fid's are reported. With name info reporting implemented, setting FAN_DIR_MODIFY in mark mask is now allowed. When events are reported with name, the reported fid identifies the directory and the name follows the fid. The info record type for this event info is FAN_EVENT_INFO_TYPE_DFID_NAME. For now, all reported events have at most one info record which is either FAN_EVENT_INFO_TYPE_FID or FAN_EVENT_INFO_TYPE_DFID_NAME (for FAN_DIR_MODIFY). Later on, events "on child" will report both records. There are several ways that an application can use this information: 1. When watching a single directory, the name is always relative to the watched directory, so application need to fstatat(2) the name relative to the watched directory. 2. When watching a set of directories, the application could keep a map of dirfd for all watched directories and hash the map by fid obtained with name_to_handle_at(2). When getting a name event, the fid in the event info could be used to lookup the base dirfd in the map and then call fstatat(2) with that dirfd. 3. When watching a filesystem (FAN_MARK_FILESYSTEM) or a large set of directories, the application could use open_by_handle_at(2) with the fid in event info to obtain dirfd for the directory where event happened and call fstatat(2) with this dirfd. The last option scales better for a large number of watched directories. The first two options may be available in the future also for non privileged fanotify watchers, because open_by_handle_at(2) requires the CAP_DAC_READ_SEARCH capability. Link: https://lore.kernel.org/r/20200319151022.31456-15-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 615fa2c87179..a88c7c6d0692 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -117,6 +117,7 @@ struct fanotify_event_metadata { }; #define FAN_EVENT_INFO_TYPE_FID 1 +#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 /* Variable length info record following event metadata */ struct fanotify_event_info_header { @@ -125,7 +126,12 @@ struct fanotify_event_info_header { __u16 len; }; -/* Unique file identifier info record */ +/* + * Unique file identifier info record. This is used both for + * FAN_EVENT_INFO_TYPE_FID records and for FAN_EVENT_INFO_TYPE_DFID_NAME + * records. For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null + * terminated name immediately after the file handle. + */ struct fanotify_event_info_fid { struct fanotify_event_info_header hdr; __kernel_fsid_t fsid; -- cgit v1.2.3 From 9b6eaaf3db5e5888df7bca7fed7752a90f7fd871 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Tue, 24 Mar 2020 05:22:13 +0100 Subject: coresight: do not use the BIT() macro in the UAPI header The BIT() macro definition is not available for the UAPI headers (moreover, it can be defined differently in the user space); replace its usage with the _BITUL() macro that is defined in . Fixes: 237483aa5cf4 ("coresight: stm: adding driver for CoreSight STM component") Signed-off-by: Eugene Syromiatnikov Cc: stable Reviewed-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200324042213.GA10452@asgard.redhat.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/coresight-stm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/coresight-stm.h b/include/uapi/linux/coresight-stm.h index aac550a52f80..8847dbf24151 100644 --- a/include/uapi/linux/coresight-stm.h +++ b/include/uapi/linux/coresight-stm.h @@ -2,8 +2,10 @@ #ifndef __UAPI_CORESIGHT_STM_H_ #define __UAPI_CORESIGHT_STM_H_ -#define STM_FLAG_TIMESTAMPED BIT(3) -#define STM_FLAG_GUARANTEED BIT(7) +#include + +#define STM_FLAG_TIMESTAMPED _BITUL(3) +#define STM_FLAG_GUARANTEED _BITUL(7) /* * The CoreSight STM supports guaranteed and invariant timing -- cgit v1.2.3 From b0efe0281234e60c7c4f5fa812712fca50bec1b7 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Tue, 24 Mar 2020 05:12:09 +0100 Subject: rtc: make definitions in include/uapi/linux/rtc.h actually useful for user space BIT() macro is not defined in UAPI headers; there is, however, similarly defined _BITUL() macro present in include/uapi/linux/const.h; use it instead and include and in order to make the definitions provided in the header useful. Fixes: 3431ca4837bf ("rtc: define RTC_VL_READ values") Signed-off-by: Eugene Syromiatnikov Link: https://lore.kernel.org/r/20200324041209.GA30727@asgard.redhat.com Signed-off-by: Alexandre Belloni --- include/uapi/linux/rtc.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index 095af360326a..83bba58d47f4 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -12,6 +12,9 @@ #ifndef _UAPI_LINUX_RTC_H_ #define _UAPI_LINUX_RTC_H_ +#include +#include + /* * The struct used to pass data via the following ioctl. Similar to the * struct tm in , but it needs to be here so that the kernel @@ -92,10 +95,10 @@ struct rtc_pll_info { #define RTC_PLL_GET _IOR('p', 0x11, struct rtc_pll_info) /* Get PLL correction */ #define RTC_PLL_SET _IOW('p', 0x12, struct rtc_pll_info) /* Set PLL correction */ -#define RTC_VL_DATA_INVALID BIT(0) /* Voltage too low, RTC data is invalid */ -#define RTC_VL_BACKUP_LOW BIT(1) /* Backup voltage is low */ -#define RTC_VL_BACKUP_EMPTY BIT(2) /* Backup empty or not present */ -#define RTC_VL_ACCURACY_LOW BIT(3) /* Voltage is low, RTC accuracy is reduced */ +#define RTC_VL_DATA_INVALID _BITUL(0) /* Voltage too low, RTC data is invalid */ +#define RTC_VL_BACKUP_LOW _BITUL(1) /* Backup voltage is low */ +#define RTC_VL_BACKUP_EMPTY _BITUL(2) /* Backup empty or not present */ +#define RTC_VL_ACCURACY_LOW _BITUL(3) /* Voltage is low, RTC accuracy is reduced */ #define RTC_VL_READ _IOR('p', 0x13, unsigned int) /* Voltage low detection */ #define RTC_VL_CLR _IO('p', 0x14) /* Clear voltage low information */ -- cgit v1.2.3 From 3f84b96c9779c8c2c8f4215a9d08cc6af1d45fdb Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 26 Mar 2020 10:35:56 +0100 Subject: iommu/virtio: Fix sparse warning We copied the virtio_iommu_config from the virtio-iommu specification, which declares the fields using little-endian annotations (for example le32). Unfortunately this causes sparse to warn about comparison between little- and cpu-endian, because of the typecheck() in virtio_cread(): drivers/iommu/virtio-iommu.c:1024:9: sparse: sparse: incompatible types in comparison expression (different base types): drivers/iommu/virtio-iommu.c:1024:9: sparse: restricted __le64 * drivers/iommu/virtio-iommu.c:1024:9: sparse: unsigned long long * drivers/iommu/virtio-iommu.c:1036:9: sparse: sparse: incompatible types in comparison expression (different base types): drivers/iommu/virtio-iommu.c:1036:9: sparse: restricted __le64 * drivers/iommu/virtio-iommu.c:1036:9: sparse: unsigned long long * drivers/iommu/virtio-iommu.c:1040:9: sparse: sparse: incompatible types in comparison expression (different base types): drivers/iommu/virtio-iommu.c:1040:9: sparse: restricted __le64 * drivers/iommu/virtio-iommu.c:1040:9: sparse: unsigned long long * drivers/iommu/virtio-iommu.c:1044:9: sparse: sparse: incompatible types in comparison expression (different base types): drivers/iommu/virtio-iommu.c:1044:9: sparse: restricted __le32 * drivers/iommu/virtio-iommu.c:1044:9: sparse: unsigned int * drivers/iommu/virtio-iommu.c:1048:9: sparse: sparse: incompatible types in comparison expression (different base types): drivers/iommu/virtio-iommu.c:1048:9: sparse: restricted __le32 * drivers/iommu/virtio-iommu.c:1048:9: sparse: unsigned int * drivers/iommu/virtio-iommu.c:1052:9: sparse: sparse: incompatible types in comparison expression (different base types): drivers/iommu/virtio-iommu.c:1052:9: sparse: restricted __le32 * drivers/iommu/virtio-iommu.c:1052:9: sparse: unsigned int * Although virtio_cread() does convert virtio-endian (in our case little-endian) to cpu-endian, the typecheck() needs the two arguments to have the same endianness. Do as UAPI headers of other virtio devices do, and remove the endian annotation from the device config. Even though we change the UAPI this shouldn't cause any regression since QEMU, the existing implementation of virtio-iommu that uses this header, already removes the annotations when importing headers. Reported-by: kbuild test robot Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20200326093558.2641019-2-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- include/uapi/linux/virtio_iommu.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h index 237e36a280cb..48e3c29223b5 100644 --- a/include/uapi/linux/virtio_iommu.h +++ b/include/uapi/linux/virtio_iommu.h @@ -18,24 +18,24 @@ #define VIRTIO_IOMMU_F_MMIO 5 struct virtio_iommu_range_64 { - __le64 start; - __le64 end; + __u64 start; + __u64 end; }; struct virtio_iommu_range_32 { - __le32 start; - __le32 end; + __u32 start; + __u32 end; }; struct virtio_iommu_config { /* Supported page sizes */ - __le64 page_size_mask; + __u64 page_size_mask; /* Supported IOVA range */ struct virtio_iommu_range_64 input_range; /* Max domain ID size */ struct virtio_iommu_range_32 domain_range; /* Probe buffer size */ - __le32 probe_size; + __u32 probe_size; }; /* Request types */ -- cgit v1.2.3 From 96aaab686505c449e24d76e76507290dcc30e008 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:28 +0900 Subject: perf/core: Add PERF_RECORD_CGROUP event To support cgroup tracking, add CGROUP event to save a link between cgroup path and id number. This is needed since cgroups can go away when userspace tries to read the cgroup info (from the id) later. The attr.cgroup bit was also added to enable cgroup tracking from userspace. This event will be generated when a new cgroup becomes active. Userspace might need to synthesize those events for existing cgroups. Committer testing: From the resulting kernel, using /sys/kernel/btf/vmlinux: $ pahole perf_event_attr | grep -w cgroup -B5 -A1 __u64 write_backward:1; /* 40:27 8 */ __u64 namespaces:1; /* 40:28 8 */ __u64 ksymbol:1; /* 40:29 8 */ __u64 bpf_event:1; /* 40:30 8 */ __u64 aux_output:1; /* 40:31 8 */ __u64 cgroup:1; /* 40:32 8 */ __u64 __reserved_1:31; /* 40:33 8 */ $ Reported-by: kbuild test robot Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo [staticize perf_event_cgroup function] Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Johannes Weiner Cc: Mark Rutland Cc: Zefan Li Link: http://lore.kernel.org/lkml/20200325124536.2800725-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 397cfd65b3fe..de95f6c7b273 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -381,7 +381,8 @@ struct perf_event_attr { ksymbol : 1, /* include ksymbol events */ bpf_event : 1, /* include bpf events */ aux_output : 1, /* generate AUX records instead of events */ - __reserved_1 : 32; + cgroup : 1, /* include cgroup events */ + __reserved_1 : 31; union { __u32 wakeup_events; /* wakeup every n events */ @@ -1012,6 +1013,16 @@ enum perf_event_type { */ PERF_RECORD_BPF_EVENT = 18, + /* + * struct { + * struct perf_event_header header; + * u64 id; + * char path[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CGROUP = 19, + PERF_RECORD_MAX, /* non-ABI */ }; -- cgit v1.2.3 From 6546b19f95acc986807de981402bbac6b3a94b0f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:29 +0900 Subject: perf/core: Add PERF_SAMPLE_CGROUP feature The PERF_SAMPLE_CGROUP bit is to save (perf_event) cgroup information in the sample. It will add a 64-bit id to identify current cgroup and it's the file handle in the cgroup file system. Userspace should use this information with PERF_RECORD_CGROUP event to match which cgroup it belongs. I put it before PERF_SAMPLE_AUX for simplicity since it just needs a 64-bit word. But if we want bigger samples, I can work on that direction too. Committer testing: $ pahole perf_sample_data | grep -w cgroup -B5 -A5 /* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */ struct perf_regs regs_intr; /* 312 16 */ /* --- cacheline 5 boundary (320 bytes) was 8 bytes ago --- */ u64 stack_user_size; /* 328 8 */ u64 phys_addr; /* 336 8 */ u64 cgroup; /* 344 8 */ /* size: 384, cachelines: 6, members: 22 */ /* padding: 32 */ }; $ Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Johannes Weiner Cc: Mark Rutland Cc: Zefan Li Link: http://lore.kernel.org/lkml/20200325124536.2800725-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index de95f6c7b273..7b2d6fc9e6ed 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -142,8 +142,9 @@ enum perf_event_sample_format { PERF_SAMPLE_REGS_INTR = 1U << 18, PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, + PERF_SAMPLE_CGROUP = 1U << 21, - PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; -- cgit v1.2.3 From 88ce642492339f49a0b391af40e5798c08948e49 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Feb 2020 14:26:47 +0100 Subject: um: Implement time-travel=ext This implements synchronized time-travel mode which - using a special application on a unix socket - lets multiple machines take part in a time-travelling simulation together. The protocol for the unix domain socket is defined in the new file include/uapi/linux/um_timetravel.h. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- include/uapi/linux/um_timetravel.h | 128 +++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 include/uapi/linux/um_timetravel.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/um_timetravel.h b/include/uapi/linux/um_timetravel.h new file mode 100644 index 000000000000..ca3238222b6d --- /dev/null +++ b/include/uapi/linux/um_timetravel.h @@ -0,0 +1,128 @@ +/* + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Copyright (C) 2019 Intel Corporation + */ +#ifndef _UAPI_LINUX_UM_TIMETRAVEL_H +#define _UAPI_LINUX_UM_TIMETRAVEL_H +#include + +/** + * struct um_timetravel_msg - UM time travel message + * + * This is the basic message type, going in both directions. + * + * This is the message passed between the host (user-mode Linux instance) + * and the calendar (the application on the other side of the socket) in + * order to implement common scheduling. + * + * Whenever UML has an event it will request runtime for it from the + * calendar, and then wait for its turn until it can run, etc. Note + * that it will only ever request the single next runtime, i.e. multiple + * REQUEST messages override each other. + */ +struct um_timetravel_msg { + /** + * @op: operation value from &enum um_timetravel_ops + */ + __u32 op; + + /** + * @seq: sequence number for the message - shall be reflected in + * the ACK response, and should be checked while processing + * the response to see if it matches + */ + __u32 seq; + + /** + * @time: time in nanoseconds + */ + __u64 time; +}; + +/** + * enum um_timetravel_ops - Operation codes + */ +enum um_timetravel_ops { + /** + * @UM_TIMETRAVEL_ACK: response (ACK) to any previous message, + * this usually doesn't carry any data in the 'time' field + * unless otherwise specified below + */ + UM_TIMETRAVEL_ACK = 0, + + /** + * @UM_TIMETRAVEL_START: initialize the connection, the time + * field contains an (arbitrary) ID to possibly be able + * to distinguish the connections. + */ + UM_TIMETRAVEL_START = 1, + + /** + * @UM_TIMETRAVEL_REQUEST: request to run at the given time + * (host -> calendar) + */ + UM_TIMETRAVEL_REQUEST = 2, + + /** + * @UM_TIMETRAVEL_WAIT: Indicate waiting for the previously requested + * runtime, new requests may be made while waiting (e.g. due to + * interrupts); the time field is ignored. The calendar must process + * this message and later send a %UM_TIMETRAVEL_RUN message when + * the host can run again. + * (host -> calendar) + */ + UM_TIMETRAVEL_WAIT = 3, + + /** + * @UM_TIMETRAVEL_GET: return the current time from the calendar in the + * ACK message, the time in the request message is ignored + * (host -> calendar) + */ + UM_TIMETRAVEL_GET = 4, + + /** + * @UM_TIMETRAVEL_UPDATE: time update to the calendar, must be sent e.g. + * before kicking an interrupt to another calendar + * (host -> calendar) + */ + UM_TIMETRAVEL_UPDATE = 5, + + /** + * @UM_TIMETRAVEL_RUN: run time request granted, current time is in + * the time field + * (calendar -> host) + */ + UM_TIMETRAVEL_RUN = 6, + + /** + * @UM_TIMETRAVEL_FREE_UNTIL: Enable free-running until the given time, + * this is a message from the calendar telling the host that it can + * freely do its own scheduling for anything before the indicated + * time. + * Note that if a calendar sends this message once, the host may + * assume that it will also do so in the future, if it implements + * wraparound semantics for the time field. + * (calendar -> host) + */ + UM_TIMETRAVEL_FREE_UNTIL = 7, + + /** + * @UM_TIMETRAVEL_GET_TOD: Return time of day, typically used once at + * boot by the virtual machines to get a synchronized time from + * the simulation. + */ + UM_TIMETRAVEL_GET_TOD = 8, +}; + +#endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */ -- cgit v1.2.3 From 3a8579629665bd6c0b37afcb2291080e959b885d Mon Sep 17 00:00:00 2001 From: Rajat Jain Date: Wed, 1 Apr 2020 13:24:11 -0700 Subject: Input: update SPDX tag for input-event-codes.h Replace the /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ with /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ to help coreboot community consume this file without relaxing their licensing checks. Signed-off-by: Rajat Jain Link: https://lore.kernel.org/r/20200329172513.133548-1-rajatja@google.com Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 0f1db1cccc3f..f6cc830b31f2 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* * Input event codes * -- cgit v1.2.3 From 4c8cf31885f69e86be0b5b9e6677a26797365e1d Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Thu, 26 Mar 2020 22:01:23 +0800 Subject: vhost: introduce vDPA-based backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces a vDPA-based vhost backend. This backend is built on top of the same interface defined in virtio-vDPA and provides a generic vhost interface for userspace to accelerate the virtio devices in guest. This backend is implemented as a vDPA device driver on top of the same ops used in virtio-vDPA. It will create char device entry named vhost-vdpa-$index for userspace to use. Userspace can use vhost ioctls on top of this char device to setup the backend. Vhost ioctls are extended to make it type agnostic and behave like a virtio device, this help to eliminate type specific API like what vhost_net/scsi/vsock did: - VHOST_VDPA_GET_DEVICE_ID: get the virtio device ID which is defined by virtio specification to differ from different type of devices - VHOST_VDPA_GET_VRING_NUM: get the maximum size of virtqueue supported by the vDPA device - VHSOT_VDPA_SET/GET_STATUS: set and get virtio status of vDPA device - VHOST_VDPA_SET/GET_CONFIG: access virtio config space - VHOST_VDPA_SET_VRING_ENABLE: enable a specific virtqueue For memory mapping, IOTLB API is mandated for vhost-vDPA which means userspace drivers are required to use VHOST_IOTLB_UPDATE/VHOST_IOTLB_INVALIDATE to add or remove mapping for a specific userspace memory region. The vhost-vDPA API is designed to be type agnostic, but it allows net device only in current stage. Due to the lacking of control virtqueue support, some features were filter out by vhost-vdpa. We will enable more features and devices in the near future. Signed-off-by: Tiwei Bie Signed-off-by: Eugenio Pérez Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200326140125.19794-8-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost.h | 24 ++++++++++++++++++++++++ include/uapi/linux/vhost_types.h | 8 ++++++++ 2 files changed, 32 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 40d028eed645..9fe72e4b1373 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -116,4 +116,28 @@ #define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64) #define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int) +/* VHOST_VDPA specific defines */ + +/* Get the device id. The device ids follow the same definition of + * the device id defined in virtio-spec. + */ +#define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32) +/* Get and set the status. The status bits follow the same definition + * of the device status defined in virtio-spec. + */ +#define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8) +#define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8) +/* Get and set the device config. The device config follows the same + * definition of the device config defined in virtio-spec. + */ +#define VHOST_VDPA_GET_CONFIG _IOR(VHOST_VIRTIO, 0x73, \ + struct vhost_vdpa_config) +#define VHOST_VDPA_SET_CONFIG _IOW(VHOST_VIRTIO, 0x74, \ + struct vhost_vdpa_config) +/* Enable/disable the ring. */ +#define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \ + struct vhost_vring_state) +/* Get the max ring size. */ +#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16) + #endif diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index c907290ff065..669457ce5c48 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -119,6 +119,14 @@ struct vhost_scsi_target { unsigned short reserved; }; +/* VHOST_VDPA specific definitions */ + +struct vhost_vdpa_config { + __u32 off; + __u32 len; + __u8 buf[0]; +}; + /* Feature bits */ /* Log all write descriptors. Can be changed while device is active. */ #define VHOST_F_LOG_ALL 26 -- cgit v1.2.3 From 73c5762652c538527f654b8eb9af47ae2c2434af Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 16 Mar 2020 16:54:23 +0530 Subject: tools: PCI: Add 'd' command line option to support DMA Add a new command line option 'd' to use DMA for data transfers. It should be used with read, write or copy commands. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Lorenzo Pieralisi Tested-by: Alan Mikhak --- include/uapi/linux/pcitest.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index cbf422e56696..8b868761f8b4 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -20,4 +20,11 @@ #define PCITEST_SET_IRQTYPE _IOW('P', 0x8, int) #define PCITEST_GET_IRQTYPE _IO('P', 0x9) +#define PCITEST_FLAGS_USE_DMA 0x00000001 + +struct pci_endpoint_test_xfer_param { + unsigned long size; + unsigned char flags; +}; + #endif /* __UAPI_LINUX_PCITEST_H */ -- cgit v1.2.3 From 475007f9cef0ef88ea9263529ad93a16849c49e8 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 17 Mar 2020 15:31:55 +0530 Subject: misc: pci_endpoint_test: Add ioctl to clear IRQ Add ioctl to clear IRQ which can be used to free the allocated IRQ vectors and free the requested IRQ. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Lorenzo Pieralisi --- include/uapi/linux/pcitest.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index 8b868761f8b4..c3ab4c826297 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -19,6 +19,7 @@ #define PCITEST_MSIX _IOW('P', 0x7, int) #define PCITEST_SET_IRQTYPE _IOW('P', 0x8, int) #define PCITEST_GET_IRQTYPE _IO('P', 0x9) +#define PCITEST_CLEAR_IRQ _IO('P', 0x10) #define PCITEST_FLAGS_USE_DMA 0x00000001 -- cgit v1.2.3 From bc9fe6143de5df8fb36cf1532b48fecf35868571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 31 Mar 2020 09:35:59 -0700 Subject: netfilter: xt_IDLETIMER: target v1 - match Android layout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Android has long had an extension to IDLETIMER to send netlink messages to userspace, see: https://android.googlesource.com/kernel/common/+/refs/heads/android-mainline/include/uapi/linux/netfilter/xt_IDLETIMER.h#42 Note: this is idletimer target rev 1, there is no rev 0 in the Android common kernel sources, see registration at: https://android.googlesource.com/kernel/common/+/refs/heads/android-mainline/net/netfilter/xt_IDLETIMER.c#483 When we compare that to upstream's new idletimer target rev 1: https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git/tree/include/uapi/linux/netfilter/xt_IDLETIMER.h#n46 We immediately notice that these two rev 1 structs are the same size and layout, and that while timer_type and send_nl_msg are differently named and serve a different purpose, they're at the same offset. This makes them impossible to tell apart - and thus one cannot know in a mixed Android/vanilla environment whether one means timer_type or send_nl_msg. Since this is iptables/netfilter uapi it introduces a problem between iptables (vanilla vs Android) userspace and kernel (vanilla vs Android) if the two don't match each other. Additionally when at some point in the future Android picks up 5.7+ it's not at all clear how to resolve the resulting merge conflict. Furthermore, since upgrading the kernel on old Android phones is pretty much impossible there does not seem to be an easy way out of this predicament. The only thing I've been able to come up with is some super disgusting kernel version >= 5.7 check in the iptables binary to flip between different struct layouts. By adding a dummy field to the vanilla Linux kernel header file we can force the two structs to be compatible with each other. Long term I think I would like to deprecate send_nl_msg out of Android entirely, but I haven't quite been able to figure out exactly how we depend on it. It seems to be very similar to sysfs notifications but with some extra info. Currently it's actually always enabled whenever Android uses the IDLETIMER target, so we could also probably entirely remove it from the uapi in favour of just always enabling it, but again we can't upgrade old kernels already in the field. (Also note that this doesn't change the structure's size, as it is simply fitting into the pre-existing padding, and that since 5.7 hasn't been released yet, there's still time to make this uapi visible change) Cc: Manoj Basapathi Cc: Subash Abhinov Kasiviswanathan Signed-off-by: Maciej Żenczykowski Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_IDLETIMER.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h index 434e6506abaa..49ddcdc61c09 100644 --- a/include/uapi/linux/netfilter/xt_IDLETIMER.h +++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h @@ -48,6 +48,7 @@ struct idletimer_tg_info_v1 { char label[MAX_IDLETIMER_LABEL_SIZE]; + __u8 send_nl_msg; /* unused: for compatibility with Android */ __u8 timer_type; /* for kernel module internal use only */ -- cgit v1.2.3 From ef516e8625ddea90b3a0313f3a0b0baa83db7ac2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 7 Apr 2020 14:10:38 +0200 Subject: netfilter: nf_tables: reintroduce the NFT_SET_CONCAT flag Stefano originally proposed to introduce this flag, users hit EOPNOTSUPP in new binaries with old kernels when defining a set with ranges in a concatenation. Fixes: f3a2181e16f1 ("netfilter: nf_tables: Support for sets with multiple ranged fields") Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 30f2a87270dc..4565456c0ef4 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -276,6 +276,7 @@ enum nft_rule_compat_attributes { * @NFT_SET_TIMEOUT: set uses timeouts * @NFT_SET_EVAL: set can be updated from the evaluation path * @NFT_SET_OBJECT: set contains stateful objects + * @NFT_SET_CONCAT: set contains a concatenation */ enum nft_set_flags { NFT_SET_ANONYMOUS = 0x1, @@ -285,6 +286,7 @@ enum nft_set_flags { NFT_SET_TIMEOUT = 0x10, NFT_SET_EVAL = 0x20, NFT_SET_OBJECT = 0x40, + NFT_SET_CONCAT = 0x80, }; /** -- cgit v1.2.3 From b0c504f154718904ae49349147e3b7e6ae91ffdc Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 6 Apr 2020 20:05:05 -0700 Subject: virtio-balloon: add support for providing free page reports to host Add support for the page reporting feature provided by virtio-balloon. Reporting differs from the regular balloon functionality in that is is much less durable than a standard memory balloon. Instead of creating a list of pages that cannot be accessed the pages are only inaccessible while they are being indicated to the virtio interface. Once the interface has acknowledged them they are placed back into their respective free lists and are once again accessible by the guest system. Unlike a standard balloon we don't inflate and deflate the pages. Instead we perform the reporting, and once the reporting is completed it is assumed that the page has been dropped from the guest and will be faulted back in the next time the page is accessed. Signed-off-by: Alexander Duyck Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Acked-by: Michael S. Tsirkin Cc: Andrea Arcangeli Cc: Dan Williams Cc: Dave Hansen Cc: Konrad Rzeszutek Wilk Cc: Luiz Capitulino Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Nitesh Narayan Lal Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Paolo Bonzini Cc: Rik van Riel Cc: Vlastimil Babka Cc: Wei Wang Cc: Yang Zhang Cc: wei qi Link: http://lkml.kernel.org/r/20200211224657.29318.68624.stgit@localhost.localdomain Signed-off-by: Linus Torvalds --- include/uapi/linux/virtio_balloon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index a1966cd7b677..19974392d324 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -36,6 +36,7 @@ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ #define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */ #define VIRTIO_BALLOON_F_PAGE_POISON 4 /* Guest is using page poisoning */ +#define VIRTIO_BALLOON_F_REPORTING 5 /* Page reporting virtqueue */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 -- cgit v1.2.3 From 72981e0e7b609c741d7764cc920c8fec00920bd5 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 6 Apr 2020 20:05:41 -0700 Subject: userfaultfd: wp: add UFFDIO_COPY_MODE_WP This allows UFFDIO_COPY to map pages write-protected. [peterx@redhat.com: switch to VM_WARN_ON_ONCE in mfill_atomic_pte; add brackets around "dst_vma->vm_flags & VM_WRITE"; fix wordings in comments and commit messages] Signed-off-by: Andrea Arcangeli Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Jerome Glisse Reviewed-by: Mike Rapoport Cc: Bobby Powers Cc: Brian Geffon Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Johannes Weiner Cc: "Kirill A . Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Rik van Riel Cc: Shaohua Li Link: http://lkml.kernel.org/r/20200220163112.11409-6-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 48f1a7c2f1f0..340f23bc251d 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -203,13 +203,14 @@ struct uffdio_copy { __u64 dst; __u64 src; __u64 len; +#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) /* - * There will be a wrprotection flag later that allows to map - * pages wrprotected on the fly. And such a flag will be - * available if the wrprotection ioctl are implemented for the - * range according to the uffdio_register.ioctls. + * UFFDIO_COPY_MODE_WP will map the page write protected on + * the fly. UFFDIO_COPY_MODE_WP is available only if the + * write protected ioctl is implemented for the range + * according to the uffdio_register.ioctls. */ -#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) +#define UFFDIO_COPY_MODE_WP ((__u64)1<<1) __u64 mode; /* -- cgit v1.2.3 From 63b2d4174c4ad1f40b48d7138e71bcb564c1fe03 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 6 Apr 2020 20:06:12 -0700 Subject: userfaultfd: wp: add the writeprotect API to userfaultfd ioctl Introduce the new uffd-wp APIs for userspace. Firstly, we'll allow to do UFFDIO_REGISTER with write protection tracking using the new UFFDIO_REGISTER_MODE_WP flag. Note that this flag can co-exist with the existing UFFDIO_REGISTER_MODE_MISSING, in which case the userspace program can not only resolve missing page faults, and at the same time tracking page data changes along the way. Secondly, we introduced the new UFFDIO_WRITEPROTECT API to do page level write protection tracking. Note that we will need to register the memory region with UFFDIO_REGISTER_MODE_WP before that. [peterx@redhat.com: write up the commit message] [peterx@redhat.com: remove useless block, write commit message, check against VM_MAYWRITE rather than VM_WRITE when register] Signed-off-by: Andrea Arcangeli Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Jerome Glisse Cc: Bobby Powers Cc: Brian Geffon Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Johannes Weiner Cc: "Kirill A . Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Mike Rapoport Cc: Pavel Emelyanov Cc: Rik van Riel Cc: Shaohua Li Link: http://lkml.kernel.org/r/20200220163112.11409-14-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 340f23bc251d..95c4a160e5f8 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -52,6 +52,7 @@ #define _UFFDIO_WAKE (0x02) #define _UFFDIO_COPY (0x03) #define _UFFDIO_ZEROPAGE (0x04) +#define _UFFDIO_WRITEPROTECT (0x06) #define _UFFDIO_API (0x3F) /* userfaultfd ioctl ids */ @@ -68,6 +69,8 @@ struct uffdio_copy) #define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ struct uffdio_zeropage) +#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ + struct uffdio_writeprotect) /* read() structure */ struct uffd_msg { @@ -232,4 +235,24 @@ struct uffdio_zeropage { __s64 zeropage; }; +struct uffdio_writeprotect { + struct uffdio_range range; +/* + * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range, + * unset the flag to undo protection of a range which was previously + * write protected. + * + * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up + * any wait thread after the operation succeeds. + * + * NOTE: Write protecting a region (WP=1) is unrelated to page faults, + * therefore DONTWAKE flag is meaningless with WP=1. Removing write + * protection (WP=0) in response to a page fault wakes the faulting + * task unless DONTWAKE is set. + */ +#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0) +#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1) + __u64 mode; +}; + #endif /* _LINUX_USERFAULTFD_H */ -- cgit v1.2.3 From e06f1e1dd4998ffc9da37f580703b55a93fc4de4 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 6 Apr 2020 20:06:16 -0700 Subject: userfaultfd: wp: enabled write protection in userfaultfd API Now it's safe to enable write protection in userfaultfd API Signed-off-by: Shaohua Li Signed-off-by: Andrea Arcangeli Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Jerome Glisse Reviewed-by: Mike Rapoport Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Kirill A. Shutemov Cc: Mel Gorman Cc: Hugh Dickins Cc: Johannes Weiner Cc: Bobby Powers Cc: Brian Geffon Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Martin Cracauer Cc: Marty McFadden Cc: Maya Gokhale Cc: Mike Kravetz Cc: Pavel Emelyanov Link: http://lkml.kernel.org/r/20200220163112.11409-15-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 95c4a160e5f8..e7e98bde221f 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -19,7 +19,8 @@ * means the userland is reading). */ #define UFFD_API ((__u64)0xAA) -#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ +#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ + UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ @@ -34,7 +35,8 @@ #define UFFD_API_RANGE_IOCTLS \ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ - (__u64)1 << _UFFDIO_ZEROPAGE) + (__u64)1 << _UFFDIO_ZEROPAGE | \ + (__u64)1 << _UFFDIO_WRITEPROTECT) #define UFFD_API_RANGE_IOCTLS_BASIC \ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY) -- cgit v1.2.3 From 34c51814b2b87cb2e5a98c92fe957db2ee8e27f4 Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Wed, 1 Apr 2020 05:26:50 +0200 Subject: btrfs: re-instantiate the removed BTRFS_SUBVOL_CREATE_ASYNC definition The commit 9c1036fdb1d1ff1b ("btrfs: Remove BTRFS_SUBVOL_CREATE_ASYNC support") breaks strace build with the kernel headers from git: btrfs.c: In function "btrfs_test_subvol_ioctls": btrfs.c:531:23: error: "BTRFS_SUBVOL_CREATE_ASYNC" undeclared (first use in this function) vol_args_v2.flags = BTRFS_SUBVOL_CREATE_ASYNC; Moreover, it is improper to break UAPI, strace uses the definitions to decode ioctls that are considered part of public API. Restore the macro definition and put it under "#ifndef __KERNEL__" in order to prevent inadvertent in-kernel usage. Fixes: 9c1036fdb1d1ff1b ("btrfs: Remove BTRFS_SUBVOL_CREATE_ASYNC support") Reviewed-by: Nikolay Borisov Signed-off-by: Eugene Syromiatnikov Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 8134924cfc17..e6b6cb0f8bc6 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -36,12 +36,10 @@ struct btrfs_ioctl_vol_args { #define BTRFS_DEVICE_PATH_NAME_MAX 1024 #define BTRFS_SUBVOL_NAME_MAX 4039 -/* - * Deprecated since 5.7: - * - * BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) - */ - +#ifndef __KERNEL__ +/* Deprecated since 5.7 */ +# define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +#endif #define BTRFS_SUBVOL_RDONLY (1ULL << 1) #define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) -- cgit v1.2.3 From d6cdad870358128c1e753e6258e295ab8a5a2429 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 19:51:46 -0500 Subject: uapi: linux: dlm_device.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/uapi/linux/dlm_device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dlm_device.h b/include/uapi/linux/dlm_device.h index f880d2831160..e83954c69fff 100644 --- a/include/uapi/linux/dlm_device.h +++ b/include/uapi/linux/dlm_device.h @@ -45,13 +45,13 @@ struct dlm_lock_params { void __user *bastaddr; struct dlm_lksb __user *lksb; char lvb[DLM_USER_LVB_LEN]; - char name[0]; + char name[]; }; struct dlm_lspace_params { __u32 flags; __u32 minor; - char name[0]; + char name[]; }; struct dlm_purge_params { -- cgit v1.2.3 From 6e88abb862898f55d083071e4423000983dcfe63 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 21:30:22 -0500 Subject: uapi: linux: fiemap.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/uapi/linux/fiemap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h index 8c0bc24d5d95..7a900b2377b6 100644 --- a/include/uapi/linux/fiemap.h +++ b/include/uapi/linux/fiemap.h @@ -34,7 +34,7 @@ struct fiemap { __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ __u32 fm_extent_count; /* size of fm_extents array (in) */ __u32 fm_reserved; - struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ + struct fiemap_extent fm_extents[]; /* array of mapped extents (out) */ }; #define FIEMAP_MAX_OFFSET (~0ULL) -- cgit v1.2.3