From c2eb8effb265ac5cdd960d8e61ecb931e9c767cd Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 24 Oct 2018 06:50:34 -0400 Subject: media: videodev2.h: add v4l2_timeval_to_ns inline function We want to be able to uniquely identify buffers for stateless codecs. The internal timestamp (a u64) as stored internally in the kernel is a suitable candidate for that, but in struct v4l2_buffer it is represented as a struct timeval. Add a v4l2_timeval_to_ns() function that converts the struct timeval into a u64 in the same way that the kernel does. This makes it possible to use this u64 elsewhere as a unique identifier of the buffer. Since timestamps are also copied from the output buffer to the corresponding capture buffer(s) by M2M devices, the u64 can be used to refer to both output and capture buffers. The plan is that in the future we redesign struct v4l2_buffer and use u64 for the timestamp instead of a struct timeval (which has lots of problems with 32 vs 64 bit and y2038 layout changes), and then there is no more need to use this function. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index b5671ce2724f..d6eed479c3a6 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -973,6 +973,18 @@ struct v4l2_buffer { }; }; +/** + * v4l2_timeval_to_ns - Convert timeval to nanoseconds + * @ts: pointer to the timeval variable to be converted + * + * Returns the scalar nanosecond representation of the timeval + * parameter. + */ +static inline __u64 v4l2_timeval_to_ns(const struct timeval *tv) +{ + return (__u64)tv->tv_sec * 1000000000ULL + tv->tv_usec * 1000; +} + /* Flags for 'flags' field */ /* Buffer is mapped (flag) */ #define V4L2_BUF_FLAG_MAPPED 0x00000001 -- cgit v1.2.3 From 4b837c6d7ee771f68a30f362c9f68171a95be222 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 14 Jan 2019 09:01:54 -0500 Subject: media: v4l: uAPI: V4L2_BUF_TYPE_META_OUTPUT is an output buffer type V4L2_BUF_TYPE_META_OUTPUT was added by commit 72148d1a57e7 ("media: v4l: Add support for V4L2_BUF_TYPE_META_OUTPUT") but the patch missed adding the type to the macro telling whether a given type is an output type or not. Do that now. Getting this wrong leads to handling the buffer as a capture buffer in a lot of places. Fixes: 72148d1a57e7 ("media: v4l: Add support for V4L2_BUF_TYPE_META_OUTPUT") Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index d6eed479c3a6..c0c36c165bf4 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -161,7 +161,8 @@ enum v4l2_buf_type { || (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY \ || (type) == V4L2_BUF_TYPE_VBI_OUTPUT \ || (type) == V4L2_BUF_TYPE_SLICED_VBI_OUTPUT \ - || (type) == V4L2_BUF_TYPE_SDR_OUTPUT) + || (type) == V4L2_BUF_TYPE_SDR_OUTPUT \ + || (type) == V4L2_BUF_TYPE_META_OUTPUT) enum v4l2_tuner_type { V4L2_TUNER_RADIO = 1, -- cgit v1.2.3 From 50656bad786d001b294764e9f047c5d5b3e4db75 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 10 Jan 2019 11:56:09 -0500 Subject: media: v4l2-ctrl: Add control to enable h.264 constrained intra prediction Allow to enable h.264 constrained intra prediction (macroblocks using intra prediction modes are not allowed to use residual data and decoded samples of neighboring macroblocks coded using inter prediction modes). This control directly corresponds to the constrained_intra_pred_flag field in the h.264 picture parameter set. Signed-off-by: Philipp Zabel Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 3dcfc6148f99..fd65c710b144 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -533,6 +533,7 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type { }; #define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER (V4L2_CID_MPEG_BASE+381) #define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP (V4L2_CID_MPEG_BASE+382) +#define V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION (V4L2_CID_MPEG_BASE+383) #define V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP (V4L2_CID_MPEG_BASE+400) #define V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP (V4L2_CID_MPEG_BASE+401) #define V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP (V4L2_CID_MPEG_BASE+402) -- cgit v1.2.3 From d034696cbe5a6e00f76ca4b7869c6cdef66aebd5 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 10 Jan 2019 11:56:10 -0500 Subject: media: v4l2-ctrl: Add control for h.264 chroma qp offset Allow to add fixed quantization parameter offset between luma and chroma quantization parameters. This control directly corresponds to the chroma_qp_index_offset field of the h.264 picture parameter set. Signed-off-by: Philipp Zabel Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index fd65c710b144..06479f2fb3ae 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -534,6 +534,7 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type { #define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER (V4L2_CID_MPEG_BASE+381) #define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP (V4L2_CID_MPEG_BASE+382) #define V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION (V4L2_CID_MPEG_BASE+383) +#define V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET (V4L2_CID_MPEG_BASE+384) #define V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP (V4L2_CID_MPEG_BASE+400) #define V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP (V4L2_CID_MPEG_BASE+401) #define V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP (V4L2_CID_MPEG_BASE+402) -- cgit v1.2.3 From 1c3721b1f22286033abeda30b7e12439b083ed0f Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 9 Jan 2019 13:30:04 -0500 Subject: media: videodev2.h: Add more field helper macros Adds two helper macros: V4L2_FIELD_IS_SEQUENTIAL: returns true if the given field type is 'sequential', that is a full frame is transmitted, or exists in memory, as all top field lines followed by all bottom field lines, or vice-versa. V4L2_FIELD_IS_INTERLACED: returns true if the given field type is 'interlaced', that is a full frame is transmitted, or exists in memory, as top field lines interlaced with bottom field lines. Signed-off-by: Steve Longerbeam Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c0c36c165bf4..9a920f071ff9 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -130,6 +130,13 @@ enum v4l2_field { ((field) == V4L2_FIELD_BOTTOM ||\ (field) == V4L2_FIELD_TOP ||\ (field) == V4L2_FIELD_ALTERNATE) +#define V4L2_FIELD_IS_INTERLACED(field) \ + ((field) == V4L2_FIELD_INTERLACED ||\ + (field) == V4L2_FIELD_INTERLACED_TB ||\ + (field) == V4L2_FIELD_INTERLACED_BT) +#define V4L2_FIELD_IS_SEQUENTIAL(field) \ + ((field) == V4L2_FIELD_SEQ_TB ||\ + (field) == V4L2_FIELD_SEQ_BT) enum v4l2_buf_type { V4L2_BUF_TYPE_VIDEO_CAPTURE = 1, -- cgit v1.2.3 From 1194c4133195dfcb6c5fc0935d54bbed872a5285 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 29 Jan 2019 00:56:17 +0000 Subject: nfit: Add Hyper-V NVDIMM DSM command set to white list Add the Hyper-V _DSM command set to the white list of NVDIMM command sets. This command set is documented at http://www.uefi.org/RFIC_LIST (see "Virtual NVDIMM 0x1901"). Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index f57c9e434d2d..de5d90212409 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -243,6 +243,7 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_HPE1 1 #define NVDIMM_FAMILY_HPE2 2 #define NVDIMM_FAMILY_MSFT 3 +#define NVDIMM_FAMILY_HYPERV 4 #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg) -- cgit v1.2.3 From d0a060be573bfbf8753a15dca35497db5e968bb0 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 30 Jan 2019 12:02:44 +0000 Subject: arm64: add ptrace regsets for ptrauth key management Add two new ptrace regsets, which can be used to request and change the pointer authentication keys of a thread. NT_ARM_PACA_KEYS gives access to the instruction/data address keys, and NT_ARM_PACG_KEYS to the generic authentication key. The keys are also part of the core dump file of the process. The regsets are only exposed if the kernel is compiled with CONFIG_CHECKPOINT_RESTORE=y, as the only intended use case is checkpointing and restoring processes that are using pointer authentication. (This can be changed later if there are other use cases.) Reviewed-by: Dave Martin Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas --- include/uapi/linux/elf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index e4d6ddd93567..34c02e4290fe 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -421,6 +421,8 @@ typedef struct elf64_shdr { #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ #define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */ #define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */ +#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */ +#define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From d9a9ea94f748f47b1d75c6c5e33edcf74476c445 Mon Sep 17 00:00:00 2001 From: Chad Austin Date: Mon, 7 Jan 2019 16:53:17 -0800 Subject: fuse: support clients that don't implement 'opendir' Allow filesystems to return ENOSYS from opendir, preventing the kernel from sending opendir and releasedir messages in the future. This avoids userspace transitions when filesystems don't need to keep track of state per directory handle. A new capability flag, FUSE_NO_OPENDIR_SUPPORT, parallels FUSE_NO_OPEN_SUPPORT, indicating the new semantics for returning ENOSYS from opendir. Signed-off-by: Chad Austin Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index b4967d48bfda..2ac598614a8f 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -122,6 +122,9 @@ * - add FOPEN_CACHE_DIR * - add FUSE_MAX_PAGES, add max_pages to init_out * - add FUSE_CACHE_SYMLINKS + * + * 7.29 + * - add FUSE_NO_OPENDIR_SUPPORT flag */ #ifndef _LINUX_FUSE_H @@ -157,7 +160,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 28 +#define FUSE_KERNEL_MINOR_VERSION 29 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -259,6 +262,7 @@ struct fuse_file_lock { * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages * FUSE_CACHE_SYMLINKS: cache READLINK responses + * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -284,6 +288,7 @@ struct fuse_file_lock { #define FUSE_ABORT_ERROR (1 << 21) #define FUSE_MAX_PAGES (1 << 22) #define FUSE_CACHE_SYMLINKS (1 << 23) +#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) /** * CUSE INIT request/reply flags -- cgit v1.2.3 From a7fe4ca72b1fe7877de5672640d0b4e023d0fdca Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Thu, 7 Feb 2019 22:18:43 -0500 Subject: media: v4l: Add 32-bit packed YUV formats The formats added in this patch include: V4L2_PIX_FMT_AYUV32 V4L2_PIX_FMT_XYUV32 V4L2_PIX_FMT_VUYA32 V4L2_PIX_FMT_VUYX32 These formats enable the trasmission of alpha channel data to other drivers and userspace applications in addition to YUV data. For example, buffers generated by drivers in one of these formats can be used by the Weston compositor to display as a texture or flipped directly onto the overlay planes with the help of a DRM driver. Signed-off-by: Vivek Kasireddy Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 9a920f071ff9..1db220da3bcc 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -562,6 +562,10 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YUV555 v4l2_fourcc('Y', 'U', 'V', 'O') /* 16 YUV-5-5-5 */ #define V4L2_PIX_FMT_YUV565 v4l2_fourcc('Y', 'U', 'V', 'P') /* 16 YUV-5-6-5 */ #define V4L2_PIX_FMT_YUV32 v4l2_fourcc('Y', 'U', 'V', '4') /* 32 YUV-8-8-8-8 */ +#define V4L2_PIX_FMT_AYUV32 v4l2_fourcc('A', 'Y', 'U', 'V') /* 32 AYUV-8-8-8-8 */ +#define V4L2_PIX_FMT_XYUV32 v4l2_fourcc('X', 'Y', 'U', 'V') /* 32 XYUV-8-8-8-8 */ +#define V4L2_PIX_FMT_VUYA32 v4l2_fourcc('V', 'U', 'Y', 'A') /* 32 VUYA-8-8-8-8 */ +#define V4L2_PIX_FMT_VUYX32 v4l2_fourcc('V', 'U', 'Y', 'X') /* 32 VUYX-8-8-8-8 */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* 8 8-bit color */ #define V4L2_PIX_FMT_HM12 v4l2_fourcc('H', 'M', '1', '2') /* 8 YUV 4:2:0 16x16 macroblocks */ #define V4L2_PIX_FMT_M420 v4l2_fourcc('M', '4', '2', '0') /* 12 YUV 4:2:0 2 lines y, 1 line uv interleaved */ -- cgit v1.2.3 From 721074b03411327e7bf41555d4cc7c18f49313f7 Mon Sep 17 00:00:00 2001 From: Patrick Lerda Date: Thu, 17 Jan 2019 03:50:13 -0500 Subject: media: rc: rcmm decoder and encoder media: add support for RCMM infrared remote controls. Signed-off-by: Patrick Lerda Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index 6b319581882f..45fcbf99d72e 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -192,6 +192,9 @@ struct lirc_scancode { * @RC_PROTO_XMP: XMP protocol * @RC_PROTO_CEC: CEC protocol * @RC_PROTO_IMON: iMon Pad protocol + * @RC_PROTO_RCMM12: RC-MM protocol 12 bits + * @RC_PROTO_RCMM24: RC-MM protocol 24 bits + * @RC_PROTO_RCMM32: RC-MM protocol 32 bits */ enum rc_proto { RC_PROTO_UNKNOWN = 0, @@ -218,6 +221,9 @@ enum rc_proto { RC_PROTO_XMP = 21, RC_PROTO_CEC = 22, RC_PROTO_IMON = 23, + RC_PROTO_RCMM12 = 24, + RC_PROTO_RCMM24 = 25, + RC_PROTO_RCMM32 = 26, }; #endif -- cgit v1.2.3 From fadccd8fc2d06cf7fd222245d7e04b00fae946cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Feb 2019 09:37:13 +0100 Subject: nvme_ioctl.h: remove duplicate GPL boilerplate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have a ЅPDX header, so no need to duplicate the information. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- include/uapi/linux/nvme_ioctl.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index 6e74b1eaf541..1c215ea1798e 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -2,15 +2,6 @@ /* * Definitions for the NVM Express ioctl interface * Copyright (c) 2011-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #ifndef _UAPI_LINUX_NVME_IOCTL_H -- cgit v1.2.3 From 61697a6abd24acba941359c6268a94f4afe4a53d Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 18 Jan 2019 14:19:26 -0500 Subject: dm: eliminate 'split_discard_bios' flag from DM target interface There is no need to have DM core split discards on behalf of a DM target now that blk_queue_split() handles splitting discards based on the queue_limits. A DM target just needs to set max_discard_sectors, discard_granularity, etc, in queue_limits. Signed-off-by: Mike Snitzer --- include/uapi/linux/dm-ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index d1e49514977b..f396a82dfd3e 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -270,9 +270,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 39 +#define DM_VERSION_MINOR 40 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2018-04-03)" +#define DM_VERSION_EXTRA "-ioctl (2019-01-18)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From e5567f5f67621877726f99be040af9fbedda37dc Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Tue, 19 Feb 2019 11:04:51 -0800 Subject: PCI/ATS: Add pci_prg_resp_pasid_required() interface. Return the PRG Response PASID Required bit in the Page Request Status Register. As per PCIe spec r4.0, sec 10.5.2.3, if this bit is Set, the device expects a PASID TLP Prefix on PRG Response Messages when the corresponding Page Requests had a PASID TLP Prefix. If Clear, the device does not expect PASID TLP Prefixes on any PRG Response Message, and the device behavior is undefined if the device receives a PRG Response Message with a PASID TLP Prefix. Also the device behavior is undefined if this bit is Set and the device receives a PRG Response Message with no PASID TLP Prefix when the corresponding Page Requests had a PASID TLP Prefix. This function will be used by drivers like IOMMU, if it is required to check the status of the PRG Response PASID Required bit before enabling the PASID support of the device. Cc: Ashok Raj Cc: Jacob Pan Cc: Keith Busch Suggested-by: Ashok Raj Signed-off-by: Kuppuswamy Sathyanarayanan Acked-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index e1e9888c85e6..898be572b010 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -880,6 +880,7 @@ #define PCI_PRI_STATUS_RF 0x001 /* Response Failure */ #define PCI_PRI_STATUS_UPRGI 0x002 /* Unexpected PRG index */ #define PCI_PRI_STATUS_STOPPED 0x100 /* PRI Stopped */ +#define PCI_PRI_STATUS_PASID 0x8000 /* PRG Response PASID Required */ #define PCI_PRI_MAX_REQ 0x08 /* PRI max reqs supported */ #define PCI_PRI_ALLOC_REQ 0x0c /* PRI max reqs allowed */ #define PCI_EXT_CAP_PRI_SIZEOF 16 -- cgit v1.2.3 From 8c938ddc6df3bbe72809db1be6c9f3af83f5d7a9 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Tue, 19 Feb 2019 11:06:09 -0800 Subject: PCI/ATS: Add pci_ats_page_aligned() interface Return the Page Aligned Request bit in the ATS Capability Register. As per PCIe spec r4.0, sec 10.5.1.2, if the Page Aligned Request bit is set, it indicates the Untranslated Addresses generated by the device are always aligned to a 4096 byte boundary. An IOMMU that can only translate page-aligned addresses can only be used with devices that always produce aligned Untranslated Addresses. This interface will be used by drivers for such IOMMUs to determine whether devices can use the ATS service. Cc: Ashok Raj Cc: Jacob Pan Cc: Keith Busch Suggested-by: Ashok Raj Signed-off-by: Kuppuswamy Sathyanarayanan Acked-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 898be572b010..5c98133f2c94 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -866,6 +866,7 @@ #define PCI_ATS_CAP 0x04 /* ATS Capability Register */ #define PCI_ATS_CAP_QDEP(x) ((x) & 0x1f) /* Invalidate Queue Depth */ #define PCI_ATS_MAX_QDEP 32 /* Max Invalidate Queue Depth */ +#define PCI_ATS_CAP_PAGE_ALIGNED 0x0020 /* Page Aligned Request */ #define PCI_ATS_CTRL 0x06 /* ATS Control Register */ #define PCI_ATS_CTRL_ENABLE 0x8000 /* ATS Enable */ #define PCI_ATS_CTRL_STU(x) ((x) & 0x1f) /* Smallest Translation Unit */ -- cgit v1.2.3 From 2b188cc1bb857a9d4701ae59aa7768b5124e262e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 7 Jan 2019 10:46:33 -0700 Subject: Add io_uring IO interface The submission queue (SQ) and completion queue (CQ) rings are shared between the application and the kernel. This eliminates the need to copy data back and forth to submit and complete IO. IO submissions use the io_uring_sqe data structure, and completions are generated in the form of io_uring_cqe data structures. The SQ ring is an index into the io_uring_sqe array, which makes it possible to submit a batch of IOs without them being contiguous in the ring. The CQ ring is always contiguous, as completion events are inherently unordered, and hence any io_uring_cqe entry can point back to an arbitrary submission. Two new system calls are added for this: io_uring_setup(entries, params) Sets up an io_uring instance for doing async IO. On success, returns a file descriptor that the application can mmap to gain access to the SQ ring, CQ ring, and io_uring_sqes. io_uring_enter(fd, to_submit, min_complete, flags, sigset, sigsetsize) Initiates IO against the rings mapped to this fd, or waits for them to complete, or both. The behavior is controlled by the parameters passed in. If 'to_submit' is non-zero, then we'll try and submit new IO. If IORING_ENTER_GETEVENTS is set, the kernel will wait for 'min_complete' events, if they aren't already available. It's valid to set IORING_ENTER_GETEVENTS and 'min_complete' == 0 at the same time, this allows the kernel to return already completed events without waiting for them. This is useful only for polling, as for IRQ driven IO, the application can just check the CQ ring without entering the kernel. With this setup, it's possible to do async IO with a single system call. Future developments will enable polled IO with this interface, and polled submission as well. The latter will enable an application to do IO without doing ANY system calls at all. For IRQ driven IO, an application only needs to enter the kernel for completions if it wants to wait for them to occur. Each io_uring is backed by a workqueue, to support buffered async IO as well. We will only punt to an async context if the command would need to wait for IO on the device side. Any data that can be accessed directly in the page cache is done inline. This avoids the slowness issue of usual threadpools, since cached data is accessed as quickly as a sync interface. Sample application: http://git.kernel.dk/cgit/fio/plain/t/io_uring.c Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 95 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 include/uapi/linux/io_uring.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h new file mode 100644 index 000000000000..ac692823d6f4 --- /dev/null +++ b/include/uapi/linux/io_uring.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Header file for the io_uring interface. + * + * Copyright (C) 2019 Jens Axboe + * Copyright (C) 2019 Christoph Hellwig + */ +#ifndef LINUX_IO_URING_H +#define LINUX_IO_URING_H + +#include +#include + +/* + * IO submission data structure (Submission Queue Entry) + */ +struct io_uring_sqe { + __u8 opcode; /* type of operation for this sqe */ + __u8 flags; /* as of now unused */ + __u16 ioprio; /* ioprio for the request */ + __s32 fd; /* file descriptor to do IO on */ + __u64 off; /* offset into file */ + __u64 addr; /* pointer to buffer or iovecs */ + __u32 len; /* buffer size or number of iovecs */ + union { + __kernel_rwf_t rw_flags; + __u32 __resv; + }; + __u64 user_data; /* data to be passed back at completion time */ + __u64 __pad2[3]; +}; + +#define IORING_OP_NOP 0 +#define IORING_OP_READV 1 +#define IORING_OP_WRITEV 2 + +/* + * IO completion data structure (Completion Queue Entry) + */ +struct io_uring_cqe { + __u64 user_data; /* sqe->data submission passed back */ + __s32 res; /* result code for this event */ + __u32 flags; +}; + +/* + * Magic offsets for the application to mmap the data it needs + */ +#define IORING_OFF_SQ_RING 0ULL +#define IORING_OFF_CQ_RING 0x8000000ULL +#define IORING_OFF_SQES 0x10000000ULL + +/* + * Filled with the offset for mmap(2) + */ +struct io_sqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 flags; + __u32 dropped; + __u32 array; + __u32 resv1; + __u64 resv2; +}; + +struct io_cqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 overflow; + __u32 cqes; + __u64 resv[2]; +}; + +/* + * io_uring_enter(2) flags + */ +#define IORING_ENTER_GETEVENTS (1U << 0) + +/* + * Passed in for io_uring_setup(2). Copied back with updated info on success + */ +struct io_uring_params { + __u32 sq_entries; + __u32 cq_entries; + __u32 flags; + __u32 resv[7]; + struct io_sqring_offsets sq_off; + struct io_cqring_offsets cq_off; +}; + +#endif -- cgit v1.2.3 From c992fe2925d776be066d9f6cc13f9ea11d78b657 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Jan 2019 09:43:02 -0700 Subject: io_uring: add fsync support Add a new fsync opcode, which either syncs a range if one is passed, or the whole file if the offset and length fields are both cleared to zero. A flag is provided to use fdatasync semantics, that is only force out metadata which is required to retrieve the file data, but not others like metadata. Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ac692823d6f4..4589d56d0b68 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -24,7 +24,7 @@ struct io_uring_sqe { __u32 len; /* buffer size or number of iovecs */ union { __kernel_rwf_t rw_flags; - __u32 __resv; + __u32 fsync_flags; }; __u64 user_data; /* data to be passed back at completion time */ __u64 __pad2[3]; @@ -33,6 +33,12 @@ struct io_uring_sqe { #define IORING_OP_NOP 0 #define IORING_OP_READV 1 #define IORING_OP_WRITEV 2 +#define IORING_OP_FSYNC 3 + +/* + * sqe->fsync_flags + */ +#define IORING_FSYNC_DATASYNC (1U << 0) /* * IO completion data structure (Completion Queue Entry) -- cgit v1.2.3 From def596e9557c91d9846fc4d84d26f2c564644416 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Jan 2019 08:59:42 -0700 Subject: io_uring: support for IO polling Add support for a polled io_uring instance. When a read or write is submitted to a polled io_uring, the application must poll for completions on the CQ ring through io_uring_enter(2). Polled IO may not generate IRQ completions, hence they need to be actively found by the application itself. To use polling, io_uring_setup() must be used with the IORING_SETUP_IOPOLL flag being set. It is illegal to mix and match polled and non-polled IO on an io_uring. Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4589d56d0b68..5c457ea396e6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -30,6 +30,11 @@ struct io_uring_sqe { __u64 __pad2[3]; }; +/* + * io_uring_setup() flags + */ +#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ + #define IORING_OP_NOP 0 #define IORING_OP_READV 1 #define IORING_OP_WRITEV 2 -- cgit v1.2.3 From edafccee56ff31678a091ddb7219aba9b28bc3cb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Jan 2019 09:16:05 -0700 Subject: io_uring: add support for pre-mapped user IO buffers If we have fixed user buffers, we can map them into the kernel when we setup the io_uring. That avoids the need to do get_user_pages() for each and every IO. To utilize this feature, the application must call io_uring_register() after having setup an io_uring instance, passing in IORING_REGISTER_BUFFERS as the opcode. The argument must be a pointer to an iovec array, and the nr_args should contain how many iovecs the application wishes to map. If successful, these buffers are now mapped into the kernel, eligible for IO. To use these fixed buffers, the application must use the IORING_OP_READ_FIXED and IORING_OP_WRITE_FIXED opcodes, and then set sqe->index to the desired buffer index. sqe->addr..sqe->addr+seq->len must point to somewhere inside the indexed buffer. The application may register buffers throughout the lifetime of the io_uring instance. It can call io_uring_register() with IORING_UNREGISTER_BUFFERS as the opcode to unregister the current set of buffers, and then register a new set. The application need not unregister buffers explicitly before shutting down the io_uring instance. It's perfectly valid to setup a larger buffer, and then sometimes only use parts of it for an IO. As long as the range is within the originally mapped region, it will work just fine. For now, buffers must not be file backed. If file backed buffers are passed in, the registration will fail with -1/EOPNOTSUPP. This restriction may be relaxed in the future. RLIMIT_MEMLOCK is used to check how much memory we can pin. A somewhat arbitrary 1G per buffer size is also imposed. Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5c457ea396e6..cf28f7a11f12 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -27,7 +27,10 @@ struct io_uring_sqe { __u32 fsync_flags; }; __u64 user_data; /* data to be passed back at completion time */ - __u64 __pad2[3]; + union { + __u16 buf_index; /* index into fixed buffers, if used */ + __u64 __pad2[3]; + }; }; /* @@ -39,6 +42,8 @@ struct io_uring_sqe { #define IORING_OP_READV 1 #define IORING_OP_WRITEV 2 #define IORING_OP_FSYNC 3 +#define IORING_OP_READ_FIXED 4 +#define IORING_OP_WRITE_FIXED 5 /* * sqe->fsync_flags @@ -103,4 +108,10 @@ struct io_uring_params { struct io_cqring_offsets cq_off; }; +/* + * io_uring_register(2) opcodes and arguments + */ +#define IORING_REGISTER_BUFFERS 0 +#define IORING_UNREGISTER_BUFFERS 1 + #endif -- cgit v1.2.3 From 6b06314c47e141031be043539900d80d2c7ba10f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Jan 2019 22:13:58 -0700 Subject: io_uring: add file set registration We normally have to fget/fput for each IO we do on a file. Even with the batching we do, the cost of the atomic inc/dec of the file usage count adds up. This adds IORING_REGISTER_FILES, and IORING_UNREGISTER_FILES opcodes for the io_uring_register(2) system call. The arguments passed in must be an array of __s32 holding file descriptors, and nr_args should hold the number of file descriptors the application wishes to pin for the duration of the io_uring instance (or until IORING_UNREGISTER_FILES is called). When used, the application must set IOSQE_FIXED_FILE in the sqe->flags member. Then, instead of setting sqe->fd to the real fd, it sets sqe->fd to the index in the array passed in to IORING_REGISTER_FILES. Files are automatically unregistered when the io_uring instance is torn down. An application need only unregister if it wishes to register a new set of fds. Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index cf28f7a11f12..6257478d55e9 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -16,7 +16,7 @@ */ struct io_uring_sqe { __u8 opcode; /* type of operation for this sqe */ - __u8 flags; /* as of now unused */ + __u8 flags; /* IOSQE_ flags */ __u16 ioprio; /* ioprio for the request */ __s32 fd; /* file descriptor to do IO on */ __u64 off; /* offset into file */ @@ -33,6 +33,11 @@ struct io_uring_sqe { }; }; +/* + * sqe->flags + */ +#define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */ + /* * io_uring_setup() flags */ @@ -113,5 +118,7 @@ struct io_uring_params { */ #define IORING_REGISTER_BUFFERS 0 #define IORING_UNREGISTER_BUFFERS 1 +#define IORING_REGISTER_FILES 2 +#define IORING_UNREGISTER_FILES 3 #endif -- cgit v1.2.3 From 6c271ce2f1d572f7fa225700a13cfe7ced492434 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Jan 2019 11:22:30 -0700 Subject: io_uring: add submission polling This enables an application to do IO, without ever entering the kernel. By using the SQ ring to fill in new sqes and watching for completions on the CQ ring, we can submit and reap IOs without doing a single system call. The kernel side thread will poll for new submissions, and in case of HIPRI/polled IO, it'll also poll for completions. By default, we allow 1 second of active spinning. This can by changed by passing in a different grace period at io_uring_register(2) time. If the thread exceeds this idle time without having any work to do, it will set: sq_ring->flags |= IORING_SQ_NEED_WAKEUP. The application will have to call io_uring_enter() to start things back up again. If IO is kept busy, that will never be needed. Basically an application that has this feature enabled will guard it's io_uring_enter(2) call with: read_barrier(); if (*sq_ring->flags & IORING_SQ_NEED_WAKEUP) io_uring_enter(fd, 0, 0, IORING_ENTER_SQ_WAKEUP); instead of calling it unconditionally. It's mandatory to use fixed files with this feature. Failure to do so will result in the application getting an -EBADF CQ entry when submitting IO. Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6257478d55e9..0ec74bab8dbe 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -42,6 +42,8 @@ struct io_uring_sqe { * io_uring_setup() flags */ #define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ +#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */ +#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ #define IORING_OP_NOP 0 #define IORING_OP_READV 1 @@ -86,6 +88,11 @@ struct io_sqring_offsets { __u64 resv2; }; +/* + * sq_ring->flags + */ +#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ + struct io_cqring_offsets { __u32 head; __u32 tail; @@ -100,6 +107,7 @@ struct io_cqring_offsets { * io_uring_enter(2) flags */ #define IORING_ENTER_GETEVENTS (1U << 0) +#define IORING_ENTER_SQ_WAKEUP (1U << 1) /* * Passed in for io_uring_setup(2). Copied back with updated info on success @@ -108,7 +116,9 @@ struct io_uring_params { __u32 sq_entries; __u32 cq_entries; __u32 flags; - __u32 resv[7]; + __u32 sq_thread_cpu; + __u32 sq_thread_idle; + __u32 resv[5]; struct io_sqring_offsets sq_off; struct io_cqring_offsets cq_off; }; -- cgit v1.2.3 From 221c5eb2338232f7340386de1c43decc32682e58 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Jan 2019 09:41:58 -0700 Subject: io_uring: add support for IORING_OP_POLL This is basically a direct port of bfe4037e722e, which implements a one-shot poll command through aio. Description below is based on that commit as well. However, instead of adding a POLL command and relying on io_cancel(2) to remove it, we mimic the epoll(2) interface of having a command to add a poll notification, IORING_OP_POLL_ADD, and one to remove it again, IORING_OP_POLL_REMOVE. To poll for a file descriptor the application should submit an sqe of type IORING_OP_POLL. It will poll the fd for the events specified in the poll_events field. Unlike poll or epoll without EPOLLONESHOT this interface always works in one shot mode, that is once the sqe is completed, it will have to be resubmitted. Reviewed-by: Hannes Reinecke Based-on-code-from: Christoph Hellwig Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 0ec74bab8dbe..e23408692118 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -25,6 +25,7 @@ struct io_uring_sqe { union { __kernel_rwf_t rw_flags; __u32 fsync_flags; + __u16 poll_events; }; __u64 user_data; /* data to be passed back at completion time */ union { @@ -51,6 +52,8 @@ struct io_uring_sqe { #define IORING_OP_FSYNC 3 #define IORING_OP_READ_FIXED 4 #define IORING_OP_WRITE_FIXED 5 +#define IORING_OP_POLL_ADD 6 +#define IORING_OP_POLL_REMOVE 7 /* * sqe->fsync_flags -- cgit v1.2.3 From dbafd7ddd62369b2f3926ab847cbf8fc40e800b7 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:04 -0700 Subject: bpf: Add bpf_get_listener_sock(struct bpf_sock *sk) helper Add a new helper "struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)" which returns a bpf_sock in TCP_LISTEN state. It will trace back to the listener sk from a request_sock if possible. It returns NULL for all other cases. No reference is taken because the helper ensures the sk is in SOCK_RCU_FREE (where the TCP_LISTEN sock should be in). Hence, bpf_sk_release() is unnecessary and the verifier does not allow bpf_sk_release(listen_sk) to be called either. The following is also allowed because the bpf_prog is run under rcu_read_lock(): sk = bpf_sk_lookup_tcp(); /* if (!sk) { ... } */ listen_sk = bpf_get_listener_sock(sk); /* if (!listen_sk) { ... } */ bpf_sk_release(sk); src_port = listen_sk->src_port; /* Allowed */ Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3c38ac9a92a7..983b25cb608d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2366,6 +2366,14 @@ union bpf_attr { * current value is ect (ECN capable). Works with IPv6 and IPv4. * Return * 1 if set, 0 if not set. + * + * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) + * Description + * Return a **struct bpf_sock** pointer in TCP_LISTEN state. + * bpf_sk_release() is unnecessary and not allowed. + * Return + * A **struct bpf_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2465,7 +2473,8 @@ union bpf_attr { FN(spin_unlock), \ FN(sk_fullsock), \ FN(tcp_sock), \ - FN(skb_ecn_set_ce), + FN(skb_ecn_set_ce), \ + FN(get_listener_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 62369db2df8d1edfa040878203b446e023a16802 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 14 Mar 2019 12:38:39 +0000 Subject: bpf: fix documentation for eBPF helpers Another round of minor fixes for the documentation of the BPF helpers located in the UAPI bpf.h header file. Changes include: - Moving around description of some helpers, to keep the descriptions in the same order as helpers are declared (bpf_map_push_elem(), leftover from commit 90b1023f68c7 ("bpf: fix documentation for eBPF helpers"), bpf_rc_keydown(), and bpf_skb_ancestor_cgroup_id()). - Fixing typos ("contex" -> "context"). - Harmonising return types ("void* " -> "void *", "uint64_t" -> "u64"). - Addition of the "bpf_" prefix to bpf_get_storage(). - Light additions of RST markup on some keywords. - Empty line deletion between description and return value for bpf_tcp_sock(). - Edit for the description for bpf_skb_ecn_set_ce() (capital letters, acronym expansion, no effect if ECT not set, more details on return value). Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 128 ++++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 63 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 983b25cb608d..4465d00d3493 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -502,16 +502,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) - * Description - * Push an element *value* in *map*. *flags* is one of: - * - * **BPF_EXIST** - * If the queue/stack is full, the oldest element is removed to - * make room for this. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1435,14 +1425,14 @@ union bpf_attr { * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_addr** contex. + * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return * A 8-byte long non-decreasing number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_ops** contex. + * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return * A 8-byte long non-decreasing number. * @@ -2098,52 +2088,52 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * int bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded key press with *scancode*, - * *toggle* value in the given *protocol*. The scancode will be - * translated to a keycode using the rc keymap, and reported as - * an input key down event. After a period a key up event is - * generated. This period can be extended by calling either - * **bpf_rc_keydown**\ () again with the same values, or calling - * **bpf_rc_repeat**\ (). + * report a successfully decoded repeat key message. This delays + * the generation of a key up event for previously generated + * key down event. * - * Some protocols include a toggle bit, in case the button was - * released and pressed again between consecutive scancodes. + * Some IR protocols like NEC have a special IR message for + * repeating last button, for when a button is held down. * * The *ctx* should point to the lirc sample as passed into * the program. * - * The *protocol* is the decoded protocol number (see - * **enum rc_proto** for some predefined values). - * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * int bpf_rc_repeat(void *ctx) + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded repeat key message. This delays - * the generation of a key up event for previously generated - * key down event. + * report a successfully decoded key press with *scancode*, + * *toggle* value in the given *protocol*. The scancode will be + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. This period can be extended by calling either + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * - * Some IR protocols like NEC have a special IR message for - * repeating last button, for when a button is held down. + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. * * The *ctx* should point to the lirc sample as passed into * the program. * + * The *protocol* is the decoded protocol number (see + * **enum rc_proto** for some predefined values). + * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb) + * u64 bpf_skb_cgroup_id(struct sk_buff *skb) * Description * Return the cgroup v2 id of the socket associated with the *skb*. * This is roughly similar to the **bpf_get_cgroup_classid**\ () @@ -2159,30 +2149,12 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) - * Description - * Return id of cgroup v2 that is ancestor of cgroup associated - * with the *skb* at the *ancestor_level*. The root cgroup is at - * *ancestor_level* zero and each step down the hierarchy - * increments the level. If *ancestor_level* == level of cgroup - * associated with *skb*, then return value will be same as that - * of **bpf_skb_cgroup_id**\ (). - * - * The helper is useful to implement policies based on cgroups - * that are upper in hierarchy than immediate cgroup associated - * with *skb*. - * - * The format of returned id and helper limitations are same as in - * **bpf_skb_cgroup_id**\ (). - * Return - * The id is returned or 0 in case the id could not be retrieved. - * * u64 bpf_get_current_cgroup_id(void) * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. * - * void* get_local_storage(void *map, u64 flags) + * void *bpf_get_local_storage(void *map, u64 flags) * Description * Get the pointer to the local storage area. * The type and the size of the local storage is defined @@ -2209,6 +2181,24 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of cgroup associated + * with the *skb* at the *ancestor_level*. The root cgroup is at + * *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with *skb*, then return value will be same as that + * of **bpf_skb_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with *skb*. + * + * The format of returned id and helper limitations are same as in + * **bpf_skb_cgroup_id**\ (). + * Return + * The id is returned or 0 in case the id could not be retrieved. + * * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child @@ -2289,6 +2279,16 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is + * removed to make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. @@ -2346,33 +2346,35 @@ union bpf_attr { * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_sock** pointer such - * that all the fields in bpf_sock can be accessed. + * that all the fields in this **bpf_sock** can be accessed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. * * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_tcp_sock** pointer from a * **struct bpf_sock** pointer. - * * Return - * A **struct bpf_tcp_sock** pointer on success, or NULL in + * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * * int bpf_skb_ecn_set_ce(struct sk_buf *skb) - * Description - * Sets ECN of IP header to ce (congestion encountered) if - * current value is ect (ECN capable). Works with IPv6 and IPv4. - * Return - * 1 if set, 0 if not set. + * Description + * Set ECN (Explicit Congestion Notification) field of IP header + * to **CE** (Congestion Encountered) if current value is **ECT** + * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 + * and IPv4. + * Return + * 1 if the **CE** flag is set (either by the current helper call + * or because it was already present), 0 if it is not set. * * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) * Description - * Return a **struct bpf_sock** pointer in TCP_LISTEN state. - * bpf_sk_release() is unnecessary and not allowed. + * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. + * **bpf_sk_release**\ () is unnecessary and not allowed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ -- cgit v1.2.3 From 0eb0978528d47699edd091dc2c337952ad8da436 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 14 Mar 2019 12:38:40 +0000 Subject: bpf: add documentation for helpers bpf_spin_lock(), bpf_spin_unlock() Add documentation for the BPF spinlock-related helpers to the doc in bpf.h. I added the constraints and restrictions coming with the use of spinlocks for BPF: not all of it is directly related to the use of the helper, but I thought it would be nice for users to find them in the man page. This list of restrictions is nearly a verbatim copy of the list in Alexei's commit log for those helpers. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4465d00d3493..929c8e537a14 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2343,6 +2343,61 @@ union bpf_attr { * Return * 0 * + * int bpf_spin_lock(struct bpf_spin_lock *lock) + * Description + * Acquire a spinlock represented by the pointer *lock*, which is + * stored as part of a value of a map. Taking the lock allows to + * safely update the rest of the fields in that value. The + * spinlock can (and must) later be released with a call to + * **bpf_spin_unlock**\ (\ *lock*\ ). + * + * Spinlocks in BPF programs come with a number of restrictions + * and constraints: + * + * * **bpf_spin_lock** objects are only allowed inside maps of + * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this + * list could be extended in the future). + * * BTF description of the map is mandatory. + * * The BPF program can take ONE lock at a time, since taking two + * or more could cause dead locks. + * * Only one **struct bpf_spin_lock** is allowed per map element. + * * When the lock is taken, calls (either BPF to BPF or helpers) + * are not allowed. + * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not + * allowed inside a spinlock-ed region. + * * The BPF program MUST call **bpf_spin_unlock**\ () to release + * the lock, on all execution paths, before it returns. + * * The BPF program can access **struct bpf_spin_lock** only via + * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () + * helpers. Loading or storing data into the **struct + * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. + * * To use the **bpf_spin_lock**\ () helper, the BTF description + * of the map value must be a struct and have **struct + * bpf_spin_lock** *anyname*\ **;** field at the top level. + * Nested lock inside another struct is not allowed. + * * The **struct bpf_spin_lock** *lock* field in a map value must + * be aligned on a multiple of 4 bytes in that value. + * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy + * the **bpf_spin_lock** field to user space. + * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from + * a BPF program, do not update the **bpf_spin_lock** field. + * * **bpf_spin_lock** cannot be on the stack or inside a + * networking packet (it can only be inside of a map values). + * * **bpf_spin_lock** is available to root only. + * * Tracing programs and socket filter programs cannot use + * **bpf_spin_lock**\ () due to insufficient preemption checks + * (but this may change in the future). + * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. + * Return + * 0 + * + * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * Description + * Release the *lock* previously locked by a call to + * **bpf_spin_lock**\ (\ *lock*\ ). + * Return + * 0 + * * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_sock** pointer such -- cgit v1.2.3 From 0532a1b0d045115521a93acf28f1270df89ad806 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 22 Mar 2019 09:19:34 +0100 Subject: virt: vbox: Implement passing requestor info to the host for VirtualBox 6.0.x VirtualBox 6.0.x has a new feature where the guest kernel driver passes info about the origin of the request (e.g. userspace or kernelspace) to the hypervisor. If we do not pass this information then when running the 6.0.x userspace guest-additions tools on a 6.0.x host, some requests will get denied with a VERR_VERSION_MISMATCH error, breaking vboxservice.service and the mounting of shared folders marked to be auto-mounted. This commit implements passing the requestor info to the host, fixing this. Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/vbox_vmmdev_types.h | 60 ++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vbox_vmmdev_types.h b/include/uapi/linux/vbox_vmmdev_types.h index 0e68024f36c7..26f39816af14 100644 --- a/include/uapi/linux/vbox_vmmdev_types.h +++ b/include/uapi/linux/vbox_vmmdev_types.h @@ -102,6 +102,66 @@ enum vmmdev_request_type { #define VMMDEVREQ_HGCM_CALL VMMDEVREQ_HGCM_CALL32 #endif +/* vmmdev_request_header.requestor defines */ + +/* Requestor user not given. */ +#define VMMDEV_REQUESTOR_USR_NOT_GIVEN 0x00000000 +/* The kernel driver (vboxguest) is the requestor. */ +#define VMMDEV_REQUESTOR_USR_DRV 0x00000001 +/* Some other kernel driver is the requestor. */ +#define VMMDEV_REQUESTOR_USR_DRV_OTHER 0x00000002 +/* The root or a admin user is the requestor. */ +#define VMMDEV_REQUESTOR_USR_ROOT 0x00000003 +/* Regular joe user is making the request. */ +#define VMMDEV_REQUESTOR_USR_USER 0x00000006 +/* User classification mask. */ +#define VMMDEV_REQUESTOR_USR_MASK 0x00000007 + +/* Kernel mode request. Note this is 0, check for !USERMODE instead. */ +#define VMMDEV_REQUESTOR_KERNEL 0x00000000 +/* User mode request. */ +#define VMMDEV_REQUESTOR_USERMODE 0x00000008 +/* User or kernel mode classification mask. */ +#define VMMDEV_REQUESTOR_MODE_MASK 0x00000008 + +/* Don't know the physical console association of the requestor. */ +#define VMMDEV_REQUESTOR_CON_DONT_KNOW 0x00000000 +/* + * The request originates with a process that is NOT associated with the + * physical console. + */ +#define VMMDEV_REQUESTOR_CON_NO 0x00000010 +/* Requestor process is associated with the physical console. */ +#define VMMDEV_REQUESTOR_CON_YES 0x00000020 +/* Console classification mask. */ +#define VMMDEV_REQUESTOR_CON_MASK 0x00000030 + +/* Requestor is member of special VirtualBox user group. */ +#define VMMDEV_REQUESTOR_GRP_VBOX 0x00000080 + +/* Note: trust level is for windows guests only, linux always uses not-given */ +/* Requestor trust level: Unspecified */ +#define VMMDEV_REQUESTOR_TRUST_NOT_GIVEN 0x00000000 +/* Requestor trust level: Untrusted (SID S-1-16-0) */ +#define VMMDEV_REQUESTOR_TRUST_UNTRUSTED 0x00001000 +/* Requestor trust level: Untrusted (SID S-1-16-4096) */ +#define VMMDEV_REQUESTOR_TRUST_LOW 0x00002000 +/* Requestor trust level: Medium (SID S-1-16-8192) */ +#define VMMDEV_REQUESTOR_TRUST_MEDIUM 0x00003000 +/* Requestor trust level: Medium plus (SID S-1-16-8448) */ +#define VMMDEV_REQUESTOR_TRUST_MEDIUM_PLUS 0x00004000 +/* Requestor trust level: High (SID S-1-16-12288) */ +#define VMMDEV_REQUESTOR_TRUST_HIGH 0x00005000 +/* Requestor trust level: System (SID S-1-16-16384) */ +#define VMMDEV_REQUESTOR_TRUST_SYSTEM 0x00006000 +/* Requestor trust level >= Protected (SID S-1-16-20480, S-1-16-28672) */ +#define VMMDEV_REQUESTOR_TRUST_PROTECTED 0x00007000 +/* Requestor trust level mask */ +#define VMMDEV_REQUESTOR_TRUST_MASK 0x00007000 + +/* Requestor is using the less trusted user device node (/dev/vboxuser) */ +#define VMMDEV_REQUESTOR_USER_DEVICE 0x00008000 + /** HGCM service location types. */ enum vmmdev_hgcm_service_location_type { VMMDEV_HGCM_LOC_INVALID = 0, -- cgit v1.2.3 From 3d9683cf3bfb6d4e4605a153958dfca7e18b52f2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 18 Mar 2019 18:08:12 +0900 Subject: KVM: export and iif KVM is supported I do not see any consistency about headers_install of and . According to my analysis of Linux 5.1-rc1, there are 3 groups: [1] Both and are exported alpha, arm, hexagon, mips, powerpc, s390, sparc, x86 [2] is exported, but is not arc, arm64, c6x, h8300, ia64, m68k, microblaze, nios2, openrisc, parisc, sh, unicore32, xtensa [3] Neither nor is exported csky, nds32, riscv This does not match to the actual KVM support. At least, [2] is half-baked. Nor do arch maintainers look like they care about this. For example, commit 0add53713b1c ("microblaze: Add missing kvm_para.h to Kbuild") exported to user-space in order to fix an in-kernel build error. We have two ways to make this consistent: [A] export both and for all architectures, irrespective of the KVM support [B] Match the header export of and to the KVM support My first attempt was [A] because the code looks cleaner, but Paolo suggested [B]. So, this commit goes with [B]. For most architectures, was moved to the kernel-space. I changed include/uapi/linux/Kbuild so that it checks generated asm/kvm_para.h as well as check-in ones. After this commit, there will be two groups: [1] Both and are exported arm, arm64, mips, powerpc, s390, x86 [2] Neither nor is exported alpha, arc, c6x, csky, h8300, hexagon, ia64, m68k, microblaze, nds32, nios2, openrisc, parisc, riscv, sh, sparc, unicore32, xtensa Signed-off-by: Masahiro Yamada Acked-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- include/uapi/linux/Kbuild | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 5f24b50c9e88..059dc2bedaf6 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -7,5 +7,7 @@ no-export-headers += kvm.h endif ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h),) +ifeq ($(wildcard $(objtree)/arch/$(SRCARCH)/include/generated/uapi/asm/kvm_para.h),) no-export-headers += kvm_para.h endif +endif -- cgit v1.2.3