From 1693c5db6ab8262e6f5263f9d211855959aa5acd Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 9 May 2024 14:18:33 -0700 Subject: net: Add additional bit to support clockid_t timestamp type tstamp_type is now set based on actual clockid_t compressed into 2 bits. To make the design scalable for future needs this commit bring in the change to extend the tstamp_type:1 to tstamp_type:2 to support other clockid_t timestamp. We now support CLOCK_TAI as part of tstamp_type as part of this commit with existing support CLOCK_MONOTONIC and CLOCK_REALTIME. Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240509211834.3235191-3-quic_abchauha@quicinc.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 90706a47f6ff..25ea393cf084 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6207,12 +6207,17 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +/* The enum used in skb->tstamp_type. It specifies the clock type + * of the time stored in the skb->tstamp. + */ enum { - BPF_SKB_TSTAMP_UNSPEC, - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC - * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */ + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */ + BPF_SKB_CLOCK_REALTIME = 0, + BPF_SKB_CLOCK_MONOTONIC = 1, + BPF_SKB_CLOCK_TAI = 2, + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle, + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid. */ }; -- cgit v1.2.3 From ed7ee6a69f9289337af4835a908aa782263d4852 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 29 May 2024 10:39:01 +0200 Subject: statx: Update offset commentary for struct statx In commit 2a82bb02941f ("statx: stx_subvol"), a new member was added to struct statx, but the offset comment was not correct. Update it. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240529081725.3769290-1-john.g.garry@oracle.com Signed-off-by: Christian Brauner --- include/uapi/linux/stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 67626d535316..95770941ee2c 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -126,8 +126,8 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ + __u64 stx_subvol; /* Subvolume identifier */ __u64 __spare3[11]; /* Spare space for future expansion */ /* 0x100 */ }; -- cgit v1.2.3 From 1d8491d3e726984343dd8c3cdbe2f2b47cfdd928 Mon Sep 17 00:00:00 2001 From: Paolo Pisati Date: Sat, 1 Jun 2024 17:32:54 +0200 Subject: m68k: amiga: Turn off Warp1260 interrupts during boot On an Amiga 1200 equipped with a Warp1260 accelerator, an interrupt storm coming from the accelerator board causes the machine to crash in local_irq_enable() or auto_irq_enable(). Disabling interrupts for the Warp1260 in amiga_parse_bootinfo() fixes the problem. Link: https://lore.kernel.org/r/ZkjwzVwYeQtyAPrL@amaterasu.local Cc: stable Signed-off-by: Paolo Pisati Reviewed-by: Michael Schmitz Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240601153254.186225-1-p.pisati@gmail.com Signed-off-by: Geert Uytterhoeven --- include/uapi/linux/zorro_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/zorro_ids.h b/include/uapi/linux/zorro_ids.h index 6e574d7b7d79..393f2ee9c042 100644 --- a/include/uapi/linux/zorro_ids.h +++ b/include/uapi/linux/zorro_ids.h @@ -449,6 +449,9 @@ #define ZORRO_PROD_VMC_ISDN_BLASTER_Z2 ZORRO_ID(VMC, 0x01, 0) #define ZORRO_PROD_VMC_HYPERCOM_4 ZORRO_ID(VMC, 0x02, 0) +#define ZORRO_MANUF_CSLAB 0x1400 +#define ZORRO_PROD_CSLAB_WARP_1260 ZORRO_ID(CSLAB, 0x65, 0) + #define ZORRO_MANUF_INFORMATION 0x157C #define ZORRO_PROD_INFORMATION_ISDN_ENGINE_I ZORRO_ID(INFORMATION, 0x64, 0) -- cgit v1.2.3 From 1d17568e74dedbcb54d36af0662a15128297d681 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 30 May 2024 19:08:35 +0200 Subject: net/sched: cls_flower: add support for matching tunnel control flags extend cls_flower to match TUNNEL_FLAGS_PRESENT bits in tunnel metadata. Suggested-by: Ilya Maximets Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/uapi/linux/pkt_cls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 229fc925ec3a..b6d38f5fd7c0 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -554,6 +554,9 @@ enum { TCA_FLOWER_KEY_SPI, /* be32 */ TCA_FLOWER_KEY_SPI_MASK, /* be32 */ + TCA_FLOWER_KEY_ENC_FLAGS, /* u32 */ + TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* u32 */ + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 310ec03841a36e3f45fb528f0dfdfe5b9e84b037 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 5 Jun 2024 13:26:05 +1200 Subject: dma-buf: align fd_flags and heap_flags with dma_heap_allocation_data dma_heap_allocation_data defines the UAPI as follows: struct dma_heap_allocation_data { __u64 len; __u32 fd; __u32 fd_flags; __u64 heap_flags; }; However, dma_heap_buffer_alloc() casts both fd_flags and heap_flags into unsigned int. We're inconsistent with types in the non UAPI arguments. This patch fixes it. Signed-off-by: Barry Song Acked-by: John Stultz Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20240605012605.5341-1-21cnbao@gmail.com --- include/uapi/linux/dma-heap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dma-heap.h b/include/uapi/linux/dma-heap.h index 6f84fa08e074..a4cf716a49fa 100644 --- a/include/uapi/linux/dma-heap.h +++ b/include/uapi/linux/dma-heap.h @@ -19,7 +19,7 @@ #define DMA_HEAP_VALID_FD_FLAGS (O_CLOEXEC | O_ACCMODE) /* Currently no heap flags */ -#define DMA_HEAP_VALID_HEAP_FLAGS (0) +#define DMA_HEAP_VALID_HEAP_FLAGS (0ULL) /** * struct dma_heap_allocation_data - metadata passed from userspace for -- cgit v1.2.3 From 6fef518594bcb7e374f809717281bd02894929f8 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 25 Apr 2024 15:07:01 -0700 Subject: KVM: x86: Add a capability to configure bus frequency for APIC timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add KVM_CAP_X86_APIC_BUS_CYCLES_NS capability to configure the APIC bus clock frequency for APIC timer emulation. Allow KVM_ENABLE_CAPABILITY(KVM_CAP_X86_APIC_BUS_CYCLES_NS) to set the frequency in nanoseconds. When using this capability, the user space VMM should configure CPUID leaf 0x15 to advertise the frequency. Vishal reported that the TDX guest kernel expects a 25MHz APIC bus frequency but ends up getting interrupts at a significantly higher rate. The TDX architecture hard-codes the core crystal clock frequency to 25MHz and mandates exposing it via CPUID leaf 0x15. The TDX architecture does not allow the VMM to override the value. In addition, per Intel SDM: "The APIC timer frequency will be the processor’s bus clock or core crystal clock frequency (when TSC/core crystal clock ratio is enumerated in CPUID leaf 0x15) divided by the value specified in the divide configuration register." The resulting 25MHz APIC bus frequency conflicts with the KVM hardcoded APIC bus frequency of 1GHz. The KVM doesn't enumerate CPUID leaf 0x15 to the guest unless the user space VMM sets it using KVM_SET_CPUID. If the CPUID leaf 0x15 is enumerated, the guest kernel uses it as the APIC bus frequency. If not, the guest kernel measures the frequency based on other known timers like the ACPI timer or the legacy PIT. As reported by Vishal the TDX guest kernel expects a 25MHz timer frequency but gets timer interrupt more frequently due to the 1GHz frequency used by KVM. To ensure that the guest doesn't have a conflicting view of the APIC bus frequency, allow the userspace to tell KVM to use the same frequency that TDX mandates instead of the default 1Ghz. Reported-by: Vishal Annapurve Closes: https://lore.kernel.org/lkml/20231006011255.4163884-1-vannapurve@google.com Signed-off-by: Isaku Yamahata Reviewed-by: Rick Edgecombe Co-developed-by: Reinette Chatre Signed-off-by: Reinette Chatre Reviewed-by: Xiaoyao Li Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/6748a4c12269e756f0c48680da8ccc5367c31ce7.1714081726.git.reinette.chatre@intel.com Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d03842abae57..ec998e6b6555 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -917,6 +917,7 @@ struct kvm_enable_cap { #define KVM_CAP_MEMORY_ATTRIBUTES 233 #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 +#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 236 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From 62096c48394b49e326056a62780fb67b60edde91 Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Mon, 6 May 2024 17:49:16 +0900 Subject: media: v4l2-ctrls: Add average QP control Add a control V4L2_CID_MPEG_VIDEO_AVERAGE_QP to report the average QP value of the current encoded frame. The value applies to the last dequeued capture buffer. Signed-off-by: Ming Qian Reviewed-by: Hans Verkuil Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-controls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 99c3f5e99da7..974fd254e573 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -898,6 +898,8 @@ enum v4l2_mpeg_video_av1_level { V4L2_MPEG_VIDEO_AV1_LEVEL_7_3 = 23 }; +#define V4L2_CID_MPEG_VIDEO_AVERAGE_QP (V4L2_CID_CODEC_BASE + 657) + /* MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */ #define V4L2_CID_CODEC_CX2341X_BASE (V4L2_CTRL_CLASS_CODEC | 0x1000) #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE (V4L2_CID_CODEC_CX2341X_BASE+0) -- cgit v1.2.3 From 85542adb65ecd7cc0e442e8befef74f2ed07f5f6 Mon Sep 17 00:00:00 2001 From: Thomas Prescher Date: Wed, 8 May 2024 15:25:01 +0200 Subject: KVM: x86: Add KVM_RUN_X86_GUEST_MODE kvm_run flag When a vCPU is interrupted by a signal while running a nested guest, KVM will exit to userspace with L2 state. However, userspace has no way to know whether it sees L1 or L2 state (besides calling KVM_GET_STATS_FD, which does not have a stable ABI). This causes multiple problems: The simplest one is L2 state corruption when userspace marks the sregs as dirty. See this mailing list thread [1] for a complete discussion. Another problem is that if userspace decides to continue by emulating instructions, it will unknowingly emulate with L2 state as if L1 doesn't exist, which can be considered a weird guest escape. Introduce a new flag, KVM_RUN_X86_GUEST_MODE, in the kvm_run data structure, which is set when the vCPU exited while running a nested guest. Also introduce a new capability, KVM_CAP_X86_GUEST_MODE, to advertise the functionality to userspace. [1] https://lore.kernel.org/kvm/20240416123558.212040-1-julian.stecklina@cyberus-technology.de/T/#m280aadcb2e10ae02c191a7dc4ed4b711a74b1f55 Signed-off-by: Thomas Prescher Signed-off-by: Julian Stecklina Link: https://lore.kernel.org/r/20240508132502.184428-1-julian.stecklina@cyberus-technology.de Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ec998e6b6555..a6ac00ec77ad 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -918,6 +918,7 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 236 +#define KVM_CAP_X86_GUEST_MODE 237 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From f328a26eeb5380bc74e58cb9c3280a4908452df7 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 3 Jun 2024 17:55:55 -0400 Subject: dlm: introduce DLM_LSFL_SOFTIRQ_SAFE Introduce a new external lockspace flag DLM_LSFL_SOFTIRQ_SAFE. A lockspace user will set this flag if it can handle dlm running the callback functions from softirq context. When not set, dlm will continue to run callback functions from the dlm_callback workqueue. The new lockspace flag cannot be used for user space lockspaces, so a uapi placeholder definition is used for the new flag value. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/uapi/linux/dlm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dlm.h b/include/uapi/linux/dlm.h index e7e905fb0bb2..4eaf835780b0 100644 --- a/include/uapi/linux/dlm.h +++ b/include/uapi/linux/dlm.h @@ -71,6 +71,8 @@ struct dlm_lksb { /* DLM_LSFL_TIMEWARN is deprecated and reserved. DO NOT USE! */ #define DLM_LSFL_TIMEWARN 0x00000002 #define DLM_LSFL_NEWEXCL 0x00000008 +/* currently reserved due in-kernel use */ +#define __DLM_LSFL_RESERVED0 0x00000010 #endif /* _UAPI__DLM_DOT_H__ */ -- cgit v1.2.3 From c1d8bd8d777d55f6708ca6e47c54dbe9f66f9bbb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:49 +0200 Subject: wifi: cfg80211: add regulatory flag to allow VLP AP operation Add a regulatory flag to allow VLP AP operation even on channels otherwise marked NO_IR, which may be possible in some regulatory domains/countries. Note that this requires checking also when the beacon is changed, since that may change the regulatory power type. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.63792ce19790.Ie2a02750d283b78fbf3c686b10565fb0388889e2@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f917bc6c9b6f..6ae3997061b6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4277,6 +4277,8 @@ enum nl80211_wmm_rule { * @NL80211_FREQUENCY_ATTR_CAN_MONITOR: This channel can be used in monitor * mode despite other (regulatory) restrictions, even if the channel is * otherwise completely disabled. + * @NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP: This channel can be used for a + * very low power (VLP) AP, despite being NO_IR. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4320,6 +4322,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT, NL80211_FREQUENCY_ATTR_CAN_MONITOR, + NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4529,6 +4532,8 @@ enum nl80211_sched_scan_match_attr { * Should be used together with %NL80211_RRF_DFS only. * @NL80211_RRF_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP not allowed * @NL80211_RRF_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP not allowed + * @NL80211_RRF_ALLOW_6GHZ_VLP_AP: Very low power (VLP) AP can be permitted + * despite NO_IR configuration. */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4553,6 +4558,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_DFS_CONCURRENT = 1<<21, NL80211_RRF_NO_6GHZ_VLP_CLIENT = 1<<22, NL80211_RRF_NO_6GHZ_AFC_CLIENT = 1<<23, + NL80211_RRF_ALLOW_6GHZ_VLP_AP = 1<<24, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR -- cgit v1.2.3 From a3cfe84cca28f205761a0450016593b0d728165e Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 6 Jun 2024 07:58:50 -0700 Subject: bpf: Add CHECKSUM_COMPLETE to bpf test progs Add special flag to validate that TC BPF program properly updates checksum information in skb. Signed-off-by: Vadim Fedorenko Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240606145851.229116-1-vadfed@meta.com --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 25ea393cf084..35bcf52dbc65 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1425,6 +1425,8 @@ enum { #define BPF_F_TEST_RUN_ON_CPU (1U << 0) /* If set, XDP frames will be transmitted after processing */ #define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) +/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */ +#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { -- cgit v1.2.3 From d25a92ccae6bed02327b63d138e12e7806830f78 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 14 Jun 2024 02:00:30 +0800 Subject: net/smc: Introduce IPPROTO_SMC This patch allows to create smc socket via AF_INET, similar to the following code, /* create v4 smc sock */ v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); /* create v6 smc sock */ v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); There are several reasons why we believe it is appropriate here: 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) address. There is no AF_SMC address at all. 2. Create smc socket in the AF_INET(6) path, which allows us to reuse the infrastructure of AF_INET(6) path, such as common ebpf hooks. Otherwise, smc have to implement it again in AF_SMC path. Signed-off-by: D. Wythe Reviewed-by: Wenjia Zhang Reviewed-by: Dust Li Tested-by: Niklas Schnelle Tested-by: Wenjia Zhang Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index e682ab628dfa..d358add1611c 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX -- cgit v1.2.3 From 4b23e0c199b20fa6fe9655b3d0e12d6c6f18c27f Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 3 May 2024 11:17:33 -0700 Subject: KVM: Ensure new code that references immediate_exit gets extra scrutiny Ensure that any new KVM code that references immediate_exit gets extra scrutiny by renaming it to immediate_exit__unsafe in kernel code. All fields in struct kvm_run are subject to TOCTOU races since they are mapped into userspace, which may be malicious or buggy. To protect KVM, introduces a new macro that appends __unsafe to select field names in struct kvm_run, hinting to developers and reviewers that accessing such fields must be done carefully. Apply the new macro to immediate_exit, since userspace can make immediate_exit inconsistent with vcpu->wants_to_run, i.e. accessing immediate_exit directly could lead to unexpected bugs in the future. Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20240503181734.1467938-3-dmatlack@google.com [sean: massage changelog] Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d03842abae57..795773f5db63 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -192,11 +192,24 @@ struct kvm_xen_exit { /* Flags that describe what fields in emulation_failure hold valid data. */ #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) +/* + * struct kvm_run can be modified by userspace at any time, so KVM must be + * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM() + * renames fields in struct kvm_run from to __unsafe when + * compiled into the kernel, ensuring that any use within KVM is obvious and + * gets extra scrutiny. + */ +#ifdef __KERNEL__ +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe +#else +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol +#endif + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 immediate_exit; + __u8 HINT_UNSAFE_IN_KVM(immediate_exit); __u8 padding1[6]; /* out */ -- cgit v1.2.3 From 7481fd93fa0a851740e26026485f56a1305454ce Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 14 Jun 2024 12:30:46 -0400 Subject: io_uring: Introduce IORING_OP_BIND IORING_OP_BIND provides the semantic of bind(2) via io_uring. While this is an essentially synchronous system call, the main point is to enable a network path to execute fully with io_uring registered and descriptorless files. Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20240614163047.31581-3-krisman@suse.de Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 994bf7af0efe..4ef153d95c87 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -257,6 +257,7 @@ enum io_uring_op { IORING_OP_FUTEX_WAITV, IORING_OP_FIXED_FD_INSTALL, IORING_OP_FTRUNCATE, + IORING_OP_BIND, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From ff140cc8628abfb1755691d16cfa8788d8820ef7 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 14 Jun 2024 12:30:47 -0400 Subject: io_uring: Introduce IORING_OP_LISTEN IORING_OP_LISTEN provides the semantic of listen(2) via io_uring. While this is an essentially synchronous system call, the main point is to enable a network path to execute fully with io_uring registered and descriptorless files. Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20240614163047.31581-4-krisman@suse.de Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4ef153d95c87..2aaf7ee256ac 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -258,6 +258,7 @@ enum io_uring_op { IORING_OP_FIXED_FD_INSTALL, IORING_OP_FTRUNCATE, IORING_OP_BIND, + IORING_OP_LISTEN, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From ba63a7e08523be3496cc1b7e5f77d306144a3a40 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 20 Apr 2024 21:47:46 +0200 Subject: can: isotp: remove ISO 15675-2 specification version where possible With the new ISO 15765-2:2024 release the former documentation and comments have to be reworked. This patch removes the ISO specification version/date where possible. Signed-off-by: Oliver Hartkopp Acked-by: Vincent Mailhol Acked-by: Francesco Valla Link: https://lore.kernel.org/all/20240420194746.4885-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index 6cde62371b6f..bd990917f7c4 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -2,7 +2,7 @@ /* * linux/can/isotp.h * - * Definitions for isotp CAN sockets (ISO 15765-2:2016) + * Definitions for ISO 15765-2 CAN transport protocol sockets * * Copyright (c) 2020 Volkswagen Group Electronic Research * All rights reserved. -- cgit v1.2.3 From c34fc6f26ab86d03a2d47446f42b6cd492dfdc56 Mon Sep 17 00:00:00 2001 From: Prasad Singamsetty Date: Thu, 20 Jun 2024 12:53:52 +0000 Subject: fs: Initial atomic write support An atomic write is a write issued with torn-write protection, meaning that for a power failure or any other hardware failure, all or none of the data from the write will be stored, but never a mix of old and new data. Userspace may add flag RWF_ATOMIC to pwritev2() to indicate that the write is to be issued with torn-write prevention, according to special alignment and length rules. For any syscall interface utilizing struct iocb, add IOCB_ATOMIC for iocb->ki_flags field to indicate the same. A call to statx will give the relevant atomic write info for a file: - atomic_write_unit_min - atomic_write_unit_max - atomic_write_segments_max Both min and max values must be a power-of-2. Applications can avail of atomic write feature by ensuring that the total length of a write is a power-of-2 in size and also sized between atomic_write_unit_min and atomic_write_unit_max, inclusive. Applications must ensure that the write is at a naturally-aligned offset in the file wrt the total write length. The value in atomic_write_segments_max indicates the upper limit for IOV_ITER iovcnt. Add file mode flag FMODE_CAN_ATOMIC_WRITE, so files which do not have the flag set will have RWF_ATOMIC rejected and not just ignored. Add a type argument to kiocb_set_rw_flags() to allows reads which have RWF_ATOMIC set to be rejected. Helper function generic_atomic_write_valid() can be used by FSes to verify compliant writes. There we check for iov_iter type is for ubuf, which implies iovcnt==1 for pwritev2(), which is an initial restriction for atomic_write_segments_max. Initially the only user will be bdev file operations write handler. We will rely on the block BIO submission path to ensure write sizes are compliant for the bdev, so we don't need to check atomic writes sizes yet. Signed-off-by: Prasad Singamsetty jpg: merge into single patch and much rewrite Acked-by: Darrick J. Wong Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20240620125359.2684798-4-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/uapi/linux/fs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 45e4e64fd664..191a7e88a8ab 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -329,9 +329,12 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO negation of O_APPEND */ #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC) /* Pagemap ioctl */ #define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) -- cgit v1.2.3 From 0f9ca80fa4f9670ba09721e4e36b8baf086a500c Mon Sep 17 00:00:00 2001 From: Prasad Singamsetty Date: Thu, 20 Jun 2024 12:53:53 +0000 Subject: fs: Add initial atomic write support info to statx Extend statx system call to return additional info for atomic write support support for a file. Helper function generic_fill_statx_atomic_writes() can be used by FSes to fill in the relevant statx fields. For now atomic_write_segments_max will always be 1, otherwise some rules would need to be imposed on iovec length and alignment, which we don't want now. Signed-off-by: Prasad Singamsetty jpg: relocate bdev support to another patch Reviewed-by: Darrick J. Wong Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Acked-by: Darrick J. Wong Link: https://lore.kernel.org/r/20240620125359.2684798-5-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/uapi/linux/stat.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 67626d535316..887a25286441 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ -- cgit v1.2.3 From ca567df74a28a9fb368c6b2d93e864113f73f5c2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 7 Jun 2020 22:47:08 +0200 Subject: nsfs: add pid translation ioctls Add ioctl()s to translate pids between pid namespaces. LXCFS is a tiny fuse filesystem used to virtualize various aspects of procfs. LXCFS is run on the host. The files and directories it creates can be bind-mounted by e.g. a container at startup and mounted over the various procfs files the container wishes to have virtualized. When e.g. a read request for uptime is received, LXCFS will receive the pid of the reader. In order to virtualize the corresponding read, LXCFS needs to know the pid of the init process of the reader's pid namespace. In order to do this, LXCFS first needs to fork() two helper processes. The first helper process setns() to the readers pid namespace. The second helper process is needed to create a process that is a proper member of the pid namespace. The second helper process then creates a ucred message with ucred.pid set to 1 and sends it back to LXCFS. The kernel will translate the ucred.pid field to the corresponding pid number in LXCFS's pid namespace. This way LXCFS can learn the init pid number of the reader's pid namespace and can go on to virtualize. Since these two forks() are costly LXCFS maintains an init pid cache that caches a given pid for a fixed amount of time. The cache is pruned during new read requests. However, even with the cache the hit of the two forks() is singificant when a very large number of containers are running. With this simple patch we add an ns ioctl that let's a caller retrieve the init pid nr of a pid namespace through its pid namespace fd. This significantly improves performance with a very simple change. Support translation of pids and tgids. Other concepts can be added but there are no obvious users for this right now. To protect against races pidfds can be used to check whether the process is still valid. If needed, this can also be extended to work on pidfds directly. Link: https://lore.kernel.org/r/20240619-work-ns_ioctl-v1-1-7c0097e6bb6b@kernel.org Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index a0c8552b64ee..faeb9195da08 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -15,5 +15,13 @@ #define NS_GET_NSTYPE _IO(NSIO, 0x3) /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Translate pid from target pid namespace into the caller's pid namespace. */ +#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x5, int) +/* Return thread-group leader id of pid in the callers pid namespace. */ +#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int) +/* Translate pid from caller's pid namespace into a target pid namespace. */ +#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x6, int) +/* Return thread-group leader id of pid in the target pid namespace. */ +#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x8, int) #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From e29630247be24c3987e2b048f8e152771b32d38b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 3 Jun 2024 20:16:59 +0200 Subject: netfilter: nf_tables: rise cap on SELinux secmark context secmark context is artificially limited 256 bytes, rise it to 4Kbytes. Fixes: fb961945457f ("netfilter: nf_tables: add SECMARK support") Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index aa4094ca2444..639894ed1b97 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1376,7 +1376,7 @@ enum nft_secmark_attributes { #define NFTA_SECMARK_MAX (__NFTA_SECMARK_MAX - 1) /* Max security context length */ -#define NFT_SECMARK_CTX_MAXLEN 256 +#define NFT_SECMARK_CTX_MAXLEN 4096 /** * enum nft_reject_types - nf_tables reject expression reject types -- cgit v1.2.3 From f750dfe825b904164688adeb147950e0e0c4d262 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Fri, 21 Jun 2024 18:13:51 +0800 Subject: ethtool: provide customized dim profile management The NetDIM library, currently leveraged by an array of NICs, delivers excellent acceleration benefits. Nevertheless, NICs vary significantly in their dim profile list prerequisites. Specifically, virtio-net backends may present diverse sw or hw device implementation, making a one-size-fits-all parameter list impractical. On Alibaba Cloud, the virtio DPU's performance under the default DIM profile falls short of expectations, partly due to a mismatch in parameter configuration. I also noticed that ice/idpf/ena and other NICs have customized profilelist or placed some restrictions on dim capabilities. Motivated by this, I tried adding new params for "ethtool -C" that provides a per-device control to modify and access a device's interrupt parameters. Usage ======== The target NIC is named ethx. Assume that ethx only declares support for rx profile setting (with DIM_PROFILE_RX flag set in profile_flags) and supports modification of usec and pkt fields. 1. Query the currently customized list of the device $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 256, .comps = n/a,}, {.usec = 8, .pkts = 256, .comps = n/a,}, {.usec = 64, .pkts = 256, .comps = n/a,}, {.usec = 128, .pkts = 256, .comps = n/a,}, {.usec = 256, .pkts = 256, .comps = n/a,} tx-profile: n/a 2. Tune $ ethtool -C ethx rx-profile 1,1,n_2,n,n_3,3,n_4,4,n_n,5,n "n" means do not modify this field. $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 1, .comps = n/a,}, {.usec = 2, .pkts = 256, .comps = n/a,}, {.usec = 3, .pkts = 3, .comps = n/a,}, {.usec = 4, .pkts = 4, .comps = n/a,}, {.usec = 256, .pkts = 5, .comps = n/a,} tx-profile: n/a 3. Hint If the device does not support some type of customized dim profiles, the corresponding "n/a" will display. If the "n/a" field is being modified, -EOPNOTSUPP will be reported. Signed-off-by: Heng Qi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240621101353.107425-4-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index b49b804b9495..d15856c7e001 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -415,12 +415,34 @@ enum { ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_RX_PROFILE, + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_TX_PROFILE, /* add new constants above here */ __ETHTOOL_A_COALESCE_CNT, ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) }; +enum { + ETHTOOL_A_PROFILE_UNSPEC, + /* nest, _A_IRQ_MODERATION_* */ + ETHTOOL_A_PROFILE_IRQ_MODERATION, + __ETHTOOL_A_PROFILE_CNT, + ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) +}; + +enum { + ETHTOOL_A_IRQ_MODERATION_UNSPEC, + ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ + + __ETHTOOL_A_IRQ_MODERATION_CNT, + ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) +}; + /* PAUSE */ enum { -- cgit v1.2.3 From f531d13bdfe3f4f084aaa8acae2cb0f02295f5ae Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Mon, 27 May 2024 20:29:14 -0700 Subject: xfrm: support sending NAT keepalives in ESP in UDP states Add the ability to send out RFC-3948 NAT keepalives from the xfrm stack. To use, Userspace sets an XFRM_NAT_KEEPALIVE_INTERVAL integer property when creating XFRM outbound states which denotes the number of seconds between keepalive messages. Keepalive messages are sent from a per net delayed work which iterates over the xfrm states. The logic is guarded by the xfrm state spinlock due to the xfrm state walk iterator. Possible future enhancements: - Adding counters to keep track of sent keepalives. - deduplicate NAT keepalives between states sharing the same nat keepalive parameters. - provisioning hardware offloads for devices capable of implementing this. - revise xfrm state list to use an rcu list in order to avoid running this under spinlock. Suggested-by: Paul Wouters Tested-by: Paul Wouters Tested-by: Antony Antony Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index d950d02ab791..f28701500714 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -321,6 +321,7 @@ enum xfrm_attr_type_t { XFRMA_IF_ID, /* __u32 */ XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */ XFRMA_SA_DIR, /* __u8 */ + XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ -- cgit v1.2.3 From d1741141d03f8f5535adaf5a2d5000da3be9fe90 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Wed, 26 Jun 2024 20:14:32 +0200 Subject: media: uapi: Add a pixel format for BGR48 and RGB48 Add BGR48 and RGB48 16-bit per component image formats. Signed-off-by: Jacopo Mondi Reviewed-by: Kieran Bingham Reviewed-by: Naushir Patuck Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index fe6b67e83751..dd6876380fe3 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -582,6 +582,8 @@ struct v4l2_pix_format { /* RGB formats (6 or 8 bytes per pixel) */ #define V4L2_PIX_FMT_BGR48_12 v4l2_fourcc('B', '3', '1', '2') /* 48 BGR 12-bit per component */ +#define V4L2_PIX_FMT_BGR48 v4l2_fourcc('B', 'G', 'R', '6') /* 48 BGR 16-bit per component */ +#define V4L2_PIX_FMT_RGB48 v4l2_fourcc('R', 'G', 'B', '6') /* 48 RGB 16-bit per component */ #define V4L2_PIX_FMT_ABGR64_12 v4l2_fourcc('B', '4', '1', '2') /* 64 BGRA 12-bit per component */ /* Grey formats */ -- cgit v1.2.3 From c6c49bac8770ef95518faf20083e8b3e6150f45f Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Wed, 26 Jun 2024 20:14:33 +0200 Subject: media: uapi: Add Raspberry Pi PiSP Back End uAPI Add the Raspberry Pi PiSP Back End uAPI header. The header defines the data type used to configure the PiSP Back End ISP. The detailed description of the types and of the ISP configuration procedure is available at https://datasheets.raspberrypi.com/camera/raspberry-pi-image-signal-processor-specification.pdf Signed-off-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- .../uapi/linux/media/raspberrypi/pisp_be_config.h | 927 +++++++++++++++++++++ include/uapi/linux/media/raspberrypi/pisp_common.h | 199 +++++ 2 files changed, 1126 insertions(+) create mode 100644 include/uapi/linux/media/raspberrypi/pisp_be_config.h create mode 100644 include/uapi/linux/media/raspberrypi/pisp_common.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h new file mode 100644 index 000000000000..1684ae068d4f --- /dev/null +++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h @@ -0,0 +1,927 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * PiSP Back End configuration definitions. + * + * Copyright (C) 2021 - Raspberry Pi Ltd + * + */ +#ifndef _UAPI_PISP_BE_CONFIG_H_ +#define _UAPI_PISP_BE_CONFIG_H_ + +#include + +#include "pisp_common.h" + +/* byte alignment for inputs */ +#define PISP_BACK_END_INPUT_ALIGN 4u +/* alignment for compressed inputs */ +#define PISP_BACK_END_COMPRESSED_ALIGN 8u +/* minimum required byte alignment for outputs */ +#define PISP_BACK_END_OUTPUT_MIN_ALIGN 16u +/* preferred byte alignment for outputs */ +#define PISP_BACK_END_OUTPUT_MAX_ALIGN 64u + +/* minimum allowed tile width anywhere in the pipeline */ +#define PISP_BACK_END_MIN_TILE_WIDTH 16u +/* minimum allowed tile width anywhere in the pipeline */ +#define PISP_BACK_END_MIN_TILE_HEIGHT 16u + +#define PISP_BACK_END_NUM_OUTPUTS 2 +#define PISP_BACK_END_HOG_OUTPUT 1 + +#define PISP_BACK_END_NUM_TILES 64 + +enum pisp_be_bayer_enable { + PISP_BE_BAYER_ENABLE_INPUT = 0x000001, + PISP_BE_BAYER_ENABLE_DECOMPRESS = 0x000002, + PISP_BE_BAYER_ENABLE_DPC = 0x000004, + PISP_BE_BAYER_ENABLE_GEQ = 0x000008, + PISP_BE_BAYER_ENABLE_TDN_INPUT = 0x000010, + PISP_BE_BAYER_ENABLE_TDN_DECOMPRESS = 0x000020, + PISP_BE_BAYER_ENABLE_TDN = 0x000040, + PISP_BE_BAYER_ENABLE_TDN_COMPRESS = 0x000080, + PISP_BE_BAYER_ENABLE_TDN_OUTPUT = 0x000100, + PISP_BE_BAYER_ENABLE_SDN = 0x000200, + PISP_BE_BAYER_ENABLE_BLC = 0x000400, + PISP_BE_BAYER_ENABLE_STITCH_INPUT = 0x000800, + PISP_BE_BAYER_ENABLE_STITCH_DECOMPRESS = 0x001000, + PISP_BE_BAYER_ENABLE_STITCH = 0x002000, + PISP_BE_BAYER_ENABLE_STITCH_COMPRESS = 0x004000, + PISP_BE_BAYER_ENABLE_STITCH_OUTPUT = 0x008000, + PISP_BE_BAYER_ENABLE_WBG = 0x010000, + PISP_BE_BAYER_ENABLE_CDN = 0x020000, + PISP_BE_BAYER_ENABLE_LSC = 0x040000, + PISP_BE_BAYER_ENABLE_TONEMAP = 0x080000, + PISP_BE_BAYER_ENABLE_CAC = 0x100000, + PISP_BE_BAYER_ENABLE_DEBIN = 0x200000, + PISP_BE_BAYER_ENABLE_DEMOSAIC = 0x400000, +}; + +enum pisp_be_rgb_enable { + PISP_BE_RGB_ENABLE_INPUT = 0x000001, + PISP_BE_RGB_ENABLE_CCM = 0x000002, + PISP_BE_RGB_ENABLE_SAT_CONTROL = 0x000004, + PISP_BE_RGB_ENABLE_YCBCR = 0x000008, + PISP_BE_RGB_ENABLE_FALSE_COLOUR = 0x000010, + PISP_BE_RGB_ENABLE_SHARPEN = 0x000020, + /* Preferred colours would occupy 0x000040 */ + PISP_BE_RGB_ENABLE_YCBCR_INVERSE = 0x000080, + PISP_BE_RGB_ENABLE_GAMMA = 0x000100, + PISP_BE_RGB_ENABLE_CSC0 = 0x000200, + PISP_BE_RGB_ENABLE_CSC1 = 0x000400, + PISP_BE_RGB_ENABLE_DOWNSCALE0 = 0x001000, + PISP_BE_RGB_ENABLE_DOWNSCALE1 = 0x002000, + PISP_BE_RGB_ENABLE_RESAMPLE0 = 0x008000, + PISP_BE_RGB_ENABLE_RESAMPLE1 = 0x010000, + PISP_BE_RGB_ENABLE_OUTPUT0 = 0x040000, + PISP_BE_RGB_ENABLE_OUTPUT1 = 0x080000, + PISP_BE_RGB_ENABLE_HOG = 0x200000 +}; + +#define PISP_BE_RGB_ENABLE_CSC(i) (PISP_BE_RGB_ENABLE_CSC0 << (i)) +#define PISP_BE_RGB_ENABLE_DOWNSCALE(i) (PISP_BE_RGB_ENABLE_DOWNSCALE0 << (i)) +#define PISP_BE_RGB_ENABLE_RESAMPLE(i) (PISP_BE_RGB_ENABLE_RESAMPLE0 << (i)) +#define PISP_BE_RGB_ENABLE_OUTPUT(i) (PISP_BE_RGB_ENABLE_OUTPUT0 << (i)) + +/* + * We use the enable flags to show when blocks are "dirty", but we need some + * extra ones too. + */ +enum pisp_be_dirty { + PISP_BE_DIRTY_GLOBAL = 0x0001, + PISP_BE_DIRTY_SH_FC_COMBINE = 0x0002, + PISP_BE_DIRTY_CROP = 0x0004 +}; + +/** + * struct pisp_be_global_config - PiSP global enable bitmaps + * @bayer_enables: Bayer input enable flags + * @rgb_enables: RGB output enable flags + * @bayer_order: Bayer input format ordering + * @pad: Padding bytes + */ +struct pisp_be_global_config { + __u32 bayer_enables; + __u32 rgb_enables; + __u8 bayer_order; + __u8 pad[3]; +} __attribute__((packed)); + +/** + * struct pisp_be_input_buffer_config - PiSP Back End input buffer + * @addr: Input buffer address + */ +struct pisp_be_input_buffer_config { + /* low 32 bits followed by high 32 bits (for each of up to 3 planes) */ + __u32 addr[3][2]; +} __attribute__((packed)); + +/** + * struct pisp_be_dpc_config - PiSP Back End DPC config + * + * Defective Pixel Correction configuration + * + * @coeff_level: Coefficient for the darkest neighbouring pixel value + * @coeff_range: Coefficient for the range of pixels for this Bayer channel + * @pad: Padding byte + * @flags: DPC configuration flags + */ +struct pisp_be_dpc_config { + __u8 coeff_level; + __u8 coeff_range; + __u8 pad; +#define PISP_BE_DPC_FLAG_FOLDBACK 1 + __u8 flags; +} __attribute__((packed)); + +/** + * struct pisp_be_geq_config - PiSP Back End GEQ config + * + * Green Equalisation configuration + * + * @offset: Offset value for threshold calculation + * @slope_sharper: Slope/Sharper configuration + * @min: Minimum value the threshold may have + * @max: Maximum value the threshold may have + */ +struct pisp_be_geq_config { + __u16 offset; +#define PISP_BE_GEQ_SHARPER BIT(15) +#define PISP_BE_GEQ_SLOPE ((1 << 10) - 1) + /* top bit is the "sharper" flag, slope value is bottom 10 bits */ + __u16 slope_sharper; + __u16 min; + __u16 max; +} __attribute__((packed)); + +/** + * struct pisp_be_tdn_input_buffer_config - PiSP Back End TDN input buffer + * @addr: TDN input buffer address + */ +struct pisp_be_tdn_input_buffer_config { + /* low 32 bits followed by high 32 bits */ + __u32 addr[2]; +} __attribute__((packed)); + +/** + * struct pisp_be_tdn_config - PiSP Back End TDN config + * + * Temporal Denoise configuration + * + * @black_level: Black level value subtracted from pixels + * @ratio: Multiplier for the LTA input frame + * @noise_constant: Constant offset value used in noise estimation + * @noise_slope: Noise estimation multiplier + * @threshold: Threshold for TDN operations + * @reset: Disable TDN operations + * @pad: Padding byte + */ +struct pisp_be_tdn_config { + __u16 black_level; + __u16 ratio; + __u16 noise_constant; + __u16 noise_slope; + __u16 threshold; + __u8 reset; + __u8 pad; +} __attribute__((packed)); + +/** + * struct pisp_be_tdn_output_buffer_config - PiSP Back End TDN output buffer + * @addr: TDN output buffer address + */ +struct pisp_be_tdn_output_buffer_config { + /* low 32 bits followed by high 32 bits */ + __u32 addr[2]; +} __attribute__((packed)); + +/** + * struct pisp_be_sdn_config - PiSP Back End SDN config + * + * Spatial Denoise configuration + * + * @black_level: Black level subtracted from pixel for noise estimation + * @leakage: Proportion of the original undenoised value to mix in + * denoised output + * @pad: Padding byte + * @noise_constant: Noise constant used for noise estimation + * @noise_slope: Noise slope value used for noise estimation + * @noise_constant2: Second noise constant used for noise estimation + * @noise_slope2: Second slope value used for noise estimation + */ +struct pisp_be_sdn_config { + __u16 black_level; + __u8 leakage; + __u8 pad; + __u16 noise_constant; + __u16 noise_slope; + __u16 noise_constant2; + __u16 noise_slope2; +} __attribute__((packed)); + +/** + * struct pisp_be_stitch_input_buffer_config - PiSP Back End Stitch input + * @addr: Stitch input buffer address + */ +struct pisp_be_stitch_input_buffer_config { + /* low 32 bits followed by high 32 bits */ + __u32 addr[2]; +} __attribute__((packed)); + +#define PISP_BE_STITCH_STREAMING_LONG 0x8000 +#define PISP_BE_STITCH_EXPOSURE_RATIO_MASK 0x7fff + +/** + * struct pisp_be_stitch_config - PiSP Back End Stitch config + * + * Stitch block configuration + * + * @threshold_lo: Low threshold value + * @threshold_diff_power: Low and high threshold difference + * @pad: Padding bytes + * @exposure_ratio: Multiplier to convert long exposure pixels into + * short exposure pixels + * @motion_threshold_256: Motion threshold above which short exposure + * pixels are used + * @motion_threshold_recip: Reciprocal of motion_threshold_256 value + */ +struct pisp_be_stitch_config { + __u16 threshold_lo; + __u8 threshold_diff_power; + __u8 pad; + + /* top bit indicates whether streaming input is the long exposure */ + __u16 exposure_ratio; + + __u8 motion_threshold_256; + __u8 motion_threshold_recip; +} __attribute__((packed)); + +/** + * struct pisp_be_stitch_output_buffer_config - PiSP Back End Stitch output + * @addr: Stitch input buffer address + */ +struct pisp_be_stitch_output_buffer_config { + /* low 32 bits followed by high 32 bits */ + __u32 addr[2]; +} __attribute__((packed)); + +/** + * struct pisp_be_cdn_config - PiSP Back End CDN config + * + * Colour Denoise configuration + * + * @thresh: Constant for noise estimation + * @iir_strength: Relative strength of the IIR part of the filter + * @g_adjust: Proportion of the change assigned to the G channel + */ +struct pisp_be_cdn_config { + __u16 thresh; + __u8 iir_strength; + __u8 g_adjust; +} __attribute__((packed)); + +#define PISP_BE_LSC_LOG_GRID_SIZE 5 +#define PISP_BE_LSC_GRID_SIZE (1 << PISP_BE_LSC_LOG_GRID_SIZE) +#define PISP_BE_LSC_STEP_PRECISION 18 + +/** + * struct pisp_be_lsc_config - PiSP Back End LSC config + * + * Lens Shading Correction configuration + * + * @grid_step_x: Reciprocal of cell size width + * @grid_step_y: Reciprocal of cell size height + * @lut_packed: Jointly-coded RGB gains for each LSC grid + */ +struct pisp_be_lsc_config { + /* (1<<18) / grid_cell_width */ + __u16 grid_step_x; + /* (1<<18) / grid_cell_height */ + __u16 grid_step_y; + /* RGB gains jointly encoded in 32 bits */ +#define PISP_BE_LSC_LUT_SIZE (PISP_BE_LSC_GRID_SIZE + 1) + __u32 lut_packed[PISP_BE_LSC_LUT_SIZE][PISP_BE_LSC_LUT_SIZE]; +} __attribute__((packed)); + +/** + * struct pisp_be_lsc_extra - PiSP Back End LSC Extra config + * @offset_x: Horizontal offset into the LSC table of this tile + * @offset_y: Vertical offset into the LSC table of this tile + */ +struct pisp_be_lsc_extra { + __u16 offset_x; + __u16 offset_y; +} __attribute__((packed)); + +#define PISP_BE_CAC_LOG_GRID_SIZE 3 +#define PISP_BE_CAC_GRID_SIZE (1 << PISP_BE_CAC_LOG_GRID_SIZE) +#define PISP_BE_CAC_STEP_PRECISION 20 + +/** + * struct pisp_be_cac_config - PiSP Back End CAC config + * + * Chromatic Aberration Correction config + * + * @grid_step_x: Reciprocal of cell size width + * @grid_step_y: Reciprocal of cell size height + * @lut: Pixel shift for the CAC grid + */ +struct pisp_be_cac_config { + /* (1<<20) / grid_cell_width */ + __u16 grid_step_x; + /* (1<<20) / grid_cell_height */ + __u16 grid_step_y; + /* [gridy][gridx][rb][xy] */ +#define PISP_BE_CAC_LUT_SIZE (PISP_BE_CAC_GRID_SIZE + 1) + __s8 lut[PISP_BE_CAC_LUT_SIZE][PISP_BE_CAC_LUT_SIZE][2][2]; +} __attribute__((packed)); + +/** + * struct pisp_be_cac_extra - PiSP Back End CAC extra config + * @offset_x: Horizontal offset into the CAC table of this tile + * @offset_y: Horizontal offset into the CAC table of this tile + */ +struct pisp_be_cac_extra { + __u16 offset_x; + __u16 offset_y; +} __attribute__((packed)); + +#define PISP_BE_DEBIN_NUM_COEFFS 4 + +/** + * struct pisp_be_debin_config - PiSP Back End Debin config + * + * Debinning configuration + * + * @coeffs: Filter coefficients for debinning + * @h_enable: Horizontal debinning enable + * @v_enable: Vertical debinning enable + * @pad: Padding bytes + */ +struct pisp_be_debin_config { + __s8 coeffs[PISP_BE_DEBIN_NUM_COEFFS]; + __s8 h_enable; + __s8 v_enable; + __s8 pad[2]; +} __attribute__((packed)); + +#define PISP_BE_TONEMAP_LUT_SIZE 64 + +/** + * struct pisp_be_tonemap_config - PiSP Back End Tonemap config + * + * Tonemapping configuration + * + * @detail_constant: Constant value for threshold calculation + * @detail_slope: Slope value for threshold calculation + * @iir_strength: Relative strength of the IIR fiter + * @strength: Strength factor + * @lut: Look-up table for tonemap curve + */ +struct pisp_be_tonemap_config { + __u16 detail_constant; + __u16 detail_slope; + __u16 iir_strength; + __u16 strength; + __u32 lut[PISP_BE_TONEMAP_LUT_SIZE]; +} __attribute__((packed)); + +/** + * struct pisp_be_demosaic_config - PiSP Back End Demosaic config + * + * Demosaic configuration + * + * @sharper: Use other Bayer channels to increase sharpness + * @fc_mode: Built-in false colour suppression mode + * @pad: Padding bytes + */ +struct pisp_be_demosaic_config { + __u8 sharper; + __u8 fc_mode; + __u8 pad[2]; +} __attribute__((packed)); + +/** + * struct pisp_be_ccm_config - PiSP Back End CCM config + * + * Colour Correction Matrix configuration + * + * @coeffs: Matrix coefficients + * @pad: Padding bytes + * @offsets: Offsets triplet + */ +struct pisp_be_ccm_config { + __s16 coeffs[9]; + __u8 pad[2]; + __s32 offsets[3]; +} __attribute__((packed)); + +/** + * struct pisp_be_sat_control_config - PiSP Back End SAT config + * + * Saturation Control configuration + * + * @shift_r: Left shift for Red colour channel + * @shift_g: Left shift for Green colour channel + * @shift_b: Left shift for Blue colour channel + * @pad: Padding byte + */ +struct pisp_be_sat_control_config { + __u8 shift_r; + __u8 shift_g; + __u8 shift_b; + __u8 pad; +} __attribute__((packed)); + +/** + * struct pisp_be_false_colour_config - PiSP Back End False Colour config + * + * False Colour configuration + * + * @distance: Distance of neighbouring pixels, either 1 or 2 + * @pad: Padding bytes + */ +struct pisp_be_false_colour_config { + __u8 distance; + __u8 pad[3]; +} __attribute__((packed)); + +#define PISP_BE_SHARPEN_SIZE 5 +#define PISP_BE_SHARPEN_FUNC_NUM_POINTS 9 + +/** + * struct pisp_be_sharpen_config - PiSP Back End Sharpening config + * + * Sharpening configuration + * + * @kernel0: Coefficient for filter 0 + * @pad0: Padding byte + * @kernel1: Coefficient for filter 1 + * @pad1: Padding byte + * @kernel2: Coefficient for filter 2 + * @pad2: Padding byte + * @kernel3: Coefficient for filter 3 + * @pad3: Padding byte + * @kernel4: Coefficient for filter 4 + * @pad4: Padding byte + * @threshold_offset0: Offset for filter 0 response calculation + * @threshold_slope0: Slope multiplier for the filter 0 response calculation + * @scale0: Scale factor for filter 0 response calculation + * @pad5: Padding byte + * @threshold_offset1: Offset for filter 0 response calculation + * @threshold_slope1: Slope multiplier for the filter 0 response calculation + * @scale1: Scale factor for filter 0 response calculation + * @pad6: Padding byte + * @threshold_offset2: Offset for filter 0 response calculation + * @threshold_slope2: Slope multiplier for the filter 0 response calculation + * @scale2: Scale factor for filter 0 response calculation + * @pad7: Padding byte + * @threshold_offset3: Offset for filter 0 response calculation + * @threshold_slope3: Slope multiplier for the filter 0 response calculation + * @scale3: Scale factor for filter 0 response calculation + * @pad8: Padding byte + * @threshold_offset4: Offset for filter 0 response calculation + * @threshold_slope4: Slope multiplier for the filter 0 response calculation + * @scale4: Scale factor for filter 0 response calculation + * @pad9: Padding byte + * @positive_strength: Factor to scale the positive sharpening strength + * @positive_pre_limit: Maximum allowed possible positive sharpening value + * @positive_func: Gain factor applied to positive sharpening response + * @positive_limit: Final gain factor applied to positive sharpening + * @negative_strength: Factor to scale the negative sharpening strength + * @negative_pre_limit: Maximum allowed possible negative sharpening value + * @negative_func: Gain factor applied to negative sharpening response + * @negative_limit: Final gain factor applied to negative sharpening + * @enables: Filter enable mask + * @white: White output pixel filter mask + * @black: Black output pixel filter mask + * @grey: Grey output pixel filter mask + */ +struct pisp_be_sharpen_config { + __s8 kernel0[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE]; + __s8 pad0[3]; + __s8 kernel1[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE]; + __s8 pad1[3]; + __s8 kernel2[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE]; + __s8 pad2[3]; + __s8 kernel3[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE]; + __s8 pad3[3]; + __s8 kernel4[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE]; + __s8 pad4[3]; + __u16 threshold_offset0; + __u16 threshold_slope0; + __u16 scale0; + __u16 pad5; + __u16 threshold_offset1; + __u16 threshold_slope1; + __u16 scale1; + __u16 pad6; + __u16 threshold_offset2; + __u16 threshold_slope2; + __u16 scale2; + __u16 pad7; + __u16 threshold_offset3; + __u16 threshold_slope3; + __u16 scale3; + __u16 pad8; + __u16 threshold_offset4; + __u16 threshold_slope4; + __u16 scale4; + __u16 pad9; + __u16 positive_strength; + __u16 positive_pre_limit; + __u16 positive_func[PISP_BE_SHARPEN_FUNC_NUM_POINTS]; + __u16 positive_limit; + __u16 negative_strength; + __u16 negative_pre_limit; + __u16 negative_func[PISP_BE_SHARPEN_FUNC_NUM_POINTS]; + __u16 negative_limit; + __u8 enables; + __u8 white; + __u8 black; + __u8 grey; +} __attribute__((packed)); + +/** + * struct pisp_be_sh_fc_combine_config - PiSP Back End Sharpening and + * False Colour config + * + * Sharpening and False Colour configuration + * + * @y_factor: Control amount of desaturation of pixels being darkened + * @c1_factor: Control amount of brightening of a pixel for the Cb + * channel + * @c2_factor: Control amount of brightening of a pixel for the Cr + * channel + * @pad: Padding byte + */ +struct pisp_be_sh_fc_combine_config { + __u8 y_factor; + __u8 c1_factor; + __u8 c2_factor; + __u8 pad; +} __attribute__((packed)); + +#define PISP_BE_GAMMA_LUT_SIZE 64 + +/** + * struct pisp_be_gamma_config - PiSP Back End Gamma configuration + * @lut: Gamma curve look-up table + */ +struct pisp_be_gamma_config { + __u32 lut[PISP_BE_GAMMA_LUT_SIZE]; +} __attribute__((packed)); + +/** + * struct pisp_be_crop_config - PiSP Back End Crop config + * + * Crop configuration + * + * @offset_x: Number of pixels cropped from the left of the tile + * @offset_y: Number of pixels cropped from the top of the tile + * @width: Width of the cropped tile output + * @height: Height of the cropped tile output + */ +struct pisp_be_crop_config { + __u16 offset_x, offset_y; + __u16 width, height; +} __attribute__((packed)); + +#define PISP_BE_RESAMPLE_FILTER_SIZE 96 + +/** + * struct pisp_be_resample_config - PiSP Back End Resampling config + * + * Resample configuration + * + * @scale_factor_h: Horizontal scale factor + * @scale_factor_v: Vertical scale factor + * @coef: Resample coefficients + */ +struct pisp_be_resample_config { + __u16 scale_factor_h, scale_factor_v; + __s16 coef[PISP_BE_RESAMPLE_FILTER_SIZE]; +} __attribute__((packed)); + +/** + * struct pisp_be_resample_extra - PiSP Back End Resample config + * + * Resample configuration + * + * @scaled_width: Width in pixels of the scaled output + * @scaled_height: Height in pixels of the scaled output + * @initial_phase_h: Initial horizontal phase + * @initial_phase_v: Initial vertical phase + */ +struct pisp_be_resample_extra { + __u16 scaled_width; + __u16 scaled_height; + __s16 initial_phase_h[3]; + __s16 initial_phase_v[3]; +} __attribute__((packed)); + +/** + * struct pisp_be_downscale_config - PiSP Back End Downscale config + * + * Downscale configuration + * + * @scale_factor_h: Horizontal scale factor + * @scale_factor_v: Vertical scale factor + * @scale_recip_h: Horizontal reciprocal factor + * @scale_recip_v: Vertical reciprocal factor + */ +struct pisp_be_downscale_config { + __u16 scale_factor_h; + __u16 scale_factor_v; + __u16 scale_recip_h; + __u16 scale_recip_v; +} __attribute__((packed)); + +/** + * struct pisp_be_downscale_extra - PiSP Back End Downscale Extra config + * @scaled_width: Scaled image width + * @scaled_height: Scaled image height + */ +struct pisp_be_downscale_extra { + __u16 scaled_width; + __u16 scaled_height; +} __attribute__((packed)); + +/** + * struct pisp_be_hog_config - PiSP Back End HOG config + * + * Histogram of Oriented Gradients configuration + * + * @compute_signed: Set 0 for unsigned gradients, 1 for signed + * @channel_mix: Channels proportions to use + * @stride: Stride in bytes between blocks directly below + */ +struct pisp_be_hog_config { + __u8 compute_signed; + __u8 channel_mix[3]; + __u32 stride; +} __attribute__((packed)); + +struct pisp_be_axi_config { + __u8 r_qos; /* Read QoS */ + __u8 r_cache_prot; /* Read { prot[2:0], cache[3:0] } */ + __u8 w_qos; /* Write QoS */ + __u8 w_cache_prot; /* Write { prot[2:0], cache[3:0] } */ +} __attribute__((packed)); + +/** + * enum pisp_be_transform - PiSP Back End Transform flags + * @PISP_BE_TRANSFORM_NONE: No transform + * @PISP_BE_TRANSFORM_HFLIP: Horizontal flip + * @PISP_BE_TRANSFORM_VFLIP: Vertical flip + * @PISP_BE_TRANSFORM_ROT180: 180 degress rotation + */ +enum pisp_be_transform { + PISP_BE_TRANSFORM_NONE = 0x0, + PISP_BE_TRANSFORM_HFLIP = 0x1, + PISP_BE_TRANSFORM_VFLIP = 0x2, + PISP_BE_TRANSFORM_ROT180 = + (PISP_BE_TRANSFORM_HFLIP | PISP_BE_TRANSFORM_VFLIP) +}; + +struct pisp_be_output_format_config { + struct pisp_image_format_config image; + __u8 transform; + __u8 pad[3]; + __u16 lo; + __u16 hi; + __u16 lo2; + __u16 hi2; +} __attribute__((packed)); + +/** + * struct pisp_be_output_buffer_config - PiSP Back End Output buffer + * @addr: Output buffer address + */ +struct pisp_be_output_buffer_config { + /* low 32 bits followed by high 32 bits (for each of 3 planes) */ + __u32 addr[3][2]; +} __attribute__((packed)); + +/** + * struct pisp_be_hog_buffer_config - PiSP Back End HOG buffer + * @addr: HOG buffer address + */ +struct pisp_be_hog_buffer_config { + /* low 32 bits followed by high 32 bits */ + __u32 addr[2]; +} __attribute__((packed)); + +/** + * struct pisp_be_config - RaspberryPi PiSP Back End Processing configuration + * + * @global: Global PiSP configuration + * @input_format: Input image format + * @decompress: Decompress configuration + * @dpc: Defective Pixel Correction configuration + * @geq: Green Equalisation configuration + * @tdn_input_format: Temporal Denoise input format + * @tdn_decompress: Temporal Denoise decompress configuration + * @tdn: Temporal Denoise configuration + * @tdn_compress: Temporal Denoise compress configuration + * @tdn_output_format: Temporal Denoise output format + * @sdn: Spatial Denoise configuration + * @blc: Black Level Correction configuration + * @stitch_compress: Stitch compress configuration + * @stitch_output_format: Stitch output format + * @stitch_input_format: Stitch input format + * @stitch_decompress: Stitch decompress configuration + * @stitch: Stitch configuration + * @lsc: Lens Shading Correction configuration + * @wbg: White Balance Gain configuration + * @cdn: Colour Denoise configuration + * @cac: Colour Aberration Correction configuration + * @debin: Debinning configuration + * @tonemap: Tonemapping configuration + * @demosaic: Demosaicing configuration + * @ccm: Colour Correction Matrix configuration + * @sat_control: Saturation Control configuration + * @ycbcr: YCbCr colour correction configuration + * @sharpen: Sharpening configuration + * @false_colour: False colour correction + * @sh_fc_combine: Sharpening and False Colour correction + * @ycbcr_inverse: Inverse YCbCr colour correction + * @gamma: Gamma curve configuration + * @csc: Color Space Conversion configuration + * @downscale: Downscale configuration + * @resample: Resampling configuration + * @output_format: Output format configuration + * @hog: HOG configuration + */ +struct pisp_be_config { + struct pisp_be_global_config global; + struct pisp_image_format_config input_format; + struct pisp_decompress_config decompress; + struct pisp_be_dpc_config dpc; + struct pisp_be_geq_config geq; + struct pisp_image_format_config tdn_input_format; + struct pisp_decompress_config tdn_decompress; + struct pisp_be_tdn_config tdn; + struct pisp_compress_config tdn_compress; + struct pisp_image_format_config tdn_output_format; + struct pisp_be_sdn_config sdn; + struct pisp_bla_config blc; + struct pisp_compress_config stitch_compress; + struct pisp_image_format_config stitch_output_format; + struct pisp_image_format_config stitch_input_format; + struct pisp_decompress_config stitch_decompress; + struct pisp_be_stitch_config stitch; + struct pisp_be_lsc_config lsc; + struct pisp_wbg_config wbg; + struct pisp_be_cdn_config cdn; + struct pisp_be_cac_config cac; + struct pisp_be_debin_config debin; + struct pisp_be_tonemap_config tonemap; + struct pisp_be_demosaic_config demosaic; + struct pisp_be_ccm_config ccm; + struct pisp_be_sat_control_config sat_control; + struct pisp_be_ccm_config ycbcr; + struct pisp_be_sharpen_config sharpen; + struct pisp_be_false_colour_config false_colour; + struct pisp_be_sh_fc_combine_config sh_fc_combine; + struct pisp_be_ccm_config ycbcr_inverse; + struct pisp_be_gamma_config gamma; + struct pisp_be_ccm_config csc[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_downscale_config downscale[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_resample_config resample[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_output_format_config + output_format[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_hog_config hog; +} __attribute__((packed)); + +/** + * enum pisp_tile_edge - PiSP Back End Tile position + * @PISP_LEFT_EDGE: Left edge tile + * @PISP_RIGHT_EDGE: Right edge tile + * @PISP_TOP_EDGE: Top edge tile + * @PISP_BOTTOM_EDGE: Bottom edge tile + */ +enum pisp_tile_edge { + PISP_LEFT_EDGE = (1 << 0), + PISP_RIGHT_EDGE = (1 << 1), + PISP_TOP_EDGE = (1 << 2), + PISP_BOTTOM_EDGE = (1 << 3) +}; + +/** + * struct pisp_tile - Raspberry Pi PiSP Back End tile configuration + * + * Tile parameters: each set of tile parameters is a 160-bytes block of data + * which contains the tile processing parameters. + * + * @edge: Edge tile flag + * @pad0: Padding bytes + * @input_addr_offset: Top-left pixel offset, in bytes + * @input_addr_offset2: Top-left pixel offset, in bytes for the second/ + * third image planes + * @input_offset_x: Horizontal offset in pixels of this tile in the + * input image + * @input_offset_y: Vertical offset in pixels of this tile in the + * input image + * @input_width: Width in pixels of this tile + * @input_height: Height in pixels of the this tile + * @tdn_input_addr_offset: TDN input image offset, in bytes + * @tdn_output_addr_offset: TDN output image offset, in bytes + * @stitch_input_addr_offset: Stitch input image offset, in bytes + * @stitch_output_addr_offset: Stitch output image offset, in bytes + * @lsc_grid_offset_x: Horizontal offset in the LSC table for this tile + * @lsc_grid_offset_y: Vertical offset in the LSC table for this tile + * @cac_grid_offset_x: Horizontal offset in the CAC table for this tile + * @cac_grid_offset_y: Horizontal offset in the CAC table for this tile + * @crop_x_start: Number of pixels cropped from the left of the + * tile + * @crop_x_end: Number of pixels cropped from the right of the + * tile + * @crop_y_start: Number of pixels cropped from the top of the + * tile + * @crop_y_end: Number of pixels cropped from the bottom of the + * tile + * @downscale_phase_x: Initial horizontal phase in pixels + * @downscale_phase_y: Initial vertical phase in pixels + * @resample_in_width: Width in pixels of the tile entering the + * Resample block + * @resample_in_height: Height in pixels of the tile entering the + * Resample block + * @resample_phase_x: Initial horizontal phase for the Resample block + * @resample_phase_y: Initial vertical phase for the Resample block + * @output_offset_x: Horizontal offset in pixels where the tile will + * be written into the output image + * @output_offset_y: Vertical offset in pixels where the tile will be + * written into the output image + * @output_width: Width in pixels in the output image of this tile + * @output_height: Height in pixels in the output image of this tile + * @output_addr_offset: Offset in bytes into the output buffer + * @output_addr_offset2: Offset in bytes into the output buffer for the + * second and third plane + * @output_hog_addr_offset: Offset in bytes into the HOG buffer where + * results of this tile are to be written + */ +struct pisp_tile { + __u8 edge; /* enum pisp_tile_edge */ + __u8 pad0[3]; + /* 4 bytes */ + __u32 input_addr_offset; + __u32 input_addr_offset2; + __u16 input_offset_x; + __u16 input_offset_y; + __u16 input_width; + __u16 input_height; + /* 20 bytes */ + __u32 tdn_input_addr_offset; + __u32 tdn_output_addr_offset; + __u32 stitch_input_addr_offset; + __u32 stitch_output_addr_offset; + /* 36 bytes */ + __u32 lsc_grid_offset_x; + __u32 lsc_grid_offset_y; + /* 44 bytes */ + __u32 cac_grid_offset_x; + __u32 cac_grid_offset_y; + /* 52 bytes */ + __u16 crop_x_start[PISP_BACK_END_NUM_OUTPUTS]; + __u16 crop_x_end[PISP_BACK_END_NUM_OUTPUTS]; + __u16 crop_y_start[PISP_BACK_END_NUM_OUTPUTS]; + __u16 crop_y_end[PISP_BACK_END_NUM_OUTPUTS]; + /* 68 bytes */ + /* Ordering is planes then branches */ + __u16 downscale_phase_x[3 * PISP_BACK_END_NUM_OUTPUTS]; + __u16 downscale_phase_y[3 * PISP_BACK_END_NUM_OUTPUTS]; + /* 92 bytes */ + __u16 resample_in_width[PISP_BACK_END_NUM_OUTPUTS]; + __u16 resample_in_height[PISP_BACK_END_NUM_OUTPUTS]; + /* 100 bytes */ + /* Ordering is planes then branches */ + __u16 resample_phase_x[3 * PISP_BACK_END_NUM_OUTPUTS]; + __u16 resample_phase_y[3 * PISP_BACK_END_NUM_OUTPUTS]; + /* 124 bytes */ + __u16 output_offset_x[PISP_BACK_END_NUM_OUTPUTS]; + __u16 output_offset_y[PISP_BACK_END_NUM_OUTPUTS]; + __u16 output_width[PISP_BACK_END_NUM_OUTPUTS]; + __u16 output_height[PISP_BACK_END_NUM_OUTPUTS]; + /* 140 bytes */ + __u32 output_addr_offset[PISP_BACK_END_NUM_OUTPUTS]; + __u32 output_addr_offset2[PISP_BACK_END_NUM_OUTPUTS]; + /* 156 bytes */ + __u32 output_hog_addr_offset; + /* 160 bytes */ +} __attribute__((packed)); + +/** + * struct pisp_be_tiles_config - Raspberry Pi PiSP Back End configuration + * @tiles: Tile descriptors + * @num_tiles: Number of tiles + * @config: PiSP Back End configuration + */ +struct pisp_be_tiles_config { + struct pisp_tile tiles[PISP_BACK_END_NUM_TILES]; + __u32 num_tiles; + struct pisp_be_config config; +} __attribute__((packed)); + +#endif /* _UAPI_PISP_BE_CONFIG_H_ */ diff --git a/include/uapi/linux/media/raspberrypi/pisp_common.h b/include/uapi/linux/media/raspberrypi/pisp_common.h new file mode 100644 index 000000000000..b2522e29c976 --- /dev/null +++ b/include/uapi/linux/media/raspberrypi/pisp_common.h @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * RP1 PiSP common definitions. + * + * Copyright (C) 2021 - Raspberry Pi Ltd. + * + */ +#ifndef _UAPI_PISP_COMMON_H_ +#define _UAPI_PISP_COMMON_H_ + +#include + +struct pisp_image_format_config { + /* size in pixels */ + __u16 width; + __u16 height; + /* must match struct pisp_image_format below */ + __u32 format; + __s32 stride; + /* some planar image formats will need a second stride */ + __s32 stride2; +} __attribute__((packed)); + +enum pisp_bayer_order { + /* + * Note how bayer_order&1 tells you if G is on the even pixels of the + * checkerboard or not, and bayer_order&2 tells you if R is on the even + * rows or is swapped with B. Note that if the top (of the 8) bits is + * set, this denotes a monochrome or greyscale image, and the lower bits + * should all be ignored. + */ + PISP_BAYER_ORDER_RGGB = 0, + PISP_BAYER_ORDER_GBRG = 1, + PISP_BAYER_ORDER_BGGR = 2, + PISP_BAYER_ORDER_GRBG = 3, + PISP_BAYER_ORDER_GREYSCALE = 128 +}; + +enum pisp_image_format { + /* + * Precise values are mostly tbd. Generally these will be portmanteau + * values comprising bit fields and flags. This format must be shared + * throughout the PiSP. + */ + PISP_IMAGE_FORMAT_BPS_8 = 0x00000000, + PISP_IMAGE_FORMAT_BPS_10 = 0x00000001, + PISP_IMAGE_FORMAT_BPS_12 = 0x00000002, + PISP_IMAGE_FORMAT_BPS_16 = 0x00000003, + PISP_IMAGE_FORMAT_BPS_MASK = 0x00000003, + + PISP_IMAGE_FORMAT_PLANARITY_INTERLEAVED = 0x00000000, + PISP_IMAGE_FORMAT_PLANARITY_SEMI_PLANAR = 0x00000010, + PISP_IMAGE_FORMAT_PLANARITY_PLANAR = 0x00000020, + PISP_IMAGE_FORMAT_PLANARITY_MASK = 0x00000030, + + PISP_IMAGE_FORMAT_SAMPLING_444 = 0x00000000, + PISP_IMAGE_FORMAT_SAMPLING_422 = 0x00000100, + PISP_IMAGE_FORMAT_SAMPLING_420 = 0x00000200, + PISP_IMAGE_FORMAT_SAMPLING_MASK = 0x00000300, + + PISP_IMAGE_FORMAT_ORDER_NORMAL = 0x00000000, + PISP_IMAGE_FORMAT_ORDER_SWAPPED = 0x00001000, + + PISP_IMAGE_FORMAT_SHIFT_0 = 0x00000000, + PISP_IMAGE_FORMAT_SHIFT_1 = 0x00010000, + PISP_IMAGE_FORMAT_SHIFT_2 = 0x00020000, + PISP_IMAGE_FORMAT_SHIFT_3 = 0x00030000, + PISP_IMAGE_FORMAT_SHIFT_4 = 0x00040000, + PISP_IMAGE_FORMAT_SHIFT_5 = 0x00050000, + PISP_IMAGE_FORMAT_SHIFT_6 = 0x00060000, + PISP_IMAGE_FORMAT_SHIFT_7 = 0x00070000, + PISP_IMAGE_FORMAT_SHIFT_8 = 0x00080000, + PISP_IMAGE_FORMAT_SHIFT_MASK = 0x000f0000, + + PISP_IMAGE_FORMAT_UNCOMPRESSED = 0x00000000, + PISP_IMAGE_FORMAT_COMPRESSION_MODE_1 = 0x01000000, + PISP_IMAGE_FORMAT_COMPRESSION_MODE_2 = 0x02000000, + PISP_IMAGE_FORMAT_COMPRESSION_MODE_3 = 0x03000000, + PISP_IMAGE_FORMAT_COMPRESSION_MASK = 0x03000000, + + PISP_IMAGE_FORMAT_HOG_SIGNED = 0x04000000, + PISP_IMAGE_FORMAT_HOG_UNSIGNED = 0x08000000, + PISP_IMAGE_FORMAT_INTEGRAL_IMAGE = 0x10000000, + PISP_IMAGE_FORMAT_WALLPAPER_ROLL = 0x20000000, + PISP_IMAGE_FORMAT_THREE_CHANNEL = 0x40000000, + + /* Lastly a few specific instantiations of the above. */ + PISP_IMAGE_FORMAT_SINGLE_16 = PISP_IMAGE_FORMAT_BPS_16, + PISP_IMAGE_FORMAT_THREE_16 = PISP_IMAGE_FORMAT_BPS_16 | + PISP_IMAGE_FORMAT_THREE_CHANNEL +}; + +#define PISP_IMAGE_FORMAT_bps_8(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_8) +#define PISP_IMAGE_FORMAT_bps_10(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_10) +#define PISP_IMAGE_FORMAT_bps_12(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_12) +#define PISP_IMAGE_FORMAT_bps_16(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_16) +#define PISP_IMAGE_FORMAT_bps(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) ? \ + 8 + (2 << (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) - 1)) : 8) +#define PISP_IMAGE_FORMAT_shift(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_SHIFT_MASK) / PISP_IMAGE_FORMAT_SHIFT_1) +#define PISP_IMAGE_FORMAT_three_channel(fmt) \ + ((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL) +#define PISP_IMAGE_FORMAT_single_channel(fmt) \ + (!((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL)) +#define PISP_IMAGE_FORMAT_compressed(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_COMPRESSION_MASK) != \ + PISP_IMAGE_FORMAT_UNCOMPRESSED) +#define PISP_IMAGE_FORMAT_sampling_444(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == \ + PISP_IMAGE_FORMAT_SAMPLING_444) +#define PISP_IMAGE_FORMAT_sampling_422(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == \ + PISP_IMAGE_FORMAT_SAMPLING_422) +#define PISP_IMAGE_FORMAT_sampling_420(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == \ + PISP_IMAGE_FORMAT_SAMPLING_420) +#define PISP_IMAGE_FORMAT_order_normal(fmt) \ + (!((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED)) +#define PISP_IMAGE_FORMAT_order_swapped(fmt) \ + ((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED) +#define PISP_IMAGE_FORMAT_interleaved(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == \ + PISP_IMAGE_FORMAT_PLANARITY_INTERLEAVED) +#define PISP_IMAGE_FORMAT_semiplanar(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == \ + PISP_IMAGE_FORMAT_PLANARITY_SEMI_PLANAR) +#define PISP_IMAGE_FORMAT_planar(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == \ + PISP_IMAGE_FORMAT_PLANARITY_PLANAR) +#define PISP_IMAGE_FORMAT_wallpaper(fmt) \ + ((fmt) & PISP_IMAGE_FORMAT_WALLPAPER_ROLL) +#define PISP_IMAGE_FORMAT_HOG(fmt) \ + ((fmt) & \ + (PISP_IMAGE_FORMAT_HOG_SIGNED | PISP_IMAGE_FORMAT_HOG_UNSIGNED)) + +#define PISP_WALLPAPER_WIDTH 128 /* in bytes */ + +struct pisp_bla_config { + __u16 black_level_r; + __u16 black_level_gr; + __u16 black_level_gb; + __u16 black_level_b; + __u16 output_black_level; + __u8 pad[2]; +} __attribute__((packed)); + +struct pisp_wbg_config { + __u16 gain_r; + __u16 gain_g; + __u16 gain_b; + __u8 pad[2]; +} __attribute__((packed)); + +struct pisp_compress_config { + /* value subtracted from incoming data */ + __u16 offset; + __u8 pad; + /* 1 => Companding; 2 => Delta (recommended); 3 => Combined (for HDR) */ + __u8 mode; +} __attribute__((packed)); + +struct pisp_decompress_config { + /* value added to reconstructed data */ + __u16 offset; + __u8 pad; + /* 1 => Companding; 2 => Delta (recommended); 3 => Combined (for HDR) */ + __u8 mode; +} __attribute__((packed)); + +enum pisp_axi_flags { + /* + * round down bursts to end at a 32-byte boundary, to align following + * bursts + */ + PISP_AXI_FLAG_ALIGN = 128, + /* for FE writer: force WSTRB high, to pad output to 16-byte boundary */ + PISP_AXI_FLAG_PAD = 64, + /* for FE writer: Use Output FIFO level to trigger "panic" */ + PISP_AXI_FLAG_PANIC = 32, +}; + +struct pisp_axi_config { + /* + * burst length minus one, which must be in the range 0:15; OR'd with + * flags + */ + __u8 maxlen_flags; + /* { prot[2:0], cache[3:0] } fields, echoed on AXI bus */ + __u8 cache_prot; + /* QoS field(s) (4x4 bits for FE writer; 4 bits for other masters) */ + __u16 qos; +} __attribute__((packed)); + +#endif /* _UAPI_PISP_COMMON_H_ */ -- cgit v1.2.3 From 8f6c2202222faffa0959929d8cd9090254a60831 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Wed, 26 Jun 2024 20:14:34 +0200 Subject: media: uapi: Add meta pixel format for PiSP BE config Add format description for the PiSP Back End configuration parameter buffer. Signed-off-by: Jacopo Mondi Reviewed-by: Naushir Patuck Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index dd6876380fe3..96fc0456081e 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -843,6 +843,9 @@ struct v4l2_pix_format { #define V4L2_META_FMT_RK_ISP1_PARAMS v4l2_fourcc('R', 'K', '1', 'P') /* Rockchip ISP1 3A Parameters */ #define V4L2_META_FMT_RK_ISP1_STAT_3A v4l2_fourcc('R', 'K', '1', 'S') /* Rockchip ISP1 3A Statistics */ +/* Vendor specific - used for RaspberryPi PiSP */ +#define V4L2_META_FMT_RPI_BE_CFG v4l2_fourcc('R', 'P', 'B', 'C') /* PiSP BE configuration */ + #ifdef __KERNEL__ /* * Line-based metadata formats. Remember to update v4l_fill_fmtdesc() when -- cgit v1.2.3 From d260c1224786c870edbae09ef6ecf5f35361821e Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Wed, 26 Jun 2024 20:14:35 +0200 Subject: media: uapi: Add PiSP Compressed RAW Bayer formats Add Raspberry Pi compressed RAW Bayer formats. The compression algorithm description is provided by Nick Hollinghurst from Raspberry Pi. Signed-off-by: Jacopo Mondi Reviewed-by: Naushir Patuck Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 96fc0456081e..4e91362da6da 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -816,6 +816,18 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_IPU3_SGRBG10 v4l2_fourcc('i', 'p', '3', 'G') /* IPU3 packed 10-bit GRBG bayer */ #define V4L2_PIX_FMT_IPU3_SRGGB10 v4l2_fourcc('i', 'p', '3', 'r') /* IPU3 packed 10-bit RGGB bayer */ +/* Raspberry Pi PiSP compressed formats. */ +#define V4L2_PIX_FMT_PISP_COMP1_RGGB v4l2_fourcc('P', 'C', '1', 'R') /* PiSP 8-bit mode 1 compressed RGGB bayer */ +#define V4L2_PIX_FMT_PISP_COMP1_GRBG v4l2_fourcc('P', 'C', '1', 'G') /* PiSP 8-bit mode 1 compressed GRBG bayer */ +#define V4L2_PIX_FMT_PISP_COMP1_GBRG v4l2_fourcc('P', 'C', '1', 'g') /* PiSP 8-bit mode 1 compressed GBRG bayer */ +#define V4L2_PIX_FMT_PISP_COMP1_BGGR v4l2_fourcc('P', 'C', '1', 'B') /* PiSP 8-bit mode 1 compressed BGGR bayer */ +#define V4L2_PIX_FMT_PISP_COMP1_MONO v4l2_fourcc('P', 'C', '1', 'M') /* PiSP 8-bit mode 1 compressed monochrome */ +#define V4L2_PIX_FMT_PISP_COMP2_RGGB v4l2_fourcc('P', 'C', '2', 'R') /* PiSP 8-bit mode 2 compressed RGGB bayer */ +#define V4L2_PIX_FMT_PISP_COMP2_GRBG v4l2_fourcc('P', 'C', '2', 'G') /* PiSP 8-bit mode 2 compressed GRBG bayer */ +#define V4L2_PIX_FMT_PISP_COMP2_GBRG v4l2_fourcc('P', 'C', '2', 'g') /* PiSP 8-bit mode 2 compressed GBRG bayer */ +#define V4L2_PIX_FMT_PISP_COMP2_BGGR v4l2_fourcc('P', 'C', '2', 'B') /* PiSP 8-bit mode 2 compressed BGGR bayer */ +#define V4L2_PIX_FMT_PISP_COMP2_MONO v4l2_fourcc('P', 'C', '2', 'M') /* PiSP 8-bit mode 2 compressed monochrome */ + /* SDR formats - used only for Software Defined Radio devices */ #define V4L2_SDR_FMT_CU8 v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */ #define V4L2_SDR_FMT_CU16LE v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */ -- cgit v1.2.3 From d04bccd8c19d601232ed3e3c9e248c0040167d47 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 7 Jun 2024 16:55:37 +0200 Subject: listmount: allow listing in reverse order util-linux is about to implement listmount() and statmount() support. Karel requested the ability to scan the mount table in backwards order because that's what libmount currently does in order to get the latest mount first. We currently don't support this in listmount(). Add a new LISTMOUNT_REVERSE flag to allow listing mounts in reverse order. For example, listing all child mounts of /sys without LISTMOUNT_REVERSE gives: /sys/kernel/security @ mnt_id: 4294968369 /sys/fs/cgroup @ mnt_id: 4294968370 /sys/firmware/efi/efivars @ mnt_id: 4294968371 /sys/fs/bpf @ mnt_id: 4294968372 /sys/kernel/tracing @ mnt_id: 4294968373 /sys/kernel/debug @ mnt_id: 4294968374 /sys/fs/fuse/connections @ mnt_id: 4294968375 /sys/kernel/config @ mnt_id: 4294968376 whereas with LISTMOUNT_REVERSE it gives: /sys/kernel/config @ mnt_id: 4294968376 /sys/fs/fuse/connections @ mnt_id: 4294968375 /sys/kernel/debug @ mnt_id: 4294968374 /sys/kernel/tracing @ mnt_id: 4294968373 /sys/fs/bpf @ mnt_id: 4294968372 /sys/firmware/efi/efivars @ mnt_id: 4294968371 /sys/fs/cgroup @ mnt_id: 4294968370 /sys/kernel/security @ mnt_id: 4294968369 Link: https://lore.kernel.org/r/20240607-vfs-listmount-reverse-v1-4-7877a2bfa5e5@kernel.org Reviewed-by: Josef Bacik Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index ad5478dbad00..88d78de1519f 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -207,5 +207,6 @@ struct mnt_id_req { * Special @mnt_id values that can be passed to listmount */ #define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ #endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From 09b31295f833031c88419550172703d45c5401e3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Jun 2024 11:49:47 -0400 Subject: fs: export the mount ns id via statmount In order to allow users to iterate through children mount namespaces via listmount we need a way for them to know what the ns id for the mount. Add a new field to statmount called mnt_ns_id which will carry the ns id for the given mount entry. Co-developed-by: Christian Brauner Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/6dabf437331fb7415d886f7c64b21cb2a50b1c66.1719243756.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 88d78de1519f..a07508aee518 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -172,7 +172,8 @@ struct statmount { __u64 propagate_from; /* Propagation from in current namespace */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ - __u64 __spare2[50]; + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u64 __spare2[49]; char str[]; /* Variable size part containing strings */ }; @@ -202,6 +203,7 @@ struct mnt_id_req { #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 0a3deb11858ae8a0b3849b5fda45512ad383f0e1 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 24 Jun 2024 11:49:48 -0400 Subject: fs: Allow listmount() in foreign mount namespace Expand struct mnt_id_req to add an optional mnt_ns_id field. When this field is populated, listmount() will be performed on the specified mount namespace, provided the currently application has CAP_SYS_ADMIN in its user namespace and the mount namespace is a child of the current namespace. Co-developed-by: Josef Bacik Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/49930bdce29a8367a213eb14c1e68e7e49284f86.1719243756.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index a07508aee518..ee1559cd6764 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -189,10 +189,12 @@ struct mnt_id_req { __u32 spare; __u64 mnt_id; __u64 param; + __u64 mnt_ns_id; }; /* List of all mnt_id_req versions. */ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ /* * @mask bits for statmount(2) -- cgit v1.2.3 From e8e43a1fcc5c07575f37e40f8a2cd78aee46f9a0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Jun 2024 11:49:50 -0400 Subject: fs: add an ioctl to get the mnt ns id from nsfs In order to utilize the listmount() and statmount() extensions that allow us to call them on different namespaces we need a way to get the mnt namespace id from user space. Add an ioctl to nsfs that will allow us to extract the mnt namespace id in order to make these new extensions usable. Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/180449959d5a756af7306d6bda55f41b9d53e3cb.1719243756.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index a0c8552b64ee..56e8b1639b98 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -15,5 +15,7 @@ #define NS_GET_NSTYPE _IO(NSIO, 0x3) /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Get the id for a mount namespace */ +#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From 5b08bd408534bfb3a7cf5778da5b27d4e4fffe12 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Jun 2024 16:11:42 +0200 Subject: pidfs: allow retrieval of namespace file descriptors For users that hold a reference to a pidfd procfs might not even be available nor is it desirable to parse through procfs just for the sake of getting namespace file descriptors for a process. Make it possible to directly retrieve namespace file descriptors from a pidfd. Pidfds already can be used with setns() to change a set of namespaces atomically. Link: https://lore.kernel.org/r/20240627-work-pidfs-v1-4-7e9ab6cc3bb1@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Josef Bacik Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- include/uapi/linux/pidfd.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 72ec000a97cd..565fc0629fff 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -5,6 +5,7 @@ #include #include +#include /* Flags for pidfd_open(). */ #define PIDFD_NONBLOCK O_NONBLOCK @@ -15,4 +16,17 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +#define PIDFS_IOCTL_MAGIC 0xFF + +#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) + #endif /* _UAPI_LINUX_PIDFD_H */ -- cgit v1.2.3 From 46fb3ba95b93d1887e6dfa02a535e0526062de95 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Jun 2024 17:08:50 +0300 Subject: ethtool: Add an interface for flashing transceiver modules' firmware CMIS compliant modules such as QSFP-DD might be running a firmware that can be updated in a vendor-neutral way by exchanging messages between the host and the module as described in section 7.3.1 of revision 5.2 of the CMIS standard. Add a pair of new ethtool messages that allow: * User space to trigger firmware update of transceiver modules * The kernel to notify user space about the progress of the process The user interface is designed to be asynchronous in order to avoid RTNL being held for too long and to allow several modules to be updated simultaneously. The interface is designed with CMIS compliant modules in mind, but kept generic enough to accommodate future use cases, if these arise. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 18 ++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 19 +++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 8733a3117902..e011384c915c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -877,6 +877,24 @@ enum ethtool_mm_verify_status { ETHTOOL_MM_VERIFY_STATUS_DISABLED, }; +/** + * enum ethtool_module_fw_flash_status - plug-in module firmware flashing status + * @ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED: The firmware flashing process has + * started. + * @ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS: The firmware flashing process + * is in progress. + * @ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED: The firmware flashing process was + * completed successfully. + * @ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR: The firmware flashing process was + * stopped due to an error. + */ +enum ethtool_module_fw_flash_status { + ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED = 1, + ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS, + ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED, + ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR, +}; + /** * struct ethtool_gstrings - string set for data tagging * @cmd: Command number = %ETHTOOL_GSTRINGS diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index d15856c7e001..840dabdc9d88 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -57,6 +57,7 @@ enum { ETHTOOL_MSG_PLCA_GET_STATUS, ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, + ETHTOOL_MSG_MODULE_FW_FLASH_ACT, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -109,6 +110,7 @@ enum { ETHTOOL_MSG_PLCA_NTF, ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, + ETHTOOL_MSG_MODULE_FW_FLASH_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -1018,6 +1020,23 @@ enum { ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; +/* MODULE_FW_FLASH */ + +enum { + ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, + ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ + ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ + + /* add new constants above here */ + __ETHTOOL_A_MODULE_FW_FLASH_CNT, + ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From f9af549d1fd31487bbbc666b5b158cfc940ccc17 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Jun 2024 15:40:52 -0400 Subject: fs: export mount options via statmount() statmount() can export arbitrary strings, so utilize the __spare1 slot for a mnt_opts string pointer, and then support asking for and setting the mount options during statmount(). This calls into the helper for showing mount options, which already uses a seq_file, so fits in nicely with our existing mechanism for exporting strings via statmount(). Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/3aa6bf8bd5d0a21df9ebd63813af8ab532c18276.1719257716.git.josef@toxicpanda.com Reviewed-by: Jeff Layton [brauner: only call sb->s_op->show_options()] Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index ee1559cd6764..225bc366ffcb 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 __spare1; + __u32 mnt_opts; /* [str] Mount options of the mount */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -206,6 +206,7 @@ struct mnt_id_req { #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ #define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 3e26d9f08fbe0b73e951a5e810fdb7a332b7e37f Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 20 Jun 2024 14:27:22 +0200 Subject: iio: core: Add new DMABUF interface infrastructure Add the necessary infrastructure to the IIO core to support a new optional DMABUF based interface. With this new interface, DMABUF objects (externally created) can be attached to a IIO buffer, and subsequently used for data transfer. A userspace application can then use this interface to share DMABUF objects between several interfaces, allowing it to transfer data in a zero-copy fashion, for instance between IIO and the USB stack. The userspace application can also memory-map the DMABUF objects, and access the sample data directly. The advantage of doing this vs. the read() interface is that it avoids an extra copy of the data between the kernel and userspace. This is particularly userful for high-speed devices which produce several megabytes or even gigabytes of data per second. As part of the interface, 3 new IOCTLs have been added: IIO_BUFFER_DMABUF_ATTACH_IOCTL(int fd): Attach the DMABUF object identified by the given file descriptor to the buffer. IIO_BUFFER_DMABUF_DETACH_IOCTL(int fd): Detach the DMABUF object identified by the given file descriptor from the buffer. Note that closing the IIO buffer's file descriptor will automatically detach all previously attached DMABUF objects. IIO_BUFFER_DMABUF_ENQUEUE_IOCTL(struct iio_dmabuf *): Request a data transfer to/from the given DMABUF object. Its file descriptor, as well as the transfer size and flags are provided in the "iio_dmabuf" structure. These three IOCTLs have to be performed on the IIO buffer's file descriptor, obtained using the IIO_BUFFER_GET_FD_IOCTL() ioctl. Signed-off-by: Paul Cercueil Co-developed-by: Nuno Sa Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240620122726.41232-4-paul@crapouillou.net Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/buffer.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iio/buffer.h b/include/uapi/linux/iio/buffer.h index 13939032b3f6..c666aa95e532 100644 --- a/include/uapi/linux/iio/buffer.h +++ b/include/uapi/linux/iio/buffer.h @@ -5,6 +5,28 @@ #ifndef _UAPI_IIO_BUFFER_H_ #define _UAPI_IIO_BUFFER_H_ +#include + +/* Flags for iio_dmabuf.flags */ +#define IIO_BUFFER_DMABUF_CYCLIC (1 << 0) +#define IIO_BUFFER_DMABUF_SUPPORTED_FLAGS 0x00000001 + +/** + * struct iio_dmabuf - Descriptor for a single IIO DMABUF object + * @fd: file descriptor of the DMABUF object + * @flags: one or more IIO_BUFFER_DMABUF_* flags + * @bytes_used: number of bytes used in this DMABUF for the data transfer. + * Should generally be set to the DMABUF's size. + */ +struct iio_dmabuf { + __u32 fd; + __u32 flags; + __u64 bytes_used; +}; + #define IIO_BUFFER_GET_FD_IOCTL _IOWR('i', 0x91, int) +#define IIO_BUFFER_DMABUF_ATTACH_IOCTL _IOW('i', 0x92, int) +#define IIO_BUFFER_DMABUF_DETACH_IOCTL _IOW('i', 0x93, int) +#define IIO_BUFFER_DMABUF_ENQUEUE_IOCTL _IOW('i', 0x94, struct iio_dmabuf) #endif /* _UAPI_IIO_BUFFER_H_ */ -- cgit v1.2.3 From 7c8110057b1bb9ab9f47ac0efc554d5c3a53b693 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Jun 2024 14:35:50 -0700 Subject: tcp_metrics: add UAPI to the header guard tcp_metrics' header lacks the customary _UAPI in the header guard. This makes YNL build rules work less seamlessly. We can easily fix that on YNL side, but this could also be problematic if we ever needed to create a kernel-only tcp_metrics.h. Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- include/uapi/linux/tcp_metrics.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index 7cb4a172feed..c48841076998 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* tcp_metrics.h - TCP Metrics Interface */ -#ifndef _LINUX_TCP_METRICS_H -#define _LINUX_TCP_METRICS_H +#ifndef _UAPI_LINUX_TCP_METRICS_H +#define _UAPI_LINUX_TCP_METRICS_H #include @@ -58,4 +58,4 @@ enum { #define TCP_METRICS_CMD_MAX (__TCP_METRICS_CMD_MAX - 1) -#endif /* _LINUX_TCP_METRICS_H */ +#endif /* _UAPI_LINUX_TCP_METRICS_H */ -- cgit v1.2.3 From 85674625e0bc25be4dbaa165a556ef6037328379 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Jun 2024 14:35:51 -0700 Subject: tcp_metrics: add netlink protocol spec in YAML Add a protocol spec for tcp_metrics, so that it's accessible via YNL. Useful at the very least for testing fixes. In this episode of "10,000 ways to complicate netlink" the metric nest has defines which are off by 1. iproute2 does: struct rtattr *m[TCP_METRIC_MAX + 1 + 1]; parse_rtattr_nested(m, TCP_METRIC_MAX + 1, a); for (i = 0; i < TCP_METRIC_MAX + 1; i++) { // ... attr = m[i + 1]; This is too weird to support in YNL, add a new set of defines with _correct_ values to the official kernel header. Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- include/uapi/linux/tcp_metrics.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index c48841076998..927c735a5b0e 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -27,6 +27,22 @@ enum tcp_metric_index { #define TCP_METRIC_MAX (__TCP_METRIC_MAX - 1) +/* Re-define enum tcp_metric_index, again, using the values carried + * as netlink attribute types. + */ +enum { + TCP_METRICS_A_METRICS_RTT = 1, + TCP_METRICS_A_METRICS_RTTVAR, + TCP_METRICS_A_METRICS_SSTHRESH, + TCP_METRICS_A_METRICS_CWND, + TCP_METRICS_A_METRICS_REODERING, + TCP_METRICS_A_METRICS_RTT_US, + TCP_METRICS_A_METRICS_RTTVAR_US, + + __TCP_METRICS_A_METRICS_MAX +}; +#define TCP_METRICS_A_METRICS_MAX (__TCP_METRICS_A_METRICS_MAX - 1) + enum { TCP_METRICS_ATTR_UNSPEC, TCP_METRICS_ATTR_ADDR_IPV4, /* u32 */ -- cgit v1.2.3 From 6555acdefc758a4dae7038c3f2301f311e287218 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Tue, 2 Jul 2024 19:21:18 +0200 Subject: um: time-travel: support time-travel protocol broadcast messages Add a message type to the time-travel protocol to broadcast a small (64-bit) value to all participants in a simulation. The main use case is to have an identical message come to all participants in a simulation, e.g. to separate out logs for different tests running in a single simulation. Down in the guts of time_travel_handle_message() we can't use printk() and not even printk_deferred(), so just store the message and print it at the start of the userspace() function. Unfortunately this means that other prints in the kernel can actually bypass the message, but in most cases where this is used, for example to separate test logs, userspace will be involved. Also, even if we could use printk_deferred(), we'd still need to flush it out in the userspace() function since otherwise userspace messages might cross it. As a result, this is a reasonable compromise, there's no need to have any core changes and it solves the main use case we have for it. Signed-off-by: Mordechay Goodstein Link: https://patch.msgid.link/20240702192118.c4093bc5b15e.I2ca8d006b67feeb866ac2017af7b741c9e06445a@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/um_timetravel.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/um_timetravel.h b/include/uapi/linux/um_timetravel.h index ca3238222b6d..078ea401aa2a 100644 --- a/include/uapi/linux/um_timetravel.h +++ b/include/uapi/linux/um_timetravel.h @@ -123,6 +123,17 @@ enum um_timetravel_ops { * the simulation. */ UM_TIMETRAVEL_GET_TOD = 8, + + /** + * @UM_TIMETRAVEL_BROADCAST: Send/Receive a broadcast message. + * This message can be used to sync all components in the system + * with a single message, if the calender gets the message, the + * calender broadcast the message to all components, and if a + * component receives it it should act based on it e.g print a + * message to it's log system. + * (calendar <-> host) + */ + UM_TIMETRAVEL_BROADCAST = 9, }; #endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */ -- cgit v1.2.3 From bfb80d8bc92fa70f5b44a57ed2b24d57685fe188 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jul 2024 19:21:21 +0200 Subject: um: add shared memory optimisation for time-travel=ext With external time travel, a LOT of message can end up being exchanged on the socket, taking a significant amount of time just to do that. Add a new shared memory optimisation to that, where a number of changes are made: - the controller sends a client ID and a shared memory FD (and a logging FD we don't use) in the ACK message to the initial START - the shared memory holds the current time and the free_until value, so that there's no need to exchange messages for that - if the client that's running has shared memory support, any client (the running one included) can request the next time it wants to run inside the shared memory, rather than sending a message, by also updating the free_until value - when shared memory is enabled, RUN/WAIT messages no longer have an ACK, further cutting down on messages Together, this can reduce the number of messages very significantly, and reduce overall test/simulation run time. Co-developed-by: Mordechay Goodstein Signed-off-by: Mordechay Goodstein Link: https://patch.msgid.link/20240702192118.6ad0a083f574.Ie41206c8ce4507fe26b991937f47e86c24ca7a31@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/um_timetravel.h | 179 ++++++++++++++++++++++++++++++++++--- 1 file changed, 165 insertions(+), 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/um_timetravel.h b/include/uapi/linux/um_timetravel.h index 078ea401aa2a..546a690b0346 100644 --- a/include/uapi/linux/um_timetravel.h +++ b/include/uapi/linux/um_timetravel.h @@ -1,17 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019 - 2023 Intel Corporation */ #ifndef _UAPI_LINUX_UM_TIMETRAVEL_H #define _UAPI_LINUX_UM_TIMETRAVEL_H @@ -50,6 +39,36 @@ struct um_timetravel_msg { __u64 time; }; +/* max number of file descriptors that can be sent/received in a message */ +#define UM_TIMETRAVEL_MAX_FDS 2 + +/** + * enum um_timetravel_shared_mem_fds - fds sent in ACK message for START message + */ +enum um_timetravel_shared_mem_fds { + /** + * @UM_TIMETRAVEL_SHARED_MEMFD: Index of the shared memory file + * descriptor in the control message + */ + UM_TIMETRAVEL_SHARED_MEMFD, + /** + * @UM_TIMETRAVEL_SHARED_LOGFD: Index of the logging file descriptor + * in the control message + */ + UM_TIMETRAVEL_SHARED_LOGFD, + UM_TIMETRAVEL_SHARED_MAX_FDS, +}; + +/** + * enum um_timetravel_start_ack - ack-time mask for start message + */ +enum um_timetravel_start_ack { + /** + * @UM_TIMETRAVEL_START_ACK_ID: client ID that controller allocated. + */ + UM_TIMETRAVEL_START_ACK_ID = 0xffff, +}; + /** * enum um_timetravel_ops - Operation codes */ @@ -57,7 +76,9 @@ enum um_timetravel_ops { /** * @UM_TIMETRAVEL_ACK: response (ACK) to any previous message, * this usually doesn't carry any data in the 'time' field - * unless otherwise specified below + * unless otherwise specified below, note: while using shared + * memory no ACK for WAIT and RUN messages, for more info see + * &struct um_timetravel_schedshm. */ UM_TIMETRAVEL_ACK = 0, @@ -136,4 +157,134 @@ enum um_timetravel_ops { UM_TIMETRAVEL_BROADCAST = 9, }; +/* version of struct um_timetravel_schedshm */ +#define UM_TIMETRAVEL_SCHEDSHM_VERSION 2 + +/** + * enum um_timetravel_schedshm_cap - time travel capabilities of every client + * + * These flags must be set immediately after processing the ACK to + * the START message, before sending any message to the controller. + */ +enum um_timetravel_schedshm_cap { + /** + * @UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE: client can read current time + * update internal time request to shared memory and read + * free until and send no Ack on RUN and doesn't expect ACK on + * WAIT. + */ + UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE = 0x1, +}; + +/** + * enum um_timetravel_schedshm_flags - time travel flags of every client + */ +enum um_timetravel_schedshm_flags { + /** + * @UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN: client has a request to run. + * It's set by client when it has a request to run, if (and only + * if) the @running_id points to a client that is able to use + * shared memory, i.e. has %UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE + * (this includes the client itself). Otherwise, a message must + * be used. + */ + UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN = 0x1, +}; + +/** + * DOC: Time travel shared memory overview + * + * The main purpose of the shared memory is to avoid all time travel message + * that don't need any action, for example current time can be held in shared + * memory without the need of any client to send a message UM_TIMETRAVEL_GET + * in order to know what's the time. + * + * Since this is shared memory with all clients and controller and controller + * creates the shared memory space, all time values are absolute to controller + * time. So first time client connects to shared memory mode it should take the + * current_time value in shared memory and keep it internally as a diff to + * shared memory times, and once shared memory is initialized, any interaction + * with the controller must happen in the controller time domain, including any + * messages (for clients that are not using shared memory, the controller will + * handle an offset and make the clients think they start at time zero.) + * + * Along with the shared memory file descriptor is sent to the client a logging + * file descriptor, to have all logs related to shared memory, + * logged into one place. note: to have all logs synced into log file at write, + * file should be flushed (fflush) after writing to it. + * + * To avoid memory corruption, we define below for each field who can write to + * it at what time, defined in the structure fields. + * + * To avoid having to pack this struct, all fields in it must be naturally aligned + * (i.e. aligned to their size). + */ + +/** + * union um_timetravel_schedshm_client - UM time travel client struct + * + * Every entity using the shared memory including the controller has a place in + * the um_timetravel_schedshm clients array, that holds info related to the client + * using the shared memory, and can be set only by the client after it gets the + * fd memory. + * + * @capa: bit fields with client capabilities see + * &enum um_timetravel_schedshm_cap, set by client once after getting the + * shared memory file descriptor. + * @flags: bit fields for flags see &enum um_timetravel_schedshm_flags for doc. + * @req_time: request time to run, set by client on every request it needs. + * @name: unique id sent to the controller by client with START message. + */ +union um_timetravel_schedshm_client { + struct { + __u32 capa; + __u32 flags; + __u64 req_time; + __u64 name; + }; + char reserve[128]; /* reserved for future usage */ +}; + +/** + * struct um_timetravel_schedshm - UM time travel shared memory struct + * + * @hdr: header fields: + * @version: Current version struct UM_TIMETRAVEL_SCHEDSHM_VERSION, + * set by controller once at init, clients must check this after mapping + * and work without shared memory if they cannot handle the indicated + * version. + * @len: Length of all the memory including header (@hdr), clients should once + * per connection first mmap the header and take the length (@len) to remap the entire size. + * This is done in order to support dynamic struct size letting number of + * clients be dynamic based on controller support. + * @free_until: Stores the next request to run by any client, in order for the + * current client to know how long it can still run. A client needs to (at + * least) reload this value immediately after communicating with any other + * client, since the controller will update this field when a new request + * is made by any client. Clients also must update this value when they + * insert/update an own request into the shared memory while not running + * themselves, and the new request is before than the current value. + * current_time: Current time, can only be set by the client in running state + * (indicated by @running_id), though that client may only run until @free_until, + * so it must remain smaller than @free_until. + * @running_id: The current client in state running, set before a client is + * notified that it's now running. + * @max_clients: size of @clients array, set once at init by the controller. + * @clients: clients array see &union um_timetravel_schedshm_client for doc, + * set only by client. + */ +struct um_timetravel_schedshm { + union { + struct { + __u32 version; + __u32 len; + __u64 free_until; + __u64 current_time; + __u16 running_id; + __u16 max_clients; + }; + char hdr[4096]; /* align to 4K page size */ + }; + union um_timetravel_schedshm_client clients[]; +}; #endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */ -- cgit v1.2.3 From 4ecaf7e98a3ae0c843d67c76649ecc694232834b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 2 Jul 2024 15:33:54 -0400 Subject: tracing: Have memmapped ring buffer use ioctl of "R" range 0x20-2F To prevent conflicts with other ioctl numbers to allow strace to have an idea of what is happening, add the range of ioctls for the trace buffer mapping from _IO("T", 0x1) to the range of "R" 0x20 - 0x2F. Link: https://lore.kernel.org/linux-trace-kernel/20240630105322.GA17573@altlinux.org/ Link: https://lore.kernel.org/linux-trace-kernel/20240630213626.GA23566@altlinux.org/ Cc: Jonathan Corbet Fixes: cf9f0f7c4c5bb ("tracing: Allow user-space mapping of the ring-buffer") Link: https://lore.kernel.org/20240702153354.367861db@rorschach.local.home Reported-by: "Dmitry V. Levin" Reviewed-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/uapi/linux/trace_mmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h index bd1066754220..c102ef35d11e 100644 --- a/include/uapi/linux/trace_mmap.h +++ b/include/uapi/linux/trace_mmap.h @@ -43,6 +43,6 @@ struct trace_buffer_meta { __u64 Reserved2; }; -#define TRACE_MMAP_IOCTL_GET_READER _IO('T', 0x1) +#define TRACE_MMAP_IOCTL_GET_READER _IO('R', 0x20) #endif /* _TRACE_MMAP_H_ */ -- cgit v1.2.3 From 608f6976c309793ceea37292c54b057dab091944 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 26 Jun 2024 07:35:37 -0700 Subject: perf/x86/intel: Support new data source for Lunar Lake A new PEBS data source format is introduced for the p-core of Lunar Lake. The data source field is extended to 8 bits with new encodings. A new layout is introduced into the union intel_x86_pebs_dse. Introduce the lnl_latency_data() to parse the new format. Enlarge the pebs_data_source[] accordingly to include new encodings. Only the mem load and the mem store events can generate the data source. Introduce INTEL_HYBRID_LDLAT_CONSTRAINT and INTEL_HYBRID_STLAT_CONSTRAINT to mark them. Add two new bits for the new cache-related data src, L2_MHB and MSC. The L2_MHB is short for L2 Miss Handling Buffer, which is similar to LFB (Line Fill Buffer), but to track the L2 Cache misses. The MSC stands for the memory-side cache. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Reviewed-by: Ian Rogers Link: https://lkml.kernel.org/r/20240626143545.480761-6-kan.liang@linux.intel.com --- include/uapi/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 3a64499b0f5d..4842c36fdf80 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1349,12 +1349,14 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0x7 available */ +#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ +/* 0x7 available */ #define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ #define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ -- cgit v1.2.3 From 093b0f366567aa3fed85c316f832607069202b23 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:52 +0200 Subject: net: psample: add user cookie Add a user cookie to the sample metadata so that sample emitters can provide more contextual information to samples. If present, send the user cookie in a new attribute: PSAMPLE_ATTR_USER_COOKIE. Reviewed-by: Michal Kubiak Acked-by: Eelco Chaudron Reviewed-by: Simon Horman Reviewed-by: Ido Schimmel Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-2-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/psample.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index e585db5bf2d2..e80637e1d97b 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -19,6 +19,7 @@ enum { PSAMPLE_ATTR_LATENCY, /* u64, nanoseconds */ PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ PSAMPLE_ATTR_PROTO, /* u16 */ + PSAMPLE_ATTR_USER_COOKIE, /* binary, user provided data */ __PSAMPLE_ATTR_MAX }; -- cgit v1.2.3 From 7b1b2b60c63f070e0dfbe072ccaae13168b38d01 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:55 +0200 Subject: net: psample: allow using rate as probability Although not explicitly documented in the psample module itself, the definition of PSAMPLE_ATTR_SAMPLE_RATE seems inherited from act_sample. Quoting tc-sample(8): "RATE of 100 will lead to an average of one sampled packet out of every 100 observed." With this semantics, the rates that we can express with an unsigned 32-bits number are very unevenly distributed and concentrated towards "sampling few packets". For example, we can express a probability of 2.32E-8% but we cannot express anything between 100% and 50%. For sampling applications that are capable of sampling a decent amount of packets, this sampling rate semantics is not very useful. Add a new flag to the uAPI that indicates that the sampling rate is expressed in scaled probability, this is: - 0 is 0% probability, no packets get sampled. - U32_MAX is 100% probability, all packets get sampled. Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Ido Schimmel Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-5-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/psample.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index e80637e1d97b..b765f0e81f20 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -8,7 +8,11 @@ enum { PSAMPLE_ATTR_ORIGSIZE, PSAMPLE_ATTR_SAMPLE_GROUP, PSAMPLE_ATTR_GROUP_SEQ, - PSAMPLE_ATTR_SAMPLE_RATE, + PSAMPLE_ATTR_SAMPLE_RATE, /* u32, ratio between observed and + * sampled packets or scaled probability + * if PSAMPLE_ATTR_SAMPLE_PROBABILITY + * is set. + */ PSAMPLE_ATTR_DATA, PSAMPLE_ATTR_GROUP_REFCOUNT, PSAMPLE_ATTR_TUNNEL, @@ -20,6 +24,10 @@ enum { PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ PSAMPLE_ATTR_PROTO, /* u16 */ PSAMPLE_ATTR_USER_COOKIE, /* binary, user provided data */ + PSAMPLE_ATTR_SAMPLE_PROBABILITY,/* no argument, interpret rate in + * PSAMPLE_ATTR_SAMPLE_RATE as a + * probability scaled 0 - U32_MAX. + */ __PSAMPLE_ATTR_MAX }; -- cgit v1.2.3 From aae0b82b46cb5004bdf82a000c004d69a0885c33 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:56 +0200 Subject: net: openvswitch: add psample action Add support for a new action: psample. This action accepts a u32 group id and a variable-length cookie and uses the psample multicast group to make the packet available for observability. The maximum length of the user-defined cookie is set to 16, same as tc_cookie, to discourage using cookies that will not be offloadable. Reviewed-by: Michal Kubiak Reviewed-by: Aaron Conole Reviewed-by: Ilya Maximets Acked-by: Eelco Chaudron Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-6-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index efc82c318fa2..3dd653748725 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -914,6 +914,31 @@ struct check_pkt_len_arg { }; #endif +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16 +/** + * enum ovs_psample_attr - Attributes for %OVS_ACTION_ATTR_PSAMPLE + * action. + * + * @OVS_PSAMPLE_ATTR_GROUP: 32-bit number to identify the source of the + * sample. + * @OVS_PSAMPLE_ATTR_COOKIE: An optional variable-length binary cookie that + * contains user-defined metadata. The maximum length is + * OVS_PSAMPLE_COOKIE_MAX_SIZE bytes. + * + * Sends the packet to the psample multicast group with the specified group and + * cookie. It is possible to combine this action with the + * %OVS_ACTION_ATTR_TRUNC action to limit the size of the sample. + */ +enum ovs_psample_attr { + OVS_PSAMPLE_ATTR_GROUP = 1, /* u32 number. */ + OVS_PSAMPLE_ATTR_COOKIE, /* Optional, user specified cookie. */ + + /* private: */ + __OVS_PSAMPLE_ATTR_MAX +}; + +#define OVS_PSAMPLE_ATTR_MAX (__OVS_PSAMPLE_ATTR_MAX - 1) + /** * enum ovs_action_attr - Action types. * @@ -966,6 +991,8 @@ struct check_pkt_len_arg { * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS * argument. * @OVS_ACTION_ATTR_DROP: Explicit drop action. + * @OVS_ACTION_ATTR_PSAMPLE: Send a sample of the packet to external observers + * via psample. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -1004,6 +1031,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ OVS_ACTION_ATTR_DROP, /* u32 error code. */ + OVS_ACTION_ATTR_PSAMPLE, /* Nested OVS_PSAMPLE_ATTR_*. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ -- cgit v1.2.3 From 71763d8a8203c28178d7be7f18af73d4dddb36ba Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:57 +0200 Subject: net: openvswitch: store sampling probability in cb. When a packet sample is observed, the sampling rate that was used is important to estimate the real frequency of such event. Store the probability of the parent sample action in the skb's cb area and use it in psample action to pass it down to psample module. Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Ilya Maximets Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-7-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 3dd653748725..3a701bd1f31b 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -649,7 +649,8 @@ enum ovs_flow_attr { * Actions are passed as nested attributes. * * Executes the specified actions with the given probability on a per-packet - * basis. + * basis. Nested actions will be able to access the probability value of the + * parent @OVS_ACTION_ATTR_SAMPLE. */ enum ovs_sample_attr { OVS_SAMPLE_ATTR_UNSPEC, -- cgit v1.2.3 From e46296002113b4556baffd5dc68c4f9e22dae13a Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Thu, 4 Jul 2024 10:11:56 +0200 Subject: net: ethtool: pse-pd: Expand C33 PSE status with class, power and extended state This update expands the status information provided by ethtool for PSE c33. It includes details such as the detected class, current power delivered, and extended state information. Reviewed-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-1-320003204264@bootlin.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 191 +++++++++++++++++++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 4 + 2 files changed, 195 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index e011384c915c..230110b97029 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -752,6 +752,197 @@ enum ethtool_module_power_mode { ETHTOOL_MODULE_POWER_MODE_HIGH, }; +/** + * enum ethtool_c33_pse_ext_state - groups of PSE extended states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION: Group of error_condition states + * @ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID: Group of mr_mps_valid states + * @ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE: Group of mr_pse_enable states + * @ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED: Group of option_detect_ted + * states + * @ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM: Group of option_vport_lim states + * @ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED: Group of ovld_detected states + * @ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE: Group of pd_dll_power_type + * states + * @ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE: Group of power_not_available + * states + * @ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED: Group of short_detected states + */ +enum ethtool_c33_pse_ext_state { + ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION = 1, + ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID, + ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE, + ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED, + ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM, + ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED, + ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE, + ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE, + ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED, +}; + +/** + * enum ethtool_c33_pse_ext_substate_mr_mps_valid - mr_mps_valid states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD: Underload + * state + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN: Port is not + * connected + * + * The PSE monitors either the DC or AC Maintain Power Signature + * (MPS, see 33.2.9.1). This variable indicates the presence or absence of + * a valid MPS. + */ +enum ethtool_c33_pse_ext_substate_mr_mps_valid { + ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN, +}; + +/** + * enum ethtool_c33_pse_ext_substate_error_condition - error_condition states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT: Non-existing + * port number + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT: Undefined port + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT: Internal + * hardware fault + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON: + * Communication error after force on + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS: Unknown + * port status + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF: Host + * crash turn off + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN: + * Host crash force shutdown + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE: Configuration + * change + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP: Over + * temperature detected + * + * error_condition is a variable indicating the status of + * implementation-specific fault conditions or optionally other system faults + * that prevent the PSE from meeting the specifications in Table 33–11 and that + * require the PSE not to source power. These error conditions are different + * from those monitored by the state diagrams in Figure 33–10. + */ +enum ethtool_c33_pse_ext_substate_error_condition { + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP, +}; + +/** + * enum ethtool_c33_pse_ext_substate_mr_pse_enable - mr_pse_enable states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE: Disable + * pin active + * + * mr_pse_enable is control variable that selects PSE operation and test + * functions. + */ +enum ethtool_c33_pse_ext_substate_mr_pse_enable { + ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE = 1, +}; + +/** + * enum ethtool_c33_pse_ext_substate_option_detect_ted - option_detect_ted + * states functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS: Detection + * in process + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR: + * Connection check error + * + * option_detect_ted is a variable indicating if detection can be performed + * by the PSE during the ted_timer interval. + */ +enum ethtool_c33_pse_ext_substate_option_detect_ted { + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR, +}; + +/** + * enum ethtool_c33_pse_ext_substate_option_vport_lim - option_vport_lim states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE: Main supply + * voltage is high + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE: Main supply + * voltage is low + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION: Voltage + * injection into the port + * + * option_vport_lim is an optional variable indicates if VPSE is out of the + * operating range during normal operating state. + */ +enum ethtool_c33_pse_ext_substate_option_vport_lim { + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE, + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION, +}; + +/** + * enum ethtool_c33_pse_ext_substate_ovld_detected - ovld_detected states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD: Overload state + * + * ovld_detected is a variable indicating if the PSE output current has been + * in an overload condition (see 33.2.7.6) for at least TCUT of a one-second + * sliding time. + */ +enum ethtool_c33_pse_ext_substate_ovld_detected { + ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD = 1, +}; + +/** + * enum ethtool_c33_pse_ext_substate_power_not_available - power_not_available + * states functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED: Power + * budget exceeded for the controller + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET: + * Configured port power limit exceeded controller power budget + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT: + * Power request from PD exceeds port limit + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT: Power + * denied due to Hardware power limit + * + * power_not_available is a variable that is asserted in an + * implementation-dependent manner when the PSE is no longer capable of + * sourcing sufficient power to support the attached PD. Sufficient power + * is defined by classification; see 33.2.6. + */ +enum ethtool_c33_pse_ext_substate_power_not_available { + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET, + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT, + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT, +}; + +/** + * enum ethtool_c33_pse_ext_substate_short_detected - short_detected states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION: Short + * condition was detected + * + * short_detected is a variable indicating if the PSE output current has been + * in a short circuit condition for TLIM within a sliding window (see 33.2.7.7). + */ +enum ethtool_c33_pse_ext_substate_short_detected { + ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION = 1, +}; + /** * enum ethtool_pse_types - Types of PSE controller. * @ETHTOOL_PSE_UNKNOWN: Type of PSE controller is unknown diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 840dabdc9d88..b8895da001bc 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -939,6 +939,10 @@ enum { ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ + ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ + ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ /* add new constants above here */ __ETHTOOL_A_PSE_CNT, -- cgit v1.2.3 From 30d7b6727724ce3729f2cb5b8be985d2d1931d2b Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Thu, 4 Jul 2024 10:12:00 +0200 Subject: net: ethtool: Add new power limit get and set features This patch expands the status information provided by ethtool for PSE c33 with available power limit and available power limit ranges. It also adds a call to pse_ethtool_set_pw_limit() to configure the PSE control power limit. Reviewed-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-5-320003204264@bootlin.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index b8895da001bc..6d5bdcc67631 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -930,6 +930,12 @@ enum { }; /* Power Sourcing Equipment */ +enum { + ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, + ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ +}; + enum { ETHTOOL_A_PSE_UNSPEC, ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ @@ -943,6 +949,8 @@ enum { ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ + ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ /* add new constants above here */ __ETHTOOL_A_PSE_CNT, -- cgit v1.2.3 From 6a68aed602d7b770552c7f890d0c30c53725c7b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:21 -0400 Subject: NFSv4: Add new attribute delegation definitions Add the attribute delegation XDR definitions from the spec. Signed-off-by: Tom Haynes Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/uapi/linux/nfs4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 1d2043708bf1..afd7e32906c3 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -69,6 +69,8 @@ #define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000 #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 +#define NFS4_SHARE_WANT_DELEG_TIMESTAMPS 0x100000 + #define NFS4_CDFC4_FORE 0x1 #define NFS4_CDFC4_BACK 0x2 #define NFS4_CDFC4_BOTH 0x3 -- cgit v1.2.3 From d2a00cceb93a4df2afaceb836297628d0aeec7ad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:33 -0400 Subject: NFSv4: Detect support for OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION If the server supports the NFSv4.2 protocol extension to optimise away returning a stateid when it returns a delegation, then we cache that information in another capability flag. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/uapi/linux/nfs4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index afd7e32906c3..caf4db2fcbb9 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -46,6 +46,7 @@ #define NFS4_OPEN_RESULT_CONFIRM 0x0002 #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 #define NFS4_OPEN_RESULT_PRESERVE_UNLINKED 0x0008 +#define NFS4_OPEN_RESULT_NO_OPEN_STATEID 0x0010 #define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020 #define NFS4_SHARE_ACCESS_MASK 0x000F @@ -70,6 +71,7 @@ #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 #define NFS4_SHARE_WANT_DELEG_TIMESTAMPS 0x100000 +#define NFS4_SHARE_WANT_OPEN_XOR_DELEGATION 0x200000 #define NFS4_CDFC4_FORE 0x1 #define NFS4_CDFC4_BACK 0x2 -- cgit v1.2.3 From 00506072d70829196849e835ee4ef0b350b61fb9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Jun 2024 08:59:10 -0400 Subject: nfsd: new netlink ops to get/set server pool_mode Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/uapi/linux/nfsd_netlink.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfsd_netlink.h b/include/uapi/linux/nfsd_netlink.h index 24c86dbc7ed5..887cbd12b695 100644 --- a/include/uapi/linux/nfsd_netlink.h +++ b/include/uapi/linux/nfsd_netlink.h @@ -70,6 +70,14 @@ enum { NFSD_A_SERVER_SOCK_MAX = (__NFSD_A_SERVER_SOCK_MAX - 1) }; +enum { + NFSD_A_POOL_MODE_MODE = 1, + NFSD_A_POOL_MODE_NPOOLS, + + __NFSD_A_POOL_MODE_MAX, + NFSD_A_POOL_MODE_MAX = (__NFSD_A_POOL_MODE_MAX - 1) +}; + enum { NFSD_CMD_RPC_STATUS_GET = 1, NFSD_CMD_THREADS_SET, @@ -78,6 +86,8 @@ enum { NFSD_CMD_VERSION_GET, NFSD_CMD_LISTENER_SET, NFSD_CMD_LISTENER_GET, + NFSD_CMD_POOL_MODE_SET, + NFSD_CMD_POOL_MODE_GET, __NFSD_CMD_MAX, NFSD_CMD_MAX = (__NFSD_CMD_MAX - 1) -- cgit v1.2.3 From e6c06ca8f21d1cdb444c708e385d86a54bc5fc60 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:30 +0200 Subject: wifi: cfg80211: add support for advertising multiple radios belonging to a wiphy The prerequisite for MLO support in cfg80211/mac80211 is that all the links participating in MLO must be from the same wiphy/ieee80211_hw. To meet this expectation, some drivers may need to group multiple discrete hardware each acting as a link in MLO under single wiphy. With this change, supported frequencies and interface combinations of each individual radio are reported to user space. This allows user space to figure out the limitations of what combination of channels can be used concurrently. Even for non-MLO devices, this improves support for devices capable of running on multiple channels at the same time. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/18a88f9ce82b1c9f7c12f1672430eaf2bb0be295.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 65 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6ae3997061b6..f97f5adc8d51 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2052,6 +2052,10 @@ enum nl80211_commands { * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported * interface combinations. In each nested item, it contains attributes * defined in &enum nl80211_if_combination_attrs. + * If the wiphy uses multiple radios (@NL80211_ATTR_WIPHY_RADIOS is set), + * this attribute contains the interface combinations of the first radio. + * See @NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS for the global wiphy + * combinations for the sum of all radios. * @NL80211_ATTR_SOFTWARE_IFTYPES: Nested attribute (just like * %NL80211_ATTR_SUPPORTED_IFTYPES) containing the interface types that * are managed in software: interfaces of these types aren't subject to @@ -2856,6 +2860,14 @@ enum nl80211_commands { * %NL80211_CMD_ASSOCIATE indicating the SPP A-MSDUs * are used on this connection * + * @NL80211_ATTR_WIPHY_RADIOS: Nested attribute describing physical radios + * belonging to this wiphy. See &enum nl80211_wiphy_radio_attrs. + * + * @NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS: Nested attribute listing the + * supported interface combinations for all radios combined. In each + * nested item, it contains attributes defined in + * &enum nl80211_if_combination_attrs. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3401,6 +3413,9 @@ enum nl80211_attrs { NL80211_ATTR_ASSOC_SPP_AMSDU, + NL80211_ATTR_WIPHY_RADIOS, + NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -8005,4 +8020,54 @@ enum nl80211_ap_settings_flags { NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT = 1 << 1, }; +/** + * enum nl80211_wiphy_radio_attrs - wiphy radio attributes + * + * @__NL80211_WIPHY_RADIO_ATTR_INVALID: Invalid + * + * @NL80211_WIPHY_RADIO_ATTR_INDEX: Index of this radio (u32) + * @NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE: Frequency range supported by this + * radio. Attribute may be present multiple times. + * @NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION: Supported interface + * combination for this radio. Attribute may be present multiple times + * and contains attributes defined in &enum nl80211_if_combination_attrs. + * + * @__NL80211_WIPHY_RADIO_ATTR_LAST: Internal + * @NL80211_WIPHY_RADIO_ATTR_MAX: Highest attribute + */ +enum nl80211_wiphy_radio_attrs { + __NL80211_WIPHY_RADIO_ATTR_INVALID, + + NL80211_WIPHY_RADIO_ATTR_INDEX, + NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE, + NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION, + + /* keep last */ + __NL80211_WIPHY_RADIO_ATTR_LAST, + NL80211_WIPHY_RADIO_ATTR_MAX = __NL80211_WIPHY_RADIO_ATTR_LAST - 1, +}; + +/** + * enum nl80211_wiphy_radio_freq_range - wiphy radio frequency range + * + * @__NL80211_WIPHY_RADIO_FREQ_ATTR_INVALID: Invalid + * + * @NL80211_WIPHY_RADIO_FREQ_ATTR_START: Frequency range start (u32). + * The unit is kHz. + * @NL80211_WIPHY_RADIO_FREQ_ATTR_END: Frequency range end (u32). + * The unit is kHz. + * + * @__NL80211_WIPHY_RADIO_FREQ_ATTR_LAST: Internal + * @NL80211_WIPHY_RADIO_FREQ_ATTR_MAX: Highest attribute + */ +enum nl80211_wiphy_radio_freq_range { + __NL80211_WIPHY_RADIO_FREQ_ATTR_INVALID, + + NL80211_WIPHY_RADIO_FREQ_ATTR_START, + NL80211_WIPHY_RADIO_FREQ_ATTR_END, + + __NL80211_WIPHY_RADIO_FREQ_ATTR_LAST, + NL80211_WIPHY_RADIO_FREQ_ATTR_MAX = __NL80211_WIPHY_RADIO_FREQ_ATTR_LAST - 1, +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From c714f15860fcca02fe0fd7c3f1f1fc35b1768ac1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:39 +0800 Subject: iommufd: Add fault and response message definitions iommu_hwpt_pgfaults represent fault messages that the userspace can retrieve. Multiple iommu_hwpt_pgfaults might be put in an iopf group, with the IOMMU_PGFAULT_FLAGS_LAST_PAGE flag set only for the last iommu_hwpt_pgfault. An iommu_hwpt_page_response is a response message that the userspace should send to the kernel after finishing handling a group of fault messages. The @dev_id, @pasid, and @grpid fields in the message identify an outstanding iopf group for a device. The @cookie field, which matches the cookie field of the last fault in the group, will be used by the kernel to look up the pending message. Link: https://lore.kernel.org/r/20240702063444.105814-6-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 83 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 1dfeaa2e649e..4d89ed97b533 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -692,4 +692,87 @@ struct iommu_hwpt_invalidate { __u32 __reserved; }; #define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) + +/** + * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault + * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is + * valid. + * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group. + */ +enum iommu_hwpt_pgfault_flags { + IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0), + IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1), +}; + +/** + * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault + * @IOMMU_PGFAULT_PERM_READ: request for read permission + * @IOMMU_PGFAULT_PERM_WRITE: request for write permission + * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the + * Execute Requested bit set in PASID TLP Prefix. + * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the + * Privileged Mode Requested bit set in PASID TLP + * Prefix. + */ +enum iommu_hwpt_pgfault_perm { + IOMMU_PGFAULT_PERM_READ = (1 << 0), + IOMMU_PGFAULT_PERM_WRITE = (1 << 1), + IOMMU_PGFAULT_PERM_EXEC = (1 << 2), + IOMMU_PGFAULT_PERM_PRIV = (1 << 3), +}; + +/** + * struct iommu_hwpt_pgfault - iommu page fault data + * @flags: Combination of enum iommu_hwpt_pgfault_flags + * @dev_id: id of the originated device + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @perm: Combination of enum iommu_hwpt_pgfault_perm + * @addr: Fault address + * @length: a hint of how much data the requestor is expecting to fetch. For + * example, if the PRI initiator knows it is going to do a 10MB + * transfer, it could fill in 10MB and the OS could pre-fault in + * 10MB of IOVA. It's default to 0 if there's no such hint. + * @cookie: kernel-managed cookie identifying a group of fault messages. The + * cookie number encoded in the last page fault of the group should + * be echoed back in the response message. + */ +struct iommu_hwpt_pgfault { + __u32 flags; + __u32 dev_id; + __u32 pasid; + __u32 grpid; + __u32 perm; + __u64 addr; + __u32 length; + __u32 cookie; +}; + +/** + * enum iommufd_page_response_code - Return status of fault handlers + * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables + * populated, retry the access. This is the + * "Success" defined in PCI 10.4.2.1. + * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the + * access. This is the "Invalid Request" in PCI + * 10.4.2.1. + * @IOMMUFD_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from + * this device if possible. This is the "Response + * Failure" in PCI 10.4.2.1. + */ +enum iommufd_page_response_code { + IOMMUFD_PAGE_RESP_SUCCESS = 0, + IOMMUFD_PAGE_RESP_INVALID, + IOMMUFD_PAGE_RESP_FAILURE, +}; + +/** + * struct iommu_hwpt_page_response - IOMMU page fault response + * @cookie: The kernel-managed cookie reported in the fault message. + * @code: One of response code in enum iommufd_page_response_code. + */ +struct iommu_hwpt_page_response { + __u32 cookie; + __u32 code; +}; #endif -- cgit v1.2.3 From 07838f7fd529c8a6de44b601d4b7057e6c8d36ed Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:40 +0800 Subject: iommufd: Add iommufd fault object An iommufd fault object provides an interface for delivering I/O page faults to user space. These objects are created and destroyed by user space, and they can be associated with or dissociated from hardware page table objects during page table allocation or destruction. User space interacts with the fault object through a file interface. This interface offers a straightforward and efficient way for user space to handle page faults. It allows user space to read fault messages sequentially and respond to them by writing to the same file. The file interface supports reading messages in poll mode, so it's recommended that user space applications use io_uring to enhance read and write efficiency. A fault object can be associated with any iopf-capable iommufd_hw_pgtable during the pgtable's allocation. All I/O page faults triggered by devices when accessing the I/O addresses of an iommufd_hw_pgtable are routed through the fault object to user space. Similarly, user space's responses to these page faults are routed back to the iommu device driver through the same fault object. Link: https://lore.kernel.org/r/20240702063444.105814-7-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 4d89ed97b533..70b8a38fcd46 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -50,6 +50,7 @@ enum { IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, IOMMUFD_CMD_HWPT_INVALIDATE, + IOMMUFD_CMD_FAULT_QUEUE_ALLOC, }; /** @@ -775,4 +776,21 @@ struct iommu_hwpt_page_response { __u32 cookie; __u32 code; }; + +/** + * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC) + * @size: sizeof(struct iommu_fault_alloc) + * @flags: Must be 0 + * @out_fault_id: The ID of the new FAULT + * @out_fault_fd: The fd of the new FAULT + * + * Explicitly allocate a fault handling object. + */ +struct iommu_fault_alloc { + __u32 size; + __u32 flags; + __u32 out_fault_id; + __u32 out_fault_fd; +}; +#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC) #endif -- cgit v1.2.3 From 34765cbc679c59ea5d952d738d2d16bf4aadc497 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:42 +0800 Subject: iommufd: Associate fault object with iommufd_hw_pgtable When allocating a user iommufd_hw_pagetable, the user space is allowed to associate a fault object with the hw_pagetable by specifying the fault object ID in the page table allocation data and setting the IOMMU_HWPT_FAULT_ID_VALID flag bit. On a successful return of hwpt allocation, the user can retrieve and respond to page faults by reading and writing the file interface of the fault object. Once a fault object has been associated with a hwpt, the hwpt is iopf-capable, indicated by hwpt->fault is non NULL. Attaching, detaching, or replacing an iopf-capable hwpt to an RID or PASID will differ from those that are not iopf-capable. Link: https://lore.kernel.org/r/20240702063444.105814-9-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 70b8a38fcd46..ede2b464a761 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -357,10 +357,13 @@ struct iommu_vfio_ioas { * the parent HWPT in a nesting configuration. * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is * enforced on device attachment + * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is + * valid. */ enum iommufd_hwpt_alloc_flags { IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, + IOMMU_HWPT_FAULT_ID_VALID = 1 << 2, }; /** @@ -412,6 +415,9 @@ enum iommu_hwpt_data_type { * @data_type: One of enum iommu_hwpt_data_type * @data_len: Length of the type specific data * @data_uptr: User pointer to the type specific data + * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of + * IOMMU_HWPT_FAULT_ID_VALID is set. + * @__reserved2: Padding to 64-bit alignment. Must be 0. * * Explicitly allocate a hardware page table object. This is the same object * type that is returned by iommufd_device_attach() and represents the @@ -442,6 +448,8 @@ struct iommu_hwpt_alloc { __u32 data_type; __u32 data_len; __aligned_u64 data_uptr; + __u32 fault_id; + __u32 __reserved2; }; #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) -- cgit v1.2.3 From 87128f520a6b7573f8086f2c89b9381ee7e85e51 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 10 Jun 2024 06:22:56 +0930 Subject: btrfs: uapi: record temporary super flags used by btrfstune [BUG] There is a bug report that a canceled checksum conversion (still experimental feature) results in unexpected super block flags: csum_type 0 (crc32c) csum_size 4 csum 0x14973811 [match] bytenr 65536 flags 0x1000000001 ( WRITTEN | CHANGING_FSID_V2 ) magic _BHRfS_M [match] While for a filesystem with ongoing checksum conversion it should have either CHANGING_DATA_CSUM or CHANGING_META_CSUM. [CAUSE] It turns out that, due to btrfs-progs keeps its own extra flags inside its own ctree.h headers, not the shared uapi headers, we have conflicting super flags: kernel-shared/uapi/btrfs_tree.h:#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) kernel-shared/uapi/btrfs_tree.h:#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) kernel-shared/uapi/btrfs_tree.h:#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36) kernel-shared/ctree.h:#define BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM (1ULL << 36) kernel-shared/ctree.h:#define BTRFS_SUPER_FLAG_CHANGING_META_CSUM (1ULL << 37) Note that CHANGING_FSID_V2 is conflicting with CHANGING_DATA_CSUM. [FIX] The proper fix would be done inside btrfs-progs, but to keep everything properly recorded, we should have everything inside the same uapi header. Copy all the new flags into uapi header, and change the value for CHANGING_DATA_CSUM and CHANGING_META_CSUM, while keep the value of CHANGING_BG_TREE untouched. Thankfully checksum change is still only experimental and all those CHANGING_* flags are transient (only for btrfs-progs to resume the conversion, and kernel will reject them all), the damage is still minor. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index d24e8e121507..c7636331e566 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -777,6 +777,14 @@ struct btrfs_stripe_extent { #define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) #define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36) +/* + * Those are temporaray flags utilized by btrfs-progs to do offline conversion. + * They are rejected by kernel. + * But still keep them all here to avoid conflicts. + */ +#define BTRFS_SUPER_FLAG_CHANGING_BG_TREE (1ULL << 38) +#define BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM (1ULL << 39) +#define BTRFS_SUPER_FLAG_CHANGING_META_CSUM (1ULL << 40) /* * items in the extent btree are used to record the objectid of the -- cgit v1.2.3 From 2422547e99f99f952d1a37f26b63289f749d5fcd Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 13 May 2024 18:01:43 +0200 Subject: btrfs: remove raid-stripe-tree encoding field from stripe_extent Remove the encoding field from 'struct btrfs_stripe_extent'. It was originally intended to encode the RAID type as well as if we're a data or a parity stripe. But the RAID type can be inferred form the block-group and the data vs. parity differentiation can be done easier with adding a new key type for parity stripes in the RAID stripe tree. Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index c7636331e566..fc29d273845d 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -747,21 +747,9 @@ struct btrfs_raid_stride { __le64 physical; } __attribute__ ((__packed__)); -/* The stripe_extent::encoding, 1:1 mapping of enum btrfs_raid_types. */ -#define BTRFS_STRIPE_RAID0 1 -#define BTRFS_STRIPE_RAID1 2 -#define BTRFS_STRIPE_DUP 3 -#define BTRFS_STRIPE_RAID10 4 -#define BTRFS_STRIPE_RAID5 5 -#define BTRFS_STRIPE_RAID6 6 -#define BTRFS_STRIPE_RAID1C3 7 -#define BTRFS_STRIPE_RAID1C4 8 - struct btrfs_stripe_extent { - __u8 encoding; - __u8 reserved[7]; /* An array of raid strides this stripe is composed of. */ - struct btrfs_raid_stride strides[]; + __DECLARE_FLEX_ARRAY(struct btrfs_raid_stride, strides); } __attribute__ ((__packed__)); #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) -- cgit v1.2.3 From 861f96a785149a0062cce6578e0fa7cb95435a7e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 10 Jul 2024 16:33:39 +0800 Subject: iommufd: Remove IOMMUFD_PAGE_RESP_FAILURE The response code of IOMMUFD_PAGE_RESP_FAILURE was defined to be equivalent to the "Response Failure" in PCI spec, section 10.4.2.1. This response code indicates that one or more pages within the associated request group have encountered or caused an unrecoverable error. Therefore, this response disables the PRI at the function. Modern I/O virtualization technologies, like SR-IOV, share PRI among the assignable device units. Therefore, a response failure on one unit might cause I/O failure on other units. Remove this response code so that user space can only respond with SUCCESS or INVALID. The VMM is recommended to emulate a failure response as a PRI reset, or PRI disable and changing to a non-PRI domain. Fixes: c714f15860fc ("iommufd: Add fault and response message definitions") Link: https://lore.kernel.org/r/20240710083341.44617-2-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index ede2b464a761..e31385b75d0b 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -765,14 +765,10 @@ struct iommu_hwpt_pgfault { * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the * access. This is the "Invalid Request" in PCI * 10.4.2.1. - * @IOMMUFD_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from - * this device if possible. This is the "Response - * Failure" in PCI 10.4.2.1. */ enum iommufd_page_response_code { IOMMUFD_PAGE_RESP_SUCCESS = 0, IOMMUFD_PAGE_RESP_INVALID, - IOMMUFD_PAGE_RESP_FAILURE, }; /** -- cgit v1.2.3 From 79cf9c6ee44e0af7e1383365d29c64eece6665bb Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 28 Jun 2024 15:29:42 +0200 Subject: media: uapi: pisp_be_config: Drop BIT() from uAPI The pisp_be_config.h uAPI header file contains a bit-field definition that uses the BIT() helper macro. As the BIT() identifier is not defined in userspace, drop it from the uAPI header. Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI") Signed-off-by: Jacopo Mondi Reviewed-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/media/raspberrypi/pisp_be_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h index 1684ae068d4f..27d0cc417d6b 100644 --- a/include/uapi/linux/media/raspberrypi/pisp_be_config.h +++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h @@ -146,7 +146,7 @@ struct pisp_be_dpc_config { */ struct pisp_be_geq_config { __u16 offset; -#define PISP_BE_GEQ_SHARPER BIT(15) +#define PISP_BE_GEQ_SHARPER (1U << 15) #define PISP_BE_GEQ_SLOPE ((1 << 10) - 1) /* top bit is the "sharper" flag, slope value is bottom 10 bits */ __u16 slope_sharper; -- cgit v1.2.3 From 1991a09e6d7c1dff5efea65b5b31082332f2e64e Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 28 Jun 2024 15:29:43 +0200 Subject: media: uapi: pisp_common: Add 32 bpp format test Add definition and test for 32-bits image formats to the pisp_common.h uAPI header. Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI") Signed-off-by: Jacopo Mondi Acked-by: David Plowman Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/media/raspberrypi/pisp_common.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/media/raspberrypi/pisp_common.h b/include/uapi/linux/media/raspberrypi/pisp_common.h index b2522e29c976..74d096188233 100644 --- a/include/uapi/linux/media/raspberrypi/pisp_common.h +++ b/include/uapi/linux/media/raspberrypi/pisp_common.h @@ -72,6 +72,8 @@ enum pisp_image_format { PISP_IMAGE_FORMAT_SHIFT_8 = 0x00080000, PISP_IMAGE_FORMAT_SHIFT_MASK = 0x000f0000, + PISP_IMAGE_FORMAT_BPP_32 = 0x00100000, + PISP_IMAGE_FORMAT_UNCOMPRESSED = 0x00000000, PISP_IMAGE_FORMAT_COMPRESSION_MODE_1 = 0x01000000, PISP_IMAGE_FORMAT_COMPRESSION_MODE_2 = 0x02000000, @@ -134,6 +136,7 @@ enum pisp_image_format { PISP_IMAGE_FORMAT_PLANARITY_PLANAR) #define PISP_IMAGE_FORMAT_wallpaper(fmt) \ ((fmt) & PISP_IMAGE_FORMAT_WALLPAPER_ROLL) +#define PISP_IMAGE_FORMAT_bpp_32(fmt) ((fmt) & PISP_IMAGE_FORMAT_BPP_32) #define PISP_IMAGE_FORMAT_HOG(fmt) \ ((fmt) & \ (PISP_IMAGE_FORMAT_HOG_SIGNED | PISP_IMAGE_FORMAT_HOG_UNSIGNED)) -- cgit v1.2.3 From f5cee94f2dfe5b7625452b831f7629369ce68a7b Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 28 Jun 2024 15:29:44 +0200 Subject: media: uapi: pisp_common: Capitalize all macros The macro used to inspect an image format characteristic use a mixture of capitalized and non-capitalized letters, which is rather unusual for the Linux kernel style. Capitalize all identifiers. Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI") Signed-off-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/media/raspberrypi/pisp_common.h | 38 +++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/media/raspberrypi/pisp_common.h b/include/uapi/linux/media/raspberrypi/pisp_common.h index 74d096188233..cbdccfed1261 100644 --- a/include/uapi/linux/media/raspberrypi/pisp_common.h +++ b/include/uapi/linux/media/raspberrypi/pisp_common.h @@ -92,51 +92,51 @@ enum pisp_image_format { PISP_IMAGE_FORMAT_THREE_CHANNEL }; -#define PISP_IMAGE_FORMAT_bps_8(fmt) \ +#define PISP_IMAGE_FORMAT_BPS_8(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_8) -#define PISP_IMAGE_FORMAT_bps_10(fmt) \ +#define PISP_IMAGE_FORMAT_BPS_10(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_10) -#define PISP_IMAGE_FORMAT_bps_12(fmt) \ +#define PISP_IMAGE_FORMAT_BPS_12(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_12) -#define PISP_IMAGE_FORMAT_bps_16(fmt) \ +#define PISP_IMAGE_FORMAT_BPS_16(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_16) -#define PISP_IMAGE_FORMAT_bps(fmt) \ +#define PISP_IMAGE_FORMAT_BPS(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) ? \ 8 + (2 << (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) - 1)) : 8) -#define PISP_IMAGE_FORMAT_shift(fmt) \ +#define PISP_IMAGE_FORMAT_SHIFT(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_SHIFT_MASK) / PISP_IMAGE_FORMAT_SHIFT_1) -#define PISP_IMAGE_FORMAT_three_channel(fmt) \ +#define PISP_IMAGE_FORMAT_THREE_CHANNEL(fmt) \ ((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL) -#define PISP_IMAGE_FORMAT_single_channel(fmt) \ +#define PISP_IMAGE_FORMAT_SINGLE_CHANNEL(fmt) \ (!((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL)) -#define PISP_IMAGE_FORMAT_compressed(fmt) \ +#define PISP_IMAGE_FORMAT_COMPRESSED(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_COMPRESSION_MASK) != \ PISP_IMAGE_FORMAT_UNCOMPRESSED) -#define PISP_IMAGE_FORMAT_sampling_444(fmt) \ +#define PISP_IMAGE_FORMAT_SAMPLING_444(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == \ PISP_IMAGE_FORMAT_SAMPLING_444) -#define PISP_IMAGE_FORMAT_sampling_422(fmt) \ +#define PISP_IMAGE_FORMAT_SAMPLING_422(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == \ PISP_IMAGE_FORMAT_SAMPLING_422) -#define PISP_IMAGE_FORMAT_sampling_420(fmt) \ +#define PISP_IMAGE_FORMAT_SAMPLING_420(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == \ PISP_IMAGE_FORMAT_SAMPLING_420) -#define PISP_IMAGE_FORMAT_order_normal(fmt) \ +#define PISP_IMAGE_FORMAT_ORDER_NORMAL(fmt) \ (!((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED)) -#define PISP_IMAGE_FORMAT_order_swapped(fmt) \ +#define PISP_IMAGE_FORMAT_ORDER_SWAPPED(fmt) \ ((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED) -#define PISP_IMAGE_FORMAT_interleaved(fmt) \ +#define PISP_IMAGE_FORMAT_INTERLEAVED(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == \ PISP_IMAGE_FORMAT_PLANARITY_INTERLEAVED) -#define PISP_IMAGE_FORMAT_semiplanar(fmt) \ +#define PISP_IMAGE_FORMAT_SEMIPLANAR(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == \ PISP_IMAGE_FORMAT_PLANARITY_SEMI_PLANAR) -#define PISP_IMAGE_FORMAT_planar(fmt) \ +#define PISP_IMAGE_FORMAT_PLANAR(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == \ PISP_IMAGE_FORMAT_PLANARITY_PLANAR) -#define PISP_IMAGE_FORMAT_wallpaper(fmt) \ +#define PISP_IMAGE_FORMAT_WALLPAPER(fmt) \ ((fmt) & PISP_IMAGE_FORMAT_WALLPAPER_ROLL) -#define PISP_IMAGE_FORMAT_bpp_32(fmt) ((fmt) & PISP_IMAGE_FORMAT_BPP_32) +#define PISP_IMAGE_FORMAT_BPP_32(fmt) ((fmt) & PISP_IMAGE_FORMAT_BPP_32) #define PISP_IMAGE_FORMAT_HOG(fmt) \ ((fmt) & \ (PISP_IMAGE_FORMAT_HOG_SIGNED | PISP_IMAGE_FORMAT_HOG_UNSIGNED)) -- cgit v1.2.3 From 639065c621df9bad9d94373084e1c568f81d34e0 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 28 Jun 2024 15:29:45 +0200 Subject: media: uapi: pisp_be_config: Re-sort pisp_be_tiles_config The order of the members of pisp_be_tiles_config is relevant as the driver logic assumes 'config' to be at offset 0. Re-sort the member to match the driver's expectations. Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI") Signed-off-by: Jacopo Mondi Acked-by: Naushir Patuck Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/media/raspberrypi/pisp_be_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h index 27d0cc417d6b..f8650ca92bf8 100644 --- a/include/uapi/linux/media/raspberrypi/pisp_be_config.h +++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h @@ -919,9 +919,9 @@ struct pisp_tile { * @config: PiSP Back End configuration */ struct pisp_be_tiles_config { + struct pisp_be_config config; struct pisp_tile tiles[PISP_BACK_END_NUM_TILES]; __u32 num_tiles; - struct pisp_be_config config; } __attribute__((packed)); #endif /* _UAPI_PISP_BE_CONFIG_H_ */ -- cgit v1.2.3 From 1c2c57bd439ecaffc728139a0a00701bee886d0a Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 28 Jun 2024 15:29:46 +0200 Subject: media: uapi: pisp_be_config: Add extra config fields Complete the pisp_be_config strcture by adding fields that even if not written to the HW are relevant to complete the uAPI and put it in par with the BSP driver. Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI") Signed-off-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- .../uapi/linux/media/raspberrypi/pisp_be_config.h | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h index f8650ca92bf8..cbeb714f4d61 100644 --- a/include/uapi/linux/media/raspberrypi/pisp_be_config.h +++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h @@ -716,6 +716,13 @@ struct pisp_be_hog_buffer_config { /** * struct pisp_be_config - RaspberryPi PiSP Back End Processing configuration * + * @input_buffer: Input buffer addresses + * @tdn_input_buffer: TDN input buffer addresses + * @stitch_input_buffer: Stitch input buffer addresses + * @tdn_output_buffer: TDN output buffer addresses + * @stitch_output_buffer: Stitch output buffer addresses + * @output_buffer: Output buffers addresses + * @hog_buffer: HOG buffer addresses * @global: Global PiSP configuration * @input_format: Input image format * @decompress: Decompress configuration @@ -753,8 +760,30 @@ struct pisp_be_hog_buffer_config { * @resample: Resampling configuration * @output_format: Output format configuration * @hog: HOG configuration + * @axi: AXI bus configuration + * @lsc_extra: LSC extra info + * @cac_extra: CAC extra info + * @downscale_extra: Downscaler extra info + * @resample_extra: Resample extra info + * @crop: Crop configuration + * @hog_format: HOG format info + * @dirty_flags_bayer: Bayer enable dirty flags + * (:c:type:`pisp_be_bayer_enable`) + * @dirty_flags_rgb: RGB enable dirty flags + * (:c:type:`pisp_be_rgb_enable`) + * @dirty_flags_extra: Extra dirty flags */ struct pisp_be_config { + /* I/O configuration: */ + struct pisp_be_input_buffer_config input_buffer; + struct pisp_be_tdn_input_buffer_config tdn_input_buffer; + struct pisp_be_stitch_input_buffer_config stitch_input_buffer; + struct pisp_be_tdn_output_buffer_config tdn_output_buffer; + struct pisp_be_stitch_output_buffer_config stitch_output_buffer; + struct pisp_be_output_buffer_config + output_buffer[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_hog_buffer_config hog_buffer; + /* Processing configuration: */ struct pisp_be_global_config global; struct pisp_image_format_config input_format; struct pisp_decompress_config decompress; @@ -793,6 +822,18 @@ struct pisp_be_config { struct pisp_be_output_format_config output_format[PISP_BACK_END_NUM_OUTPUTS]; struct pisp_be_hog_config hog; + struct pisp_be_axi_config axi; + /* Non-register fields: */ + struct pisp_be_lsc_extra lsc_extra; + struct pisp_be_cac_extra cac_extra; + struct pisp_be_downscale_extra + downscale_extra[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_resample_extra resample_extra[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_crop_config crop; + struct pisp_image_format_config hog_format; + __u32 dirty_flags_bayer; /* these use pisp_be_bayer_enable */ + __u32 dirty_flags_rgb; /* use pisp_be_rgb_enable */ + __u32 dirty_flags_extra; /* these use pisp_be_dirty_t */ } __attribute__((packed)); /** -- cgit v1.2.3 From bc1a5cd002116552db4c3541e91f8a5b1b0cf65d Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Wed, 10 Apr 2024 15:07:28 -0700 Subject: KVM: Add KVM_PRE_FAULT_MEMORY vcpu ioctl to pre-populate guest memory Add a new ioctl KVM_PRE_FAULT_MEMORY in the KVM common code. It iterates on the memory range and calls the arch-specific function. The implementation is optional and enabled by a Kconfig symbol. Suggested-by: Sean Christopherson Signed-off-by: Isaku Yamahata Reviewed-by: Rick Edgecombe Message-ID: <819322b8f25971f2b9933bfa4506e618508ad782.1712785629.git.isaku.yamahata@intel.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d03842abae57..e5af8c692dc0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -917,6 +917,7 @@ struct kvm_enable_cap { #define KVM_CAP_MEMORY_ATTRIBUTES 233 #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 +#define KVM_CAP_PRE_FAULT_MEMORY 236 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1548,4 +1549,13 @@ struct kvm_create_guest_memfd { __u64 reserved[6]; }; +#define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory) + +struct kvm_pre_fault_memory { + __u64 gpa; + __u64 size; + __u64 flags; + __u64 padding[5]; +}; + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From ed5d583a88a9207b866c14ba834984c6f3c51d23 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 27 Jun 2024 10:08:54 -0700 Subject: fs/procfs: implement efficient VMA querying API for /proc//maps /proc//maps file is extremely useful in practice for various tasks involving figuring out process memory layout, what files are backing any given memory range, etc. One important class of applications that absolutely rely on this are profilers/stack symbolizers (perf tool being one of them). Patterns of use differ, but they generally would fall into two categories. In on-demand pattern, a profiler/symbolizer would normally capture stack trace containing absolute memory addresses of some functions, and would then use /proc//maps file to find corresponding backing ELF files (normally, only executable VMAs are of interest), file offsets within them, and then continue from there to get yet more information (ELF symbols, DWARF information) to get human-readable symbolic information. This pattern is used by Meta's fleet-wide profiler, as one example. In preprocessing pattern, application doesn't know the set of addresses of interest, so it has to fetch all relevant VMAs (again, probably only executable ones), store or cache them, then proceed with profiling and stack trace capture. Once done, it would do symbolization based on stored VMA information. This can happen at much later point in time. This patterns is used by perf tool, as an example. In either case, there are both performance and correctness requirement involved. This address to VMA information translation has to be done as efficiently as possible, but also not miss any VMA (especially in the case of loading/unloading shared libraries). In practice, correctness can't be guaranteed (due to process dying before VMA data can be captured, or shared library being unloaded, etc), but any effort to maximize the chance of finding the VMA is appreciated. Unfortunately, for all the /proc//maps file universality and usefulness, it doesn't fit the above use cases 100%. First, it's main purpose is to emit all VMAs sequentially, but in practice captured addresses would fall only into a smaller subset of all process' VMAs, mainly containing executable text. Yet, library would need to parse most or all of the contents to find needed VMAs, as there is no way to skip VMAs that are of no use. Efficient library can do the linear pass and it is still relatively efficient, but it's definitely an overhead that can be avoided, if there was a way to do more targeted querying of the relevant VMA information. Second, it's a text based interface, which makes its programmatic use from applications and libraries more cumbersome and inefficient due to the need to handle text parsing to get necessary pieces of information. The overhead is actually payed both by kernel, formatting originally binary VMA data into text, and then by user space application, parsing it back into binary data for further use. For the on-demand pattern of usage, described above, another problem when writing generic stack trace symbolization library is an unfortunate performance-vs-correctness tradeoff that needs to be made. Library has to make a decision to either cache parsed contents of /proc//maps (after initial processing) to service future requests (if application requests to symbolize another set of addresses (for the same process), captured at some later time, which is typical for periodic/continuous profiling cases) to avoid higher costs of re-parsing this file. Or it has to choose to cache the contents in memory to speed up future requests. In the former case, more memory is used for the cache and there is a risk of getting stale data if application loads or unloads shared libraries, or otherwise changed its set of VMAs somehow, e.g., through additional mmap() calls. In the latter case, it's the performance hit that comes from re-opening the file and re-parsing its contents all over again. This patch aims to solve this problem by providing a new API built on top of /proc//maps. It's meant to address both non-selectiveness and text nature of /proc//maps, by giving user more control of what sort of VMA(s) needs to be queried, and being binary-based interface eliminates the overhead of text formatting (on kernel side) and parsing (on user space side). It's also designed to be extensible and forward/backward compatible by including required struct size field, which user has to provide. We use established copy_struct_from_user() approach to handle extensibility. User has a choice to pick either getting VMA that covers provided address or -ENOENT if none is found (exact, least surprising, case). Or, with an extra query flag (PROCMAP_QUERY_COVERING_OR_NEXT_VMA), they can get either VMA that covers the address (if there is one), or the closest next VMA (i.e., VMA with the smallest vm_start > addr). The latter allows more efficient use, but, given it could be a surprising behavior, requires an explicit opt-in. There is another query flag that is useful for some use cases. PROCMAP_QUERY_FILE_BACKED_VMA instructs this API to only return file-backed VMAs. Combining this with PROCMAP_QUERY_COVERING_OR_NEXT_VMA makes it possible to efficiently iterate only file-backed VMAs of the process, which is what profilers/symbolizers are normally interested in. All the above querying flags can be combined with (also optional) set of desired VMA permissions flags. This allows to, for example, iterate only an executable subset of VMAs, which is what preprocessing pattern, used by perf tool, would benefit from, as the assumption is that captured stack traces would have addresses of executable code. This saves time by skipping non-executable VMAs altogether efficienty. All these querying flags (modifiers) are orthogonal and can be combined in a semantically meaningful and natural way. Basing this ioctl()-based API on top of /proc//maps's FD makes sense given it's querying the same set of VMA data. It's also benefitial because permission checks for /proc//maps is performed at open time once, and the actual data read of text contents of /proc//maps is done without further permission checks. We piggyback on this pattern with ioctl()-based API as well, as that's a desired property. Both for performance reasons, but also for security and flexibility reasons. Allowing application to open an FD for /proc/self/maps without any extra capabilities, and then passing it to some sort of profiling agent through Unix-domain socket, would allow such profiling agent to not require some of the capabilities that are otherwise expected when opening /proc//maps file for *another* process. This is a desirable property for some more restricted setups. This new ioctl-based implementation doesn't interfere with seq_file-based implementation of /proc//maps textual interface, and so could be used together or independently without paying any price for that. Note also, that fetching VMA name (e.g., backing file path, or special hard-coded or user-provided names) is optional just like build ID. If user sets vma_name_size to zero, kernel code won't attempt to retrieve it, saving resources. Earlier versions of this patch set were adding per-VMA locking, which is why we have a code structure that is ready for abstracting mmap_lock vs vm_lock differences (query_vma_setup(), query_vma_teardown(), and query_vma_find_by_addr()), but given anon_vma_name() is not yet compatible with per-VMA locking, initial implementation sticks to using only mmap_lock for now. It will be easy to add back per-VMA locking once all the pieces are ready later on. Which is why we keep existing code structure with setup/teardown/query helper functions. [andrii@kernel.org: improve PROCMAP_QUERY's compat mode handling] Link: https://lkml.kernel.org/r/20240701174805.1897344-2-andrii@kernel.org Link: https://lkml.kernel.org/r/20240627170900.1672542-3-andrii@kernel.org Signed-off-by: Andrii Nakryiko Acked-by: Liam R. Howlett Cc: Alexey Dobriyan Cc: Al Viro Cc: Christian Brauner Cc: Greg Kroah-Hartman Cc: Mike Rapoport (IBM) Cc: Suren Baghdasaryan Cc: Andi Kleen Cc: Arnd Bergmann Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/uapi/linux/fs.h | 130 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 129 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 45e4e64fd664..5d440f9b5d92 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -333,8 +333,10 @@ typedef int __bitwise __kernel_rwf_t; #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ RWF_APPEND | RWF_NOAPPEND) +#define PROCFS_IOCTL_MAGIC 'f' + /* Pagemap ioctl */ -#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) +#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) /* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */ #define PAGE_IS_WPALLOWED (1 << 0) @@ -393,4 +395,130 @@ struct pm_scan_arg { __u64 return_mask; }; +/* /proc//maps ioctl */ +#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query) + +enum procmap_query_flags { + /* + * VMA permission flags. + * + * Can be used as part of procmap_query.query_flags field to look up + * only VMAs satisfying specified subset of permissions. E.g., specifying + * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs, + * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only + * return read/write VMAs, though both executable/non-executable and + * private/shared will be ignored. + * + * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags + * field to specify actual VMA permissions. + */ + PROCMAP_QUERY_VMA_READABLE = 0x01, + PROCMAP_QUERY_VMA_WRITABLE = 0x02, + PROCMAP_QUERY_VMA_EXECUTABLE = 0x04, + PROCMAP_QUERY_VMA_SHARED = 0x08, + /* + * Query modifier flags. + * + * By default VMA that covers provided address is returned, or -ENOENT + * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest + * VMA with vma_start > addr will be returned if no covering VMA is + * found. + * + * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that + * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA + * to iterate all VMAs with file backing. + */ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10, + PROCMAP_QUERY_FILE_BACKED_VMA = 0x20, +}; + +/* + * Input/output argument structured passed into ioctl() call. It can be used + * to query a set of VMAs (Virtual Memory Areas) of a process. + * + * Each field can be one of three kinds, marked in a short comment to the + * right of the field: + * - "in", input argument, user has to provide this value, kernel doesn't modify it; + * - "out", output argument, kernel sets this field with VMA data; + * - "in/out", input and output argument; user provides initial value (used + * to specify maximum allowable buffer size), and kernel sets it to actual + * amount of data written (or zero, if there is no data). + * + * If matching VMA is found (according to criterias specified by + * query_addr/query_flags, all the out fields are filled out, and ioctl() + * returns 0. If there is no matching VMA, -ENOENT will be returned. + * In case of any other error, negative error code other than -ENOENT is + * returned. + * + * Most of the data is similar to the one returned as text in /proc//maps + * file, but procmap_query provides more querying flexibility. There are no + * consistency guarantees between subsequent ioctl() calls, but data returned + * for matched VMA is self-consistent. + */ +struct procmap_query { + /* Query struct size, for backwards/forward compatibility */ + __u64 size; + /* + * Query flags, a combination of enum procmap_query_flags values. + * Defines query filtering and behavior, see enum procmap_query_flags. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_flags; /* in */ + /* + * Query address. By default, VMA that covers this address will + * be looked up. PROCMAP_QUERY_* flags above modify this default + * behavior further. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_addr; /* in */ + /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */ + __u64 vma_start; /* out */ + __u64 vma_end; /* out */ + /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */ + __u64 vma_flags; /* out */ + /* VMA backing page size granularity. */ + __u64 vma_page_size; /* out */ + /* + * VMA file offset. If VMA has file backing, this specifies offset + * within the file that VMA's start address corresponds to. + * Is set to zero if VMA has no backing file. + */ + __u64 vma_offset; /* out */ + /* Backing file's inode number, or zero, if VMA has no backing file. */ + __u64 inode; /* out */ + /* Backing file's device major/minor number, or zero, if VMA has no backing file. */ + __u32 dev_major; /* out */ + __u32 dev_minor; /* out */ + /* + * If set to non-zero value, signals the request to return VMA name + * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix + * appended, if file was unlinked from FS) for matched VMA. VMA name + * can also be some special name (e.g., "[heap]", "[stack]") or could + * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME). + * + * Kernel will set this field to zero, if VMA has no associated name. + * Otherwise kernel will return actual amount of bytes filled in + * user-supplied buffer (see vma_name_addr field below), including the + * terminating zero. + * + * If VMA name is longer that user-supplied maximum buffer size, + * -E2BIG error is returned. + * + * If this field is set to non-zero value, vma_name_addr should point + * to valid user space memory buffer of at least vma_name_size bytes. + * If set to zero, vma_name_addr should be set to zero as well + */ + __u32 vma_name_size; /* in/out */ + /* + * User-supplied address of a buffer of at least vma_name_size bytes + * for kernel to fill with matched VMA's name (see vma_name_size field + * description above for details). + * + * Should be set to zero if VMA name should not be returned. + */ + __u64 vma_name_addr; /* in */ +}; + #endif /* _UAPI_LINUX_FS_H */ -- cgit v1.2.3 From bfc69fd05ef9b6416f4811aafafb7f2b34daa000 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 27 Jun 2024 10:08:55 -0700 Subject: fs/procfs: add build ID fetching to PROCMAP_QUERY API The need to get ELF build ID reliably is an important aspect when dealing with profiling and stack trace symbolization, and /proc//maps textual representation doesn't help with this. To get backing file's ELF build ID, application has to first resolve VMA, then use it's start/end address range to follow a special /proc//map_files/- symlink to open the ELF file (this is necessary because backing file might have been removed from the disk or was already replaced with another binary in the same file path. Such approach, beyond just adding complexity of having to do a bunch of extra work, has extra security implications. Because application opens underlying ELF file and needs read access to its entire contents (as far as kernel is concerned), kernel puts additional capable() checks on following /proc//map_files/- symlink. And that makes sense in general. But in the case of build ID, profiler/symbolizer doesn't need the contents of ELF file, per se. It's only build ID that is of interest, and ELF build ID itself doesn't provide any sensitive information. So this patch adds a way to request backing file's ELF build ID along the rest of VMA information in the same API. User has control over whether this piece of information is requested or not by either setting build_id_size field to zero or non-zero maximum buffer size they provided through build_id_addr field (which encodes user pointer as __u64 field). This is a completely optional piece of information, and so has no performance implications for user cases that don't care about build ID, while improving performance and simplifying the setup for those application that do need it. Kernel already implements build ID fetching, which is used from BPF subsystem. We are reusing this code here, but plan a follow up changes to make it work better under more relaxed assumption (compared to what existing code assumes) of being called from user process context, in which page faults are allowed. BPF-specific implementation currently bails out if necessary part of ELF file is not paged in, all due to extra BPF-specific restrictions (like the need to fetch build ID in restrictive contexts such as NMI handler). [andrii@kernel.org: fix integer to pointer cast warning in do_procmap_query()] Link: https://lkml.kernel.org/r/20240701174805.1897344-1-andrii@kernel.org Link: https://lkml.kernel.org/r/20240627170900.1672542-4-andrii@kernel.org Signed-off-by: Andrii Nakryiko Acked-by: Liam R. Howlett Cc: Alexey Dobriyan Cc: Al Viro Cc: Christian Brauner Cc: Greg Kroah-Hartman Cc: Mike Rapoport (IBM) Cc: Suren Baghdasaryan Cc: Andi Kleen Cc: Arnd Bergmann Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/uapi/linux/fs.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 5d440f9b5d92..2a4a5f50c98e 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -511,6 +511,26 @@ struct procmap_query { * If set to zero, vma_name_addr should be set to zero as well */ __u32 vma_name_size; /* in/out */ + /* + * If set to non-zero value, signals the request to extract and return + * VMA's backing file's build ID, if the backing file is an ELF file + * and it contains embedded build ID. + * + * Kernel will set this field to zero, if VMA has no backing file, + * backing file is not an ELF file, or ELF file has no build ID + * embedded. + * + * Build ID is a binary value (not a string). Kernel will set + * build_id_size field to exact number of bytes used for build ID. + * If build ID is requested and present, but needs more bytes than + * user-supplied maximum buffer size (see build_id_addr field below), + * -E2BIG error will be returned. + * + * If this field is set to non-zero value, build_id_addr should point + * to valid user space memory buffer of at least build_id_size bytes. + * If set to zero, build_id_addr should be set to zero as well + */ + __u32 build_id_size; /* in/out */ /* * User-supplied address of a buffer of at least vma_name_size bytes * for kernel to fill with matched VMA's name (see vma_name_size field @@ -519,6 +539,14 @@ struct procmap_query { * Should be set to zero if VMA name should not be returned. */ __u64 vma_name_addr; /* in */ + /* + * User-supplied address of a buffer of at least build_id_size bytes + * for kernel to fill with matched VMA's ELF build ID, if available + * (see build_id_size field description above for details). + * + * Should be set to zero if build ID should not be returned. + */ + __u64 build_id_addr; /* in */ }; #endif /* _UAPI_LINUX_FS_H */ -- cgit v1.2.3 From 2c1583290b08c5aa9005178a573be8f329de2976 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Hor=C3=A1k=20=282N=29?= Date: Fri, 12 Jul 2024 17:07:06 +0200 Subject: net: phy: bcm54811: New link mode for BroadR-Reach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new link mode necessary for 10 MBit single-pair connection in BroadR-Reach mode on bcm5481x PHY by Broadcom. This new link mode, 10baseT1BRR, is known as 1BR10 in the Broadcom terminology. Another link mode to be used is 1BR100 and it is already present as 100baseT1, because Broadcom's 1BR100 became 100baseT1 (IEEE 802.3bw). Signed-off-by: Kamil Horák (2N) Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240712150709.3134474-2-kamilh@axis.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 230110b97029..4a0a6e703483 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2054,6 +2054,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_10baseT1S_Full_BIT = 99, ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100, ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101, + ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS -- cgit v1.2.3 From 136a8066676e593cd29627219467fc222c8f3b04 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 11 Jul 2024 21:11:03 -0300 Subject: iommufd: Put constants for all the uAPI enums Relying on position in the enum makes it subtly harder when doing merge resolutions or backporting as it is easy to grab a patch and not notice it is a uAPI change with a differently ordered enum. This may become a bigger problem in next cycles when iommu_hwpt_invalidate_data_type and other per-driver enums have patches flowing through different trees. So lets start including constants for all the uAPI enums to make this safer. No functional change. Link: https://lore.kernel.org/r/0-v1-2c06ec044924+133-iommufd_uapi_const_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Tested-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index e31385b75d0b..4dde745cfb7e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -37,20 +37,20 @@ enum { IOMMUFD_CMD_BASE = 0x80, IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, - IOMMUFD_CMD_IOAS_ALLOC, - IOMMUFD_CMD_IOAS_ALLOW_IOVAS, - IOMMUFD_CMD_IOAS_COPY, - IOMMUFD_CMD_IOAS_IOVA_RANGES, - IOMMUFD_CMD_IOAS_MAP, - IOMMUFD_CMD_IOAS_UNMAP, - IOMMUFD_CMD_OPTION, - IOMMUFD_CMD_VFIO_IOAS, - IOMMUFD_CMD_HWPT_ALLOC, - IOMMUFD_CMD_GET_HW_INFO, - IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, - IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, - IOMMUFD_CMD_HWPT_INVALIDATE, - IOMMUFD_CMD_FAULT_QUEUE_ALLOC, + IOMMUFD_CMD_IOAS_ALLOC = 0x81, + IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82, + IOMMUFD_CMD_IOAS_COPY = 0x83, + IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84, + IOMMUFD_CMD_IOAS_MAP = 0x85, + IOMMUFD_CMD_IOAS_UNMAP = 0x86, + IOMMUFD_CMD_OPTION = 0x87, + IOMMUFD_CMD_VFIO_IOAS = 0x88, + IOMMUFD_CMD_HWPT_ALLOC = 0x89, + IOMMUFD_CMD_GET_HW_INFO = 0x8a, + IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b, + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c, + IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, + IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, }; /** @@ -400,8 +400,8 @@ struct iommu_hwpt_vtd_s1 { * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table */ enum iommu_hwpt_data_type { - IOMMU_HWPT_DATA_NONE, - IOMMU_HWPT_DATA_VTD_S1, + IOMMU_HWPT_DATA_NONE = 0, + IOMMU_HWPT_DATA_VTD_S1 = 1, }; /** @@ -491,8 +491,8 @@ struct iommu_hw_info_vtd { * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type */ enum iommu_hw_info_type { - IOMMU_HW_INFO_TYPE_NONE, - IOMMU_HW_INFO_TYPE_INTEL_VTD, + IOMMU_HW_INFO_TYPE_NONE = 0, + IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, }; /** @@ -629,7 +629,7 @@ struct iommu_hwpt_get_dirty_bitmap { * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 */ enum iommu_hwpt_invalidate_data_type { - IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, + IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0, }; /** @@ -768,7 +768,7 @@ struct iommu_hwpt_pgfault { */ enum iommufd_page_response_code { IOMMUFD_PAGE_RESP_SUCCESS = 0, - IOMMUFD_PAGE_RESP_INVALID, + IOMMUFD_PAGE_RESP_INVALID = 1, }; /** -- cgit v1.2.3 From 6e5c85c003e47fd49b72ca052b9181644e04a520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Sat, 13 Jul 2024 02:18:58 +0000 Subject: net/sched: flower: refactor control flag definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Redefine the flower control flags as an enum, so they are included in BTF info. Make the kernel-side enum a more explicit superset of TCA_FLOWER_KEY_FLAGS_*, new flags still need to be added to both enums, but at least the bit position only has to be defined once. FLOW_DIS_ENCAPSULATION is never set for mask, so it can't be exposed to userspace in an unsupported flags mask error message, so it will be placed one bit position above the last uAPI flag. Suggested-by: Alexander Lobakin Suggested-by: Jakub Kicinski Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-2-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_cls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index b6d38f5fd7c0..12db276f0c11 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -677,8 +677,11 @@ enum { enum { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), + __TCA_FLOWER_KEY_FLAGS_MAX, }; +#define TCA_FLOWER_KEY_FLAGS_MAX (__TCA_FLOWER_KEY_FLAGS_MAX - 1) + enum { TCA_FLOWER_KEY_CFM_OPT_UNSPEC, TCA_FLOWER_KEY_CFM_MD_LEVEL, -- cgit v1.2.3 From bfda5a63137bc83c344c4d995f404c8e701ff0fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Sat, 13 Jul 2024 02:19:00 +0000 Subject: net/sched: flower: define new tunnel flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define new TCA_FLOWER_KEY_FLAGS_* flags for use in struct flow_dissector_key_control, covering the same flags as currently exposed through TCA_FLOWER_KEY_ENC_FLAGS. Put the new flags under FLOW_DIS_F_*. The idea is that we can later, move the existing flags under FLOW_DIS_F_* as well. The ynl flag names have been taken from the RFC iproute2 patch. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20240713021911.1631517-4-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_cls.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 12db276f0c11..3dc4388e944c 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -677,6 +677,10 @@ enum { enum { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), + TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM = (1 << 2), + TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT = (1 << 3), + TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM = (1 << 4), + TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT = (1 << 5), __TCA_FLOWER_KEY_FLAGS_MAX, }; -- cgit v1.2.3 From 11036bd7a0b3b05c5e1f43d107ddb02abf83adb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Sat, 13 Jul 2024 02:19:06 +0000 Subject: net/sched: cls_flower: rework TCA_FLOWER_KEY_ENC_FLAGS usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch changes how TCA_FLOWER_KEY_ENC_FLAGS is used, so that it is used with TCA_FLOWER_KEY_FLAGS_* flags, in the same way as TCA_FLOWER_KEY_FLAGS is currently used. Where TCA_FLOWER_KEY_FLAGS uses {key,mask}->control.flags, then TCA_FLOWER_KEY_ENC_FLAGS now uses {key,mask}->enc_control.flags, therefore {key,mask}->enc_flags is now unused. As the generic fl_set_key_flags/fl_dump_key_flags() is used with encap set to true, then fl_{set,dump}_key_enc_flags() is removed. This breaks unreleased userspace API (net-next since 2024-06-04). Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-10-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_cls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 3dc4388e944c..d36d9cdf0c00 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -554,8 +554,8 @@ enum { TCA_FLOWER_KEY_SPI, /* be32 */ TCA_FLOWER_KEY_SPI_MASK, /* be32 */ - TCA_FLOWER_KEY_ENC_FLAGS, /* u32 */ - TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* u32 */ + TCA_FLOWER_KEY_ENC_FLAGS, /* be32 */ + TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* be32 */ __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 33c97e7c0338d921c2ae0c2e54bf17121007fb94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Thu, 11 Jul 2024 16:54:57 +0000 Subject: landlock: Clarify documentation for struct landlock_ruleset_attr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The explanation for @handled_access_fs and @handled_access_net has significant overlap and is better explained together. * Explain the commonalities in structure-level documentation. * Clarify some wording and break up longer sentences. * Put emphasis on the word "handled" to make it clearer that "handled" is a term with special meaning in the context of Landlock. I'd like to transfer this wording into the man pages as well. Signed-off-by: Günther Noack Cc: Alejandro Colomar Cc: Konstantin Meskhidze Cc: linux-security-module@vger.kernel.org Link: https://lore.kernel.org/r/20240711165456.2148590-2-gnoack@google.com [mic: Format commit message] Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 68625e728f43..e76186da3260 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -12,29 +12,36 @@ #include /** - * struct landlock_ruleset_attr - Ruleset definition + * struct landlock_ruleset_attr - Ruleset definition. * - * Argument of sys_landlock_create_ruleset(). This structure can grow in - * future versions. + * Argument of sys_landlock_create_ruleset(). + * + * This structure defines a set of *handled access rights*, a set of actions on + * different object types, which should be denied by default when the ruleset is + * enacted. Vice versa, access rights that are not specifically listed here are + * not going to be denied by this ruleset when it is enacted. + * + * For historical reasons, the %LANDLOCK_ACCESS_FS_REFER right is always denied + * by default, even when its bit is not set in @handled_access_fs. In order to + * add new rules with this access right, the bit must still be set explicitly + * (cf. `Filesystem flags`_). + * + * The explicit listing of *handled access rights* is required for backwards + * compatibility reasons. In most use cases, processes that use Landlock will + * *handle* a wide range or all access rights that they know about at build time + * (and that they have tested with a kernel that supported them all). + * + * This structure can grow in future Landlock versions. */ struct landlock_ruleset_attr { /** - * @handled_access_fs: Bitmask of actions (cf. `Filesystem flags`_) - * that is handled by this ruleset and should then be forbidden if no - * rule explicitly allow them: it is a deny-by-default list that should - * contain as much Landlock access rights as possible. Indeed, all - * Landlock filesystem access rights that are not part of - * handled_access_fs are allowed. This is needed for backward - * compatibility reasons. One exception is the - * %LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly - * handled, but must still be explicitly handled to add new rules with - * this access right. + * @handled_access_fs: Bitmask of handled filesystem actions + * (cf. `Filesystem flags`_). */ __u64 handled_access_fs; /** - * @handled_access_net: Bitmask of actions (cf. `Network flags`_) - * that is handled by this ruleset and should then be forbidden if no - * rule explicitly allow them. + * @handled_access_net: Bitmask of handled network actions (cf. `Network + * flags`_). */ __u64 handled_access_net; }; -- cgit v1.2.3 From 88caf544c9305313e1c48ac1a437faa5df8fff06 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 1 Jul 2024 17:31:46 -0500 Subject: KVM: SEV: Provide support for SNP_GUEST_REQUEST NAE event Version 2 of GHCB specification added support for the SNP Guest Request Message NAE event. The event allows for an SEV-SNP guest to make requests to the SEV-SNP firmware through the hypervisor using the SNP_GUEST_REQUEST API defined in the SEV-SNP firmware specification. This is used by guests primarily to request attestation reports from firmware. There are other request types are available as well, but the specifics of what guest requests are being made generally does not affect how they are handled by the hypervisor, which only serves as a proxy for the guest requests and firmware responses. Implement handling for these events. When an SNP Guest Request is issued, the guest will provide its own request/response pages, which could in theory be passed along directly to firmware. However, these pages would need special care: - Both pages are from shared guest memory, so they need to be protected from migration/etc. occurring while firmware reads/writes to them. At a minimum, this requires elevating the ref counts and potentially needing an explicit pinning of the memory. This places additional restrictions on what type of memory backends userspace can use for shared guest memory since there would be some reliance on using refcounted pages. - The response page needs to be switched to Firmware-owned state before the firmware can write to it, which can lead to potential host RMP #PFs if the guest is misbehaved and hands the host a guest page that KVM is writing to for other reasons (e.g. virtio buffers). Both of these issues can be avoided completely by using separately-allocated bounce pages for both the request/response pages and passing those to firmware instead. So that's the approach taken here. Signed-off-by: Brijesh Singh Co-developed-by: Alexey Kardashevskiy Signed-off-by: Alexey Kardashevskiy Co-developed-by: Ashish Kalra Signed-off-by: Ashish Kalra Reviewed-by: Tom Lendacky Reviewed-by: Liam Merwick [mdr: ensure FW command failures are indicated to guest, drop extended request handling to be re-written as separate patch, massage commit] Signed-off-by: Michael Roth Message-ID: <20240701223148.3798365-2-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/sev-guest.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h index 154a87a1eca9..fcdfea767fca 100644 --- a/include/uapi/linux/sev-guest.h +++ b/include/uapi/linux/sev-guest.h @@ -89,6 +89,9 @@ struct snp_ext_report_req { #define SNP_GUEST_FW_ERR_MASK GENMASK_ULL(31, 0) #define SNP_GUEST_VMM_ERR_SHIFT 32 #define SNP_GUEST_VMM_ERR(x) (((u64)x) << SNP_GUEST_VMM_ERR_SHIFT) +#define SNP_GUEST_FW_ERR(x) ((x) & SNP_GUEST_FW_ERR_MASK) +#define SNP_GUEST_ERR(vmm_err, fw_err) (SNP_GUEST_VMM_ERR(vmm_err) | \ + SNP_GUEST_FW_ERR(fw_err)) #define SNP_GUEST_VMM_ERR_INVALID_LEN 1 #define SNP_GUEST_VMM_ERR_BUSY 2 -- cgit v1.2.3 From 332d2c1d713e232e163386c35a3ba0c1b90df83f Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 1 May 2024 03:52:10 -0500 Subject: crypto: ccp: Add the SNP_VLEK_LOAD command When requesting an attestation report a guest is able to specify whether it wants SNP firmware to sign the report using either a Versioned Chip Endorsement Key (VCEK), which is derived from chip-unique secrets, or a Versioned Loaded Endorsement Key (VLEK) which is obtained from an AMD Key Derivation Service (KDS) and derived from seeds allocated to enrolled cloud service providers (CSPs). For VLEK keys, an SNP_VLEK_LOAD SNP firmware command is used to load them into the system after obtaining them from the KDS. Add a corresponding userspace interface so to allow the loading of VLEK keys into the system. See SEV-SNP Firmware ABI 1.54, SNP_VLEK_LOAD for more details. Reviewed-by: Tom Lendacky Signed-off-by: Michael Roth Message-ID: <20240501085210.2213060-21-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/psp-sev.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index b7a2c2ee35b7..2289b7c76c59 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -31,6 +31,7 @@ enum { SNP_PLATFORM_STATUS, SNP_COMMIT, SNP_SET_CONFIG, + SNP_VLEK_LOAD, SEV_MAX, }; @@ -214,6 +215,32 @@ struct sev_user_data_snp_config { __u8 rsvd1[52]; } __packed; +/** + * struct sev_data_snp_vlek_load - SNP_VLEK_LOAD structure + * + * @len: length of the command buffer read by the PSP + * @vlek_wrapped_version: version of wrapped VLEK hashstick (Must be 0h) + * @rsvd: reserved + * @vlek_wrapped_address: address of a wrapped VLEK hashstick + * (struct sev_user_data_snp_wrapped_vlek_hashstick) + */ +struct sev_user_data_snp_vlek_load { + __u32 len; /* In */ + __u8 vlek_wrapped_version; /* In */ + __u8 rsvd[3]; /* In */ + __u64 vlek_wrapped_address; /* In */ +} __packed; + +/** + * struct sev_user_data_snp_vlek_wrapped_vlek_hashstick - Wrapped VLEK data + * + * @data: Opaque data provided by AMD KDS (as described in SEV-SNP Firmware ABI + * 1.54, SNP_VLEK_LOAD) + */ +struct sev_user_data_snp_wrapped_vlek_hashstick { + __u8 data[432]; /* In */ +} __packed; + /** * struct sev_issue_cmd - SEV ioctl parameters * -- cgit v1.2.3 From f4b89d8ce5a835afa51404977ee7e3889c2b9722 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Mon, 15 Jul 2024 16:03:29 +0000 Subject: landlock: Various documentation improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix some typos, incomplete or confusing phrases. * Split paragraphs where appropriate. * List the same error code multiple times, if it has multiple possible causes. * Bring wording closer to the man page wording, which has undergone more thorough review (esp. for LANDLOCK_ACCESS_FS_WRITE_FILE). * Small semantic clarifications * Call the ephemeral port range "ephemeral" * Clarify reasons for EFAULT in landlock_add_rule() * Clarify @rule_type doc for landlock_add_rule() This is a collection of small fixes which I collected when preparing the corresponding man pages [1]. Cc: Alejandro Colomar Cc: Konstantin Meskhidze Link: https://lore.kernel.org/r/20240715155554.2791018-1-gnoack@google.com [1] Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20240715160328.2792835-2-gnoack@google.com [mic: Add label to link, fix formatting spotted by make htmldocs, synchronize userspace-api documentation's date] Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index e76186da3260..2c8dbc74b955 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -104,20 +104,21 @@ struct landlock_path_beneath_attr { */ struct landlock_net_port_attr { /** - * @allowed_access: Bitmask of allowed access network for a port + * @allowed_access: Bitmask of allowed network actions for a port * (cf. `Network flags`_). */ __u64 allowed_access; /** * @port: Network port in host endianness. * - * It should be noted that port 0 passed to :manpage:`bind(2)` will - * bind to an available port from a specific port range. This can be - * configured thanks to the ``/proc/sys/net/ipv4/ip_local_port_range`` - * sysctl (also used for IPv6). A Landlock rule with port 0 and the - * ``LANDLOCK_ACCESS_NET_BIND_TCP`` right means that requesting to bind - * on port 0 is allowed and it will automatically translate to binding - * on the related port range. + * It should be noted that port 0 passed to :manpage:`bind(2)` will bind + * to an available port from the ephemeral port range. This can be + * configured with the ``/proc/sys/net/ipv4/ip_local_port_range`` sysctl + * (also used for IPv6). + * + * A Landlock rule with port 0 and the ``LANDLOCK_ACCESS_NET_BIND_TCP`` + * right means that requesting to bind on port 0 is allowed and it will + * automatically translate to binding on the related port range. */ __u64 port; }; @@ -138,10 +139,10 @@ struct landlock_net_port_attr { * The following access rights apply only to files: * * - %LANDLOCK_ACCESS_FS_EXECUTE: Execute a file. - * - %LANDLOCK_ACCESS_FS_WRITE_FILE: Open a file with write access. Note that - * you might additionally need the %LANDLOCK_ACCESS_FS_TRUNCATE right in order - * to overwrite files with :manpage:`open(2)` using ``O_TRUNC`` or - * :manpage:`creat(2)`. + * - %LANDLOCK_ACCESS_FS_WRITE_FILE: Open a file with write access. When + * opening files for writing, you will often additionally need the + * %LANDLOCK_ACCESS_FS_TRUNCATE right. In many cases, these system calls + * truncate existing files when overwriting them (e.g., :manpage:`creat(2)`). * - %LANDLOCK_ACCESS_FS_READ_FILE: Open a file with read access. * - %LANDLOCK_ACCESS_FS_TRUNCATE: Truncate a file with :manpage:`truncate(2)`, * :manpage:`ftruncate(2)`, :manpage:`creat(2)`, or :manpage:`open(2)` with @@ -263,7 +264,7 @@ struct landlock_net_port_attr { * These flags enable to restrict a sandboxed process to a set of network * actions. This is supported since the Landlock ABI version 4. * - * TCP sockets with allowed actions: + * The following access rights apply to TCP port numbers: * * - %LANDLOCK_ACCESS_NET_BIND_TCP: Bind a TCP socket to a local port. * - %LANDLOCK_ACCESS_NET_CONNECT_TCP: Connect an active TCP socket to -- cgit v1.2.3 From 9651fcedf7b92d3f7f1ab179e8ab55b85ee10fc1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 8 Dec 2022 17:55:04 +0100 Subject: mm: add MAP_DROPPABLE for designating always lazily freeable mappings The vDSO getrandom() implementation works with a buffer allocated with a new system call that has certain requirements: - It shouldn't be written to core dumps. * Easy: VM_DONTDUMP. - It should be zeroed on fork. * Easy: VM_WIPEONFORK. - It shouldn't be written to swap. * Uh-oh: mlock is rlimited. * Uh-oh: mlock isn't inherited by forks. - It shouldn't reserve actual memory, but it also shouldn't crash when page faulting in memory if none is available * Uh-oh: VM_NORESERVE means segfaults. It turns out that the vDSO getrandom() function has three really nice characteristics that we can exploit to solve this problem: 1) Due to being wiped during fork(), the vDSO code is already robust to having the contents of the pages it reads zeroed out midway through the function's execution. 2) In the absolute worst case of whatever contingency we're coding for, we have the option to fallback to the getrandom() syscall, and everything is fine. 3) The buffers the function uses are only ever useful for a maximum of 60 seconds -- a sort of cache, rather than a long term allocation. These characteristics mean that we can introduce VM_DROPPABLE, which has the following semantics: a) It never is written out to swap. b) Under memory pressure, mm can just drop the pages (so that they're zero when read back again). c) It is inherited by fork. d) It doesn't count against the mlock budget, since nothing is locked. e) If there's not enough memory to service a page fault, it's not fatal, and no signal is sent. This way, allocations used by vDSO getrandom() can use: VM_DROPPABLE | VM_DONTDUMP | VM_WIPEONFORK | VM_NORESERVE And there will be no problem with OOMing, crashing on overcommitment, using memory when not in use, not wiping on fork(), coredumps, or writing out to swap. In order to let vDSO getrandom() use this, expose these via mmap(2) as MAP_DROPPABLE. Note that this involves removing the MADV_FREE special case from sort_folio(), which according to Yu Zhao is unnecessary and will simply result in an extra call to shrink_folio_list() in the worst case. The chunk removed reenables the swapbacked flag, which we don't want for VM_DROPPABLE, and we can't conditionalize it here because there isn't a vma reference available. Finally, the provided self test ensures that this is working as desired. Cc: linux-mm@kvack.org Acked-by: David Hildenbrand Signed-off-by: Jason A. Donenfeld --- include/uapi/linux/mman.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index a246e11988d5..e89d00528f2f 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -17,6 +17,7 @@ #define MAP_SHARED 0x01 /* Share changes */ #define MAP_PRIVATE 0x02 /* Changes are private */ #define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ +#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */ /* * Huge page size encoding when MAP_HUGETLB is specified, and a huge page -- cgit v1.2.3 From 4ad10a5f5f78a5b3e525a63bd075a4eb1139dde1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 18 Nov 2022 17:23:34 +0100 Subject: random: introduce generic vDSO getrandom() implementation Provide a generic C vDSO getrandom() implementation, which operates on an opaque state returned by vgetrandom_alloc() and produces random bytes the same way as getrandom(). This has the following API signature: ssize_t vgetrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); The return value and the first three arguments are the same as ordinary getrandom(), while the last two arguments are a pointer to the opaque allocated state and its size. Were all five arguments passed to the getrandom() syscall, nothing different would happen, and the functions would have the exact same behavior. The actual vDSO RNG algorithm implemented is the same one implemented by drivers/char/random.c, using the same fast-erasure techniques as that. Should the in-kernel implementation change, so too will the vDSO one. It requires an implementation of ChaCha20 that does not use any stack, in order to maintain forward secrecy if a multi-threaded program forks (though this does not account for a similar issue with SA_SIGINFO copying registers to the stack), so this is left as an architecture-specific fill-in. Stack-less ChaCha20 is an easy algorithm to implement on a variety of architectures, so this shouldn't be too onerous. Initially, the state is keyless, and so the first call makes a getrandom() syscall to generate that key, and then uses it for subsequent calls. By keeping track of a generation counter, it knows when its key is invalidated and it should fetch a new one using the syscall. Later, more than just a generation counter might be used. Since MADV_WIPEONFORK is set on the opaque state, the key and related state is wiped during a fork(), so secrets don't roll over into new processes, and the same state doesn't accidentally generate the same random stream. The generation counter, as well, is always >0, so that the 0 counter is a useful indication of a fork() or otherwise uninitialized state. If the kernel RNG is not yet initialized, then the vDSO always calls the syscall, because that behavior cannot be emulated in userspace, but fortunately that state is short lived and only during early boot. If it has been initialized, then there is no need to inspect the `flags` argument, because the behavior does not change post-initialization regardless of the `flags` value. Since the opaque state passed to it is mutated, vDSO getrandom() is not reentrant, when used with the same opaque state, which libc should be mindful of. The function works over an opaque per-thread state of a particular size, which must be marked VM_WIPEONFORK, VM_DONTDUMP, VM_NORESERVE, and VM_DROPPABLE for proper operation. Over time, the nuances of these allocations may change or grow or even differ based on architectural features. The opaque state passed to vDSO getrandom() must be allocated using the mmap_flags and mmap_prot parameters provided by the vgetrandom_opaque_params struct, which also contains the size of each state. That struct can be obtained with a call to vgetrandom(NULL, 0, 0, ¶ms, ~0UL). Then, libc can call mmap(2) and slice up the returned array into a state per each thread, while ensuring that no single state straddles a page boundary. Libc is expected to allocate a chunk of these on first use, and then dole them out to threads as they're created, allocating more when needed. vDSO getrandom() provides the ability for userspace to generate random bytes quickly and safely, and is intended to be integrated into libc's thread management. As an illustrative example, the introduced code in the vdso_test_getrandom self test later in this series might be used to do the same outside of libc. In a libc the various pthread-isms are expected to be elided into libc internals. Reviewed-by: Thomas Gleixner Signed-off-by: Jason A. Donenfeld --- include/uapi/linux/random.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h index e744c23582eb..2a3fe4c2cdc9 100644 --- a/include/uapi/linux/random.h +++ b/include/uapi/linux/random.h @@ -55,4 +55,19 @@ struct rand_pool_info { #define GRND_RANDOM 0x0002 #define GRND_INSECURE 0x0004 +/** + * struct vgetrandom_opaque_params - arguments for allocating memory for vgetrandom + * + * @size_per_opaque_state: Size of each state that is to be passed to vgetrandom(). + * @mmap_prot: Value of the prot argument in mmap(2). + * @mmap_flags: Value of the flags argument in mmap(2). + * @reserved: Reserved for future use. + */ +struct vgetrandom_opaque_params { + __u32 size_of_opaque_state; + __u32 mmap_prot; + __u32 mmap_flags; + __u32 reserved[13]; +}; + #endif /* _UAPI_LINUX_RANDOM_H */ -- cgit v1.2.3 From 13f75d9ecf3d8efcf597dfcd8f7be7460cdaa11a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 15 Jul 2024 04:50:07 +0200 Subject: random: note that RNDGETPOOL was removed in 2.6.9-rc2 RNDGETPOOL was thankfully removed twenty years ago, but it's stuck around in headers. Probably removing it from uapi headers isn't great in case there are some weird users out there, but we should at least mark this as having been removed, to save future readers the same goose chase I just went on. Link: https://lore.kernel.org/all/E1By1St-0001TS-Qj@thunk.org/ Link: https://lore.kernel.org/all/Pine.LNX.4.58.0409130937050.4094@ppc970.osdl.org/ Signed-off-by: Jason A. Donenfeld --- include/uapi/linux/random.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h index 2a3fe4c2cdc9..1dd047ec98a1 100644 --- a/include/uapi/linux/random.h +++ b/include/uapi/linux/random.h @@ -20,7 +20,7 @@ /* Add to (or subtract from) the entropy count. (Superuser only.) */ #define RNDADDTOENTCNT _IOW( 'R', 0x01, int ) -/* Get the contents of the entropy pool. (Superuser only.) */ +/* Get the contents of the entropy pool. (Superuser only.) (Removed in 2.6.9-rc2.) */ #define RNDGETPOOL _IOR( 'R', 0x02, int [2] ) /* -- cgit v1.2.3 From d5e726d9143c5624135f5dc9e4069799adeef734 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 12 Jul 2024 18:52:51 -0700 Subject: xsk: Require XDP_UMEM_TX_METADATA_LEN to actuate tx_metadata_len Julian reports that commit 341ac980eab9 ("xsk: Support tx_metadata_len") can break existing use cases which don't zero-initialize xdp_umem_reg padding. Introduce new XDP_UMEM_TX_METADATA_LEN to make sure we interpret the padding as tx_metadata_len only when being explicitly asked. Fixes: 341ac980eab9 ("xsk: Support tx_metadata_len") Reported-by: Julian Schindel Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Reviewed-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20240713015253.121248-2-sdf@fomichev.me --- include/uapi/linux/if_xdp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index d31698410410..42ec5ddaab8d 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -41,6 +41,10 @@ */ #define XDP_UMEM_TX_SW_CSUM (1 << 1) +/* Request to reserve tx_metadata_len bytes of per-chunk metadata. + */ +#define XDP_UMEM_TX_METADATA_LEN (1 << 2) + struct sockaddr_xdp { __u16 sxdp_family; __u16 sxdp_flags; -- cgit v1.2.3