From 38163af068810b388f6723a681dfd8c7b3680d38 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 14 Oct 2025 23:54:58 +0000 Subject: bpf: Introduce SK_BPF_BYPASS_PROT_MEM. If a socket has sk->sk_bypass_prot_mem flagged, the socket opts out of the global protocol memory accounting. This is easily controlled by net.core.bypass_prot_mem sysctl, but it lacks flexibility. Let's support flagging (and clearing) sk->sk_bypass_prot_mem via bpf_setsockopt() at the BPF_CGROUP_INET_SOCK_CREATE hook. int val = 1; bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM, &val, sizeof(val)); As with net.core.bypass_prot_mem, this is inherited to child sockets, and BPF always takes precedence over sysctl at socket(2) and accept(2). SK_BPF_BYPASS_PROT_MEM is only supported at BPF_CGROUP_INET_SOCK_CREATE and not supported on other hooks for some reasons: 1. UDP charges memory under sk->sk_receive_queue.lock instead of lock_sock() 2. Modifying the flag after skb is charged to sk requires such adjustment during bpf_setsockopt() and complicates the logic unnecessarily We can support other hooks later if a real use case justifies that. Most changes are inline and hard to trace, but a microbenchmark on __sk_mem_raise_allocated() during neper/tcp_stream showed that more samples completed faster with sk->sk_bypass_prot_mem == 1. This will be more visible under tcp_mem pressure (but it's not a fair comparison). # bpftrace -e 'kprobe:__sk_mem_raise_allocated { @start[tid] = nsecs; } kretprobe:__sk_mem_raise_allocated /@start[tid]/ { @end[tid] = nsecs - @start[tid]; @times = hist(@end[tid]); delete(@start[tid]); }' # tcp_stream -6 -F 1000 -N -T 256 Without bpf prog: [128, 256) 3846 | | [256, 512) 1505326 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [512, 1K) 1371006 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [1K, 2K) 198207 |@@@@@@ | [2K, 4K) 31199 |@ | With bpf prog in the next patch: (must be attached before tcp_stream) # bpftool prog load sk_bypass_prot_mem.bpf.o /sys/fs/bpf/test type cgroup/sock_create # bpftool cgroup attach /sys/fs/cgroup/test cgroup_inet_sock_create pinned /sys/fs/bpf/test [128, 256) 6413 | | [256, 512) 1868425 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [512, 1K) 1101697 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [1K, 2K) 117031 |@@@@ | [2K, 4K) 11773 | | Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Acked-by: Roman Gushchin Link: https://patch.msgid.link/20251014235604.3057003-6-kuniyu@google.com --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6829936d33f5..6eb75ad900b1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7200,6 +7200,8 @@ enum { TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */ + SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */ + }; enum { -- cgit v1.2.3 From a63ca4236e6799cf4343f9aec9d92afdfa582446 Mon Sep 17 00:00:00 2001 From: Ackerley Tng Date: Thu, 16 Oct 2025 10:28:44 -0700 Subject: KVM: guest_memfd: Use guest mem inodes instead of anonymous inodes guest_memfd's inode represents memory the guest_memfd is providing. guest_memfd's file represents a struct kvm's view of that memory. Using a custom inode allows customization of the inode teardown process via callbacks. For example, ->evict_inode() allows customization of the truncation process on file close, and ->destroy_inode() and ->free_inode() allow customization of the inode freeing process. Customizing the truncation process allows flexibility in management of guest_memfd memory and customization of the inode freeing process allows proper cleanup of memory metadata stored on the inode. Memory metadata is more appropriately stored on the inode (as opposed to the file), since the metadata is for the memory and is not unique to a specific binding and struct kvm. Acked-by: David Hildenbrand Co-developed-by: Fuad Tabba Signed-off-by: Fuad Tabba Signed-off-by: Ackerley Tng Signed-off-by: Shivank Garg Tested-by: Ashish Kalra [sean: drop helpers, open code logic in __kvm_gmem_create()] Link: https://lore.kernel.org/r/20251016172853.52451-4-seanjc@google.com Signed-off-by: Sean Christopherson --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index bb575f3ab45e..638ca21b7a90 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -103,5 +103,6 @@ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #define PID_FS_MAGIC 0x50494446 /* "PIDF" */ +#define GUEST_MEMFD_MAGIC 0x474d454d /* "GMEM" */ #endif /* __LINUX_MAGIC_H__ */ -- cgit v1.2.3 From ca4709843b7e72f96976cd6b35bca148a4071673 Mon Sep 17 00:00:00 2001 From: David Yang Date: Fri, 17 Oct 2025 14:08:54 +0800 Subject: net: dsa: tag_yt921x: add support for Motorcomm YT921x tags Add support for Motorcomm YT921x tags, which includes a proper configurable ethertype field (default to 0x9988). Signed-off-by: David Yang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251017060859.326450-3-mmyangfl@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 69e0457eb200..cfd200c87e5e 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -114,6 +114,7 @@ #define ETH_P_QINQ1 0x9100 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_YT921X 0x9988 /* Motorcomm YT921x DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_DSA_A5PSW 0xE001 /* A5PSW Tag Value [ NOT AN OFFICIALLY REGISTERED ID ] */ -- cgit v1.2.3 From 1cba30bf9fdd6c982708f3587f609a30c370d889 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 16 Oct 2025 11:09:38 -0700 Subject: io_uring: add support for IORING_SETUP_SQE_MIXED Normal rings support 64b SQEs for posting submissions, while certain features require the ring to be configured with IORING_SETUP_SQE128, as they need to convey more information per submission. This, in turn, makes ALL the SQEs be 128b in size. This is somewhat wasteful and inefficient, particularly when only certain SQEs need to be of the bigger variant. This adds support for setting up a ring with mixed SQE sizes, using IORING_SETUP_SQE_MIXED. When setup in this mode, SQEs posted to the ring may be either 64b or 128b in size. If a SQE is 128b in size, then opcode will be set to a variante to indicate that this is the case. Any other non-128b opcode will assume the SQ's default size. SQEs on these types of mixed rings may also utilize NOP with skip success set. This can happen if the ring is one (small) SQE entry away from wrapping, and an attempt is made to get a 128b SQE. As SQEs must be contiguous in the SQ ring, a 128b SQE cannot wrap the ring. For this case, a single NOP SQE should be inserted with the SKIP_SUCCESS flag set. The kernel will process this as a normal NOP and without posting a CQE. Signed-off-by: Keith Busch [axboe: {} style fix and assign sqe before opcode read] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 263bed13473e..04797a9b76bc 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -231,6 +231,12 @@ enum io_uring_sqe_flags_bit { */ #define IORING_SETUP_CQE_MIXED (1U << 18) +/* + * Allow both 64b and 128b SQEs. If a 128b SQE is posted, it will have + * a 128b opcode. + */ +#define IORING_SETUP_SQE_MIXED (1U << 19) + enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, @@ -295,6 +301,8 @@ enum io_uring_op { IORING_OP_READV_FIXED, IORING_OP_WRITEV_FIXED, IORING_OP_PIPE, + IORING_OP_NOP128, + IORING_OP_URING_CMD128, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 0d8627cc936de8ea04f3cc1e6921c63fb72cc199 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 22 Oct 2025 13:41:06 +0200 Subject: blktrace: add definitions for blk_user_trace_setup2 Add definitions for a version 2 of the blk_user_trace_setup ioctl. This new ioctl will enable a different struct layout of the binary data passed to user-space when using a new version of the blktrace utility requesting the new struct layout. Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 16 ++++++++++++++++ include/uapi/linux/fs.h | 1 + 2 files changed, 17 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 1bfb635e309b..a6958708d477 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -129,6 +129,7 @@ enum { }; #define BLKTRACE_BDEV_SIZE 32 +#define BLKTRACE_BDEV_SIZE2 64 /* * User setup structure passed with BLKTRACESETUP @@ -143,4 +144,19 @@ struct blk_user_trace_setup { __u32 pid; }; +/* + * User setup structure passed with BLKTRACESETUP2 + */ +struct blk_user_trace_setup2 { + char name[BLKTRACE_BDEV_SIZE2]; /* output */ + __u64 act_mask; /* input */ + __u32 buf_size; /* input */ + __u32 buf_nr; /* input */ + __u64 start_lba; + __u64 end_lba; + __u32 pid; + __u32 flags; /* currently unused */ + __u64 reserved[11]; +}; + #endif /* _UAPIBLKTRACE_H */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index beb4c2d1e41c..957ce3343a4f 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -300,6 +300,7 @@ struct file_attr { #define BLKGETDISKSEQ _IOR(0x12,128,__u64) /* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */ /* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */ +#define BLKTRACESETUP2 _IOWR(0x12, 142, struct blk_user_trace_setup2) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- cgit v1.2.3 From c44347d606260f36a81f6d8415a5af33cb3015fa Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 22 Oct 2025 13:41:08 +0200 Subject: blktrace: add definitions for struct blk_io_trace2 Add definitions for the extended version of the blktrace protocol using a wider action type to be able to record new actions in the kernel. Reviewed-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Signed-off-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index a6958708d477..9f9834d76e00 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -94,6 +94,7 @@ enum blktrace_notify { #define BLK_IO_TRACE_MAGIC 0x65617400 #define BLK_IO_TRACE_VERSION 0x07 +#define BLK_IO_TRACE2_VERSION 0x08 /* * The trace itself @@ -113,6 +114,21 @@ struct blk_io_trace { /* cgroup id will be stored here if exists */ }; +struct blk_io_trace2 { + __u32 magic; /* MAGIC << 8 | BLK_IO_TRACE2_VERSION */ + __u32 sequence; /* event number */ + __u64 time; /* in nanoseconds */ + __u64 sector; /* disk offset */ + __u32 bytes; /* transfer length */ + __u32 pid; /* who did it */ + __u64 action; /* what happened */ + __u32 device; /* device number */ + __u32 cpu; /* on what cpu did it happen */ + __u16 error; /* completion error */ + __u16 pdu_len; /* length of data after this trace */ + __u8 pad[12]; + /* cgroup id will be stored here if it exists */ +}; /* * The remap event */ -- cgit v1.2.3 From f9ee38bbf70fb20584625849a253c8652176fa66 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 22 Oct 2025 13:41:12 +0200 Subject: blktrace: add block trace commands for zone operations Add block trace commands for zone operations. These commands can only be handled with version 2 of the blktrace protocol. For version 1, warn if a command that does not fit into the 16 bits reserved for the command in this version is passed in. Reviewed-by: Martin K. Petersen Signed-off-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 9f9834d76e00..190a3c5ab0a0 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -26,11 +26,20 @@ enum blktrace_cat { BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */ BLK_TC_FUA = 1 << 15, /* fua requests */ - BLK_TC_END = 1 << 15, /* we've run out of bits! */ + BLK_TC_END_V1 = 1 << 15, /* we've run out of bits! */ + + BLK_TC_ZONE_APPEND = 1ull << 16, /* zone append */ + BLK_TC_ZONE_RESET = 1ull << 17, /* zone reset */ + BLK_TC_ZONE_RESET_ALL = 1ull << 18, /* zone reset all */ + BLK_TC_ZONE_FINISH = 1ull << 19, /* zone finish */ + BLK_TC_ZONE_OPEN = 1ull << 20, /* zone open */ + BLK_TC_ZONE_CLOSE = 1ull << 21, /* zone close */ + + BLK_TC_END_V2 = 1ull << 21, }; #define BLK_TC_SHIFT (16) -#define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT) +#define BLK_TC_ACT(act) ((u64)(act) << BLK_TC_SHIFT) /* * Basic trace actions -- cgit v1.2.3 From 1c164fcc1b08e75f1cad1532718f09cddc0ddebe Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 22 Oct 2025 13:41:13 +0200 Subject: blktrace: expose ZONE APPEND completions to blktrace Expose ZONE APPEND completions as a block trace completion action to blktrace. As tracing of zoned block commands needs the upper 32bit of the widened 64bit action, only add traces to blktrace if user-space has requested version 2 of the blktrace protocol. Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 190a3c5ab0a0..289872e51fc5 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -97,6 +97,9 @@ enum blktrace_notify { #define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE)) #define BLK_TA_DRV_DATA (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA)) +#define BLK_TA_ZONE_APPEND (__BLK_TA_COMPLETE |\ + BLK_TC_ACT(BLK_TC_ZONE_APPEND)) + #define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_MESSAGE (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY)) -- cgit v1.2.3 From 3f6722816a73e2017599d965683dbe71833afd7a Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 22 Oct 2025 13:41:14 +0200 Subject: blktrace: trace zone write plugging operations Trace zone write plugging operations on block devices. As tracing of zoned block commands needs the upper 32bit of the widened 64bit action, only add traces to blktrace if user-space has requested version 2 of the blktrace protocol. Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 289872e51fc5..30f3d2589365 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -62,6 +62,8 @@ enum blktrace_act { __BLK_TA_REMAP, /* bio was remapped */ __BLK_TA_ABORT, /* request aborted */ __BLK_TA_DRV_DATA, /* driver-specific binary data */ + __BLK_TA_ZONE_PLUG, /* zone write plug was plugged */ + __BLK_TA_ZONE_UNPLUG, /* zone write plug was unplugged */ __BLK_TA_CGROUP = 1 << 8, /* from a cgroup*/ }; @@ -99,6 +101,9 @@ enum blktrace_notify { #define BLK_TA_ZONE_APPEND (__BLK_TA_COMPLETE |\ BLK_TC_ACT(BLK_TC_ZONE_APPEND)) +#define BLK_TA_ZONE_PLUG (__BLK_TA_ZONE_PLUG | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_ZONE_UNPLUG (__BLK_TA_ZONE_UNPLUG |\ + BLK_TC_ACT(BLK_TC_QUEUE)) #define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) -- cgit v1.2.3 From bd26631ccdfd11701fa29e665a7f041875ba9423 Mon Sep 17 00:00:00 2001 From: Changwoo Min Date: Tue, 21 Oct 2025 07:09:07 +0900 Subject: PM: EM: Add em.yaml and autogen files Add a generic netlink spec in YAML format and autogenerate boilerplate code using ynl-regen.sh to introduce a generic netlink for the energy model. It allows a userspace program to read the performance domain and its energy model. It notifies the userspace program when a performance domain is created or deleted or its energy model is updated through a multicast interface. Specifically, it supports two commands: - EM_CMD_GET_PDS: Get the list of information for all performance domains. - EM_CMD_GET_PD_TABLE: Get the energy model table of a performance domain. Also, it supports three notification events: - EM_CMD_PD_CREATED: When a performance domain is created. - EM_CMD_PD_DELETED: When a performance domain is deleted. - EM_CMD_PD_UPDATED: When the energy model table of a performance domain is updated. Finally, update MAINTAINERS to include new files. Signed-off-by: Changwoo Min Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/20251020220914.320832-4-changwoo@igalia.com Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/energy_model.h | 62 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/uapi/linux/energy_model.h (limited to 'include/uapi') diff --git a/include/uapi/linux/energy_model.h b/include/uapi/linux/energy_model.h new file mode 100644 index 000000000000..4ec4c0eabbbb --- /dev/null +++ b/include/uapi/linux/energy_model.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/em.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_ENERGY_MODEL_H +#define _UAPI_LINUX_ENERGY_MODEL_H + +#define EM_FAMILY_NAME "em" +#define EM_FAMILY_VERSION 1 + +enum { + EM_A_PDS_PD = 1, + + __EM_A_PDS_MAX, + EM_A_PDS_MAX = (__EM_A_PDS_MAX - 1) +}; + +enum { + EM_A_PD_PAD = 1, + EM_A_PD_PD_ID, + EM_A_PD_FLAGS, + EM_A_PD_CPUS, + + __EM_A_PD_MAX, + EM_A_PD_MAX = (__EM_A_PD_MAX - 1) +}; + +enum { + EM_A_PD_TABLE_PD_ID = 1, + EM_A_PD_TABLE_PS, + + __EM_A_PD_TABLE_MAX, + EM_A_PD_TABLE_MAX = (__EM_A_PD_TABLE_MAX - 1) +}; + +enum { + EM_A_PS_PAD = 1, + EM_A_PS_PERFORMANCE, + EM_A_PS_FREQUENCY, + EM_A_PS_POWER, + EM_A_PS_COST, + EM_A_PS_FLAGS, + + __EM_A_PS_MAX, + EM_A_PS_MAX = (__EM_A_PS_MAX - 1) +}; + +enum { + EM_CMD_GET_PDS = 1, + EM_CMD_GET_PD_TABLE, + EM_CMD_PD_CREATED, + EM_CMD_PD_UPDATED, + EM_CMD_PD_DELETED, + + __EM_CMD_MAX, + EM_CMD_MAX = (__EM_CMD_MAX - 1) +}; + +#define EM_MCGRP_EVENT "event" + +#endif /* _UAPI_LINUX_ENERGY_MODEL_H */ -- cgit v1.2.3 From 531b87d865eb9e625c2e46ec8f06a65a6157ee45 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Sun, 26 Oct 2025 20:38:45 +0000 Subject: bpf: widen dynptr size/offset to 64 bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dynptr currently caps size and offset at 24 bits, which isn’t sufficient for file-backed use cases; even 32 bits can be limiting. Refactor dynptr helpers/kfuncs to use 64-bit size and offset, ensuring consistency across the APIs. This change does not affect internals of xdp, skb or other dynptrs, which continue to behave as before. Also it does not break binary compatibility. The widening enables large-file access support via dynptr, implemented in the next patches. Signed-off-by: Mykyta Yatsenko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251026203853.135105-3-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6829936d33f5..77edd0253989 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5618,7 +5618,7 @@ union bpf_attr { * Return * *sk* if casting is valid, or **NULL** otherwise. * - * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr) * Description * Get a dynptr to local memory *data*. * @@ -5661,7 +5661,7 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) + * long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. @@ -5671,7 +5671,7 @@ union bpf_attr { * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if * *flags* is not 0. * - * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. @@ -5692,7 +5692,7 @@ union bpf_attr { * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs, * other errors correspond to errors returned by **bpf_skb_store_bytes**\ (). * - * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len) * Description * Get a pointer to the underlying dynptr data. * -- cgit v1.2.3 From 82cb5be6ad64198a3a028aeb49dcc7f6224d558a Mon Sep 17 00:00:00 2001 From: Wilfred Mallawa Date: Wed, 22 Oct 2025 10:19:36 +1000 Subject: net/tls: support setting the maximum payload size During a handshake, an endpoint may specify a maximum record size limit. Currently, the kernel defaults to TLS_MAX_PAYLOAD_SIZE (16KB) for the maximum record size. Meaning that, the outgoing records from the kernel can exceed a lower size negotiated during the handshake. In such a case, the TLS endpoint must send a fatal "record_overflow" alert [1], and thus the record is discarded. Upcoming Western Digital NVMe-TCP hardware controllers implement TLS support. For these devices, supporting TLS record size negotiation is necessary because the maximum TLS record size supported by the controller is less than the default 16KB currently used by the kernel. Currently, there is no way to inform the kernel of such a limit. This patch adds support to a new setsockopt() option `TLS_TX_MAX_PAYLOAD_LEN` that allows for setting the maximum plaintext fragment size. Once set, outgoing records are no larger than the size specified. This option can be used to specify the record size limit. [1] https://www.rfc-editor.org/rfc/rfc8449 Signed-off-by: Wilfred Mallawa Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20251022001937.20155-1-wilfred.opensource@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/tls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index b66a800389cc..b8b9c42f848c 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -41,6 +41,7 @@ #define TLS_RX 2 /* Set receive parameters */ #define TLS_TX_ZEROCOPY_RO 3 /* TX zerocopy (only sendfile now) */ #define TLS_RX_EXPECT_NO_PAD 4 /* Attempt opportunistic zero-copy */ +#define TLS_TX_MAX_PAYLOAD_LEN 5 /* Maximum plaintext size */ /* Supported versions */ #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) @@ -194,6 +195,7 @@ enum { TLS_INFO_RXCONF, TLS_INFO_ZC_RO_TX, TLS_INFO_RX_NO_PAD, + TLS_INFO_TX_MAX_PAYLOAD_LEN, __TLS_INFO_MAX, }; #define TLS_INFO_MAX (__TLS_INFO_MAX - 1) -- cgit v1.2.3 From feeaf1346f80ffb181b6f9b739628103aa73b067 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Sat, 18 Oct 2025 11:57:36 +0800 Subject: bpf: Add overwrite mode for BPF ring buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the BPF ring buffer is full, a new event cannot be recorded until one or more old events are consumed to make enough space for it. In cases such as fault diagnostics, where recent events are more useful than older ones, this mechanism may lead to critical events being lost. So add overwrite mode for BPF ring buffer to address it. In this mode, the new event overwrites the oldest event when the buffer is full. The basic idea is as follows: 1. producer_pos tracks the next position to record new event. When there is enough free space, producer_pos is simply advanced by producer to make space for the new event. 2. To avoid waiting for consumer when the buffer is full, a new variable, overwrite_pos, is introduced for producer. It points to the oldest event committed in the buffer. It is advanced by producer to discard one or more oldest events to make space for the new event when the buffer is full. 3. pending_pos tracks the oldest event to be committed. pending_pos is never passed by producer_pos, so multiple producers never write to the same position at the same time. The following example diagrams show how it works in a 4096-byte ring buffer. 1. At first, {producer,overwrite,pending,consumer}_pos are all set to 0. 0 512 1024 1536 2048 2560 3072 3584 4096 +-----------------------------------------------------------------------+ | | | | | | +-----------------------------------------------------------------------+ ^ | | producer_pos = 0 overwrite_pos = 0 pending_pos = 0 consumer_pos = 0 2. Now reserve a 512-byte event A. There is enough free space, so A is allocated at offset 0. And producer_pos is advanced to 512, the end of A. Since A is not submitted, the BUSY bit is set. 0 512 1024 1536 2048 2560 3072 3584 4096 +-----------------------------------------------------------------------+ | | | | A | | | [BUSY] | | +-----------------------------------------------------------------------+ ^ ^ | | | | | producer_pos = 512 | overwrite_pos = 0 pending_pos = 0 consumer_pos = 0 3. Reserve event B, size 1024. B is allocated at offset 512 with BUSY bit set, and producer_pos is advanced to the end of B. 0 512 1024 1536 2048 2560 3072 3584 4096 +-----------------------------------------------------------------------+ | | | | | A | B | | | [BUSY] | [BUSY] | | +-----------------------------------------------------------------------+ ^ ^ | | | | | producer_pos = 1536 | overwrite_pos = 0 pending_pos = 0 consumer_pos = 0 4. Reserve event C, size 2048. C is allocated at offset 1536, and producer_pos is advanced to 3584. 0 512 1024 1536 2048 2560 3072 3584 4096 +-----------------------------------------------------------------------+ | | | | | | A | B | C | | | [BUSY] | [BUSY] | [BUSY] | | +-----------------------------------------------------------------------+ ^ ^ | | | | | producer_pos = 3584 | overwrite_pos = 0 pending_pos = 0 consumer_pos = 0 5. Submit event A. The BUSY bit of A is cleared. B becomes the oldest event to be committed, so pending_pos is advanced to 512, the start of B. 0 512 1024 1536 2048 2560 3072 3584 4096 +-----------------------------------------------------------------------+ | | | | | | A | B | C | | | | [BUSY] | [BUSY] | | +-----------------------------------------------------------------------+ ^ ^ ^ | | | | | | | pending_pos = 512 producer_pos = 3584 | overwrite_pos = 0 consumer_pos = 0 6. Submit event B. The BUSY bit of B is cleared, and pending_pos is advanced to the start of C, which is now the oldest event to be committed. 0 512 1024 1536 2048 2560 3072 3584 4096 +-----------------------------------------------------------------------+ | | | | | | A | B | C | | | | | [BUSY] | | +-----------------------------------------------------------------------+ ^ ^ ^ | | | | | | | pending_pos = 1536 producer_pos = 3584 | overwrite_pos = 0 consumer_pos = 0 7. Reserve event D, size 1536 (3 * 512). There are 2048 bytes not being written between producer_pos (currently 3584) and pending_pos, so D is allocated at offset 3584, and producer_pos is advanced by 1536 (from 3584 to 5120). Since event D will overwrite all bytes of event A and the first 512 bytes of event B, overwrite_pos is advanced to the start of event C, the oldest event that is not overwritten. 0 512 1024 1536 2048 2560 3072 3584 4096 +-----------------------------------------------------------------------+ | | | | | | D End | | C | D Begin| | [BUSY] | | [BUSY] | [BUSY] | +-----------------------------------------------------------------------+ ^ ^ ^ | | | | | pending_pos = 1536 | | overwrite_pos = 1536 | | | producer_pos=5120 | consumer_pos = 0 8. Reserve event E, size 1024. Although there are 512 bytes not being written between producer_pos and pending_pos, E cannot be reserved, as it would overwrite the first 512 bytes of event C, which is still being written. 9. Submit event C and D. pending_pos is advanced to the end of D. 0 512 1024 1536 2048 2560 3072 3584 4096 +-----------------------------------------------------------------------+ | | | | | | D End | | C | D Begin| | | | | | +-----------------------------------------------------------------------+ ^ ^ ^ | | | | | overwrite_pos = 1536 | | | producer_pos=5120 | pending_pos=5120 | consumer_pos = 0 The performance data for overwrite mode will be provided in a follow-up patch that adds overwrite-mode benchmarks. A sample of performance data for non-overwrite mode, collected on an x86_64 CPU and an arm64 CPU, before and after this patch, is shown below. As we can see, no obvious performance regression occurs. - x86_64 (AMD EPYC 9654) Before: Ringbuf, multi-producer contention ================================== rb-libbpf nr_prod 1 11.623 ± 0.027M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 2 15.812 ± 0.014M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 3 7.871 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 4 6.703 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 8 2.896 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 12 2.054 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 16 1.864 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 20 1.580 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 24 1.484 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 28 1.369 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 32 1.316 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 36 1.272 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 40 1.239 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 44 1.226 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 48 1.213 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 52 1.193 ± 0.001M/s (drops 0.000 ± 0.000M/s) After: Ringbuf, multi-producer contention ================================== rb-libbpf nr_prod 1 11.845 ± 0.036M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 2 15.889 ± 0.006M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 3 8.155 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 4 6.708 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 8 2.918 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 12 2.065 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 16 1.870 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 20 1.582 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 24 1.482 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 28 1.372 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 32 1.323 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 36 1.264 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 40 1.236 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 44 1.209 ± 0.002M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 48 1.189 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 52 1.165 ± 0.002M/s (drops 0.000 ± 0.000M/s) - arm64 (HiSilicon Kunpeng 920) Before: Ringbuf, multi-producer contention ================================== rb-libbpf nr_prod 1 11.310 ± 0.623M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 2 9.947 ± 0.004M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 3 6.634 ± 0.011M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 4 4.502 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 8 3.888 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 12 3.372 ± 0.005M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 16 3.189 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 20 2.998 ± 0.006M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 24 3.086 ± 0.018M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 28 2.845 ± 0.004M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 32 2.815 ± 0.008M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 36 2.771 ± 0.009M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 40 2.814 ± 0.011M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 44 2.752 ± 0.006M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 48 2.695 ± 0.006M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 52 2.710 ± 0.006M/s (drops 0.000 ± 0.000M/s) After: Ringbuf, multi-producer contention ================================== rb-libbpf nr_prod 1 11.283 ± 0.550M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 2 9.993 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 3 6.898 ± 0.006M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 4 5.257 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 8 3.830 ± 0.005M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 12 3.528 ± 0.013M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 16 3.265 ± 0.018M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 20 2.990 ± 0.007M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 24 2.929 ± 0.014M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 28 2.898 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 32 2.818 ± 0.006M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 36 2.789 ± 0.012M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 40 2.770 ± 0.006M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 44 2.651 ± 0.007M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 48 2.669 ± 0.005M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 52 2.695 ± 0.009M/s (drops 0.000 ± 0.000M/s) Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20251018035738.4039621-2-xukuohai@huaweicloud.com --- include/uapi/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 77edd0253989..1d73f165394d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1430,6 +1430,9 @@ enum { /* Do not translate kernel bpf_arena pointers to user pointers */ BPF_F_NO_USER_CONV = (1U << 18), + +/* Enable BPF ringbuf overwrite mode */ + BPF_F_RB_OVERWRITE = (1U << 19), }; /* Flags for BPF_PROG_QUERY. */ @@ -6231,6 +6234,7 @@ enum { BPF_RB_RING_SIZE = 1, BPF_RB_CONS_POS = 2, BPF_RB_PROD_POS = 3, + BPF_RB_OVERWRITE_POS = 4, }; /* BPF ring buffer constants */ -- cgit v1.2.3 From f74ee32963f1b74865fe679e2475450434fea51c Mon Sep 17 00:00:00 2001 From: Qinxin Xia Date: Tue, 28 Oct 2025 20:09:00 +0800 Subject: tools/dma: move dma_map_benchmark from selftests to tools/dma dma_map_benchmark is a standalone developer tool rather than an automated selftest. It has no pass/fail criteria, expects manual invocation, and is built as a normal userspace binary. Move it to tools/dma/ and add a minimal Makefile. Suggested-by: Marek Szyprowski Suggested-by: Barry Song Signed-off-by: Qinxin Xia Acked-by: Barry Song Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20251028120900.2265511-3-xiaqinxin@huawei.com --- include/uapi/linux/map_benchmark.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 include/uapi/linux/map_benchmark.h (limited to 'include/uapi') diff --git a/include/uapi/linux/map_benchmark.h b/include/uapi/linux/map_benchmark.h new file mode 100644 index 000000000000..c2d91088a40d --- /dev/null +++ b/include/uapi/linux/map_benchmark.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (C) 2022-2025 HiSilicon Limited. + */ + +#ifndef _UAPI_DMA_BENCHMARK_H +#define _UAPI_DMA_BENCHMARK_H + +#include + +#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) +#define DMA_MAP_MAX_THREADS 1024 +#define DMA_MAP_MAX_SECONDS 300 +#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC) + +#define DMA_MAP_BIDIRECTIONAL 0 +#define DMA_MAP_TO_DEVICE 1 +#define DMA_MAP_FROM_DEVICE 2 + +struct map_benchmark { + __u64 avg_map_100ns; /* average map latency in 100ns */ + __u64 map_stddev; /* standard deviation of map latency */ + __u64 avg_unmap_100ns; /* as above */ + __u64 unmap_stddev; + __u32 threads; /* how many threads will do map/unmap in parallel */ + __u32 seconds; /* how long the test will last */ + __s32 node; /* which numa node this benchmark will run on */ + __u32 dma_bits; /* DMA addressing capability */ + __u32 dma_dir; /* DMA data direction */ + __u32 dma_trans_ns; /* time for DMA transmission in ns */ + __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ + __u8 expansion[76]; /* For future use */ +}; + +#endif /* _UAPI_DMA_BENCHMARK_H */ -- cgit v1.2.3 From c69993ecdd4dfde2b7da08b022052a33b203da07 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Oct 2025 15:17:05 +0200 Subject: perf: Support deferred user unwind Add support for deferred userspace unwind to perf. Where perf currently relies on in-place stack unwinding; from NMI context and all that. This moves the userspace part of the unwind to right before the return-to-userspace. This has two distinct benefits, the biggest is that it moves the unwind to a faultable context. It becomes possible to fault in debug info (.eh_frame, SFrame etc.) that might not otherwise be readily available. And secondly, it de-duplicates the user callchain where multiple samples happen during the same kernel entry. To facilitate this the perf interface is extended with a new record type: PERF_RECORD_CALLCHAIN_DEFERRED and two new attribute flags: perf_event_attr::defer_callchain - to request the user unwind be deferred perf_event_attr::defer_output - to request PERF_RECORD_CALLCHAIN_DEFERRED records The existing PERF_RECORD_SAMPLE callchain section gets a new context type: PERF_CONTEXT_USER_DEFERRED After which will come a single entry, denoting the 'cookie' of the deferred callchain that should be attached here, matching the 'cookie' field of the above mentioned PERF_RECORD_CALLCHAIN_DEFERRED. The 'defer_callchain' flag is expected on all events with PERF_SAMPLE_CALLCHAIN. The 'defer_output' flag is expect on the event responsible for collecting side-band events (like mmap, comm etc.). Setting 'defer_output' on multiple events will get you duplicated PERF_RECORD_CALLCHAIN_DEFERRED records. Based on earlier patches by Josh and Steven. Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20251023150002.GR4067720@noisy.programming.kicks-ass.net --- include/uapi/linux/perf_event.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 78a362b80027..d292f96bc06f 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -463,7 +463,9 @@ struct perf_event_attr { inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ - __reserved_1 : 26; + defer_callchain: 1, /* request PERF_RECORD_CALLCHAIN_DEFERRED records */ + defer_output : 1, /* output PERF_RECORD_CALLCHAIN_DEFERRED records */ + __reserved_1 : 24; union { __u32 wakeup_events; /* wake up every n events */ @@ -1239,6 +1241,22 @@ enum perf_event_type { */ PERF_RECORD_AUX_OUTPUT_HW_ID = 21, + /* + * This user callchain capture was deferred until shortly before + * returning to user space. Previous samples would have kernel + * callchains only and they need to be stitched with this to make full + * callchains. + * + * struct { + * struct perf_event_header header; + * u64 cookie; + * u64 nr; + * u64 ips[nr]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CALLCHAIN_DEFERRED = 22, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -1269,6 +1287,7 @@ enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, PERF_CONTEXT_KERNEL = (__u64)-128, PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_USER_DEFERRED = (__u64)-640, PERF_CONTEXT_GUEST = (__u64)-2048, PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, -- cgit v1.2.3 From 4061c43a99772c66c378cfacaa71550ab3b35909 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Oct 2025 09:45:48 +0100 Subject: pidfs: add missing PIDFD_INFO_SIZE_VER1 We grew struct pidfd_info not too long ago. Link: https://patch.msgid.link/20251028-work-coredump-signal-v1-3-ca449b7b7aa0@kernel.org Fixes: 1d8db6fd698d ("pidfs, coredump: add PIDFD_INFO_COREDUMP") Reviewed-by: Alexander Mikhalitsyn Reviewed-by: Oleg Nesterov Signed-off-by: Christian Brauner --- include/uapi/linux/pidfd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 957db425d459..6ccbabd9a68d 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -28,6 +28,7 @@ #define PIDFD_INFO_COREDUMP (1UL << 4) /* Only returned if requested. */ #define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */ +#define PIDFD_INFO_SIZE_VER1 72 /* sizeof second published struct */ /* * Values for @coredump_mask in pidfd_info. -- cgit v1.2.3 From dfd78546c95330db2252e0d7e937a15ab5eddb4e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Oct 2025 09:45:50 +0100 Subject: pidfd: add a new supported_mask field Some of the future fields in struct pidfd_info can be optional. If the kernel has nothing to emit in that field, then it doesn't set the flag in the reply. This presents a problem: There is currently no way to know what mask flags the kernel supports since one can't always count on them being in the reply. Add a new PIDFD_INFO_SUPPORTED_MASK flag and field that the kernel can set in the reply. Userspace can use this to determine if the fields it requires from the kernel are supported. This also gives us a way to deprecate fields in the future, if that should become necessary. Link: https://patch.msgid.link/20251028-work-coredump-signal-v1-5-ca449b7b7aa0@kernel.org Reviewed-by: Alexander Mikhalitsyn Reviewed-by: Oleg Nesterov Signed-off-by: Christian Brauner --- include/uapi/linux/pidfd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 6ccbabd9a68d..e05caa0e00fe 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -26,9 +26,11 @@ #define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */ #define PIDFD_INFO_EXIT (1UL << 3) /* Only returned if requested. */ #define PIDFD_INFO_COREDUMP (1UL << 4) /* Only returned if requested. */ +#define PIDFD_INFO_SUPPORTED_MASK (1UL << 5) /* Want/got supported mask flags */ #define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */ #define PIDFD_INFO_SIZE_VER1 72 /* sizeof second published struct */ +#define PIDFD_INFO_SIZE_VER2 80 /* sizeof third published struct */ /* * Values for @coredump_mask in pidfd_info. @@ -94,6 +96,7 @@ struct pidfd_info { __s32 exit_code; __u32 coredump_mask; __u32 __spare1; + __u64 supported_mask; /* Mask flags that this kernel supports */ }; #define PIDFS_IOCTL_MAGIC 0xFF -- cgit v1.2.3 From 036375522be8425874e9e0f907c7127e315c7a52 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 Oct 2025 09:45:53 +0100 Subject: pidfs: expose coredump signal Userspace needs access to the signal that caused the coredump before the coredumping process has been reaped. Expose it as part of the coredump information in struct pidfd_info. After the process has been reaped that info is also available as part of PIDFD_INFO_EXIT's exit_code field. Link: https://patch.msgid.link/20251028-work-coredump-signal-v1-8-ca449b7b7aa0@kernel.org Reviewed-by: Alexander Mikhalitsyn Reviewed-by: Oleg Nesterov Signed-off-by: Christian Brauner --- include/uapi/linux/pidfd.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index e05caa0e00fe..ea9a6811fc76 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -27,6 +27,7 @@ #define PIDFD_INFO_EXIT (1UL << 3) /* Only returned if requested. */ #define PIDFD_INFO_COREDUMP (1UL << 4) /* Only returned if requested. */ #define PIDFD_INFO_SUPPORTED_MASK (1UL << 5) /* Want/got supported mask flags */ +#define PIDFD_INFO_COREDUMP_SIGNAL (1UL << 6) /* Always returned if PIDFD_INFO_COREDUMP is requested. */ #define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */ #define PIDFD_INFO_SIZE_VER1 72 /* sizeof second published struct */ @@ -94,8 +95,10 @@ struct pidfd_info { __u32 fsuid; __u32 fsgid; __s32 exit_code; - __u32 coredump_mask; - __u32 __spare1; + struct /* coredump info */ { + __u32 coredump_mask; + __u32 coredump_signal; + }; __u64 supported_mask; /* Mask flags that this kernel supports */ }; -- cgit v1.2.3 From 30176bf7c871681df506f3165ffe76ec462db991 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 29 Oct 2025 16:32:06 +0100 Subject: dpll: add phase-adjust-gran pin attribute Phase-adjust values are currently limited by a min-max range. Some hardware requires, for certain pin types, that values be multiples of a specific granularity, as in the zl3073x driver. Add a `phase-adjust-gran` pin attribute and an appropriate field in dpll_pin_properties. If set by the driver, use its value to validate user-provided phase-adjust values. Reviewed-by: Michal Schmidt Reviewed-by: Petr Oros Tested-by: Prathosh Satish Signed-off-by: Ivan Vecera Reviewed-by: Jiri Pirko Reviewed-by: Arkadiusz Kubalewski Link: https://patch.msgid.link/20251029153207.178448-2-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/dpll.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index ab1725a954d7..69d35570ac4f 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -251,6 +251,7 @@ enum dpll_a_pin { DPLL_A_PIN_ESYNC_FREQUENCY_SUPPORTED, DPLL_A_PIN_ESYNC_PULSE, DPLL_A_PIN_REFERENCE_SYNC, + DPLL_A_PIN_PHASE_ADJUST_GRAN, __DPLL_A_PIN_MAX, DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1) -- cgit v1.2.3 From bc49af56eea866c34d21bf582f65b02fc8c06ec3 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 28 Oct 2025 20:34:23 -0700 Subject: blktrace: add support for REQ_OP_WRITE_ZEROES tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, REQ_OP_WRITE_ZEROES operations are not handled in the blktrace infrastructure, resulting in incorrect or missing operation labels in ftrace blktrace output. This manifests as write-zeroes operations appearing with incorrect labels like "N" instead of a proper "WZ" designation. This patch adds complete support for REQ_OP_WRITE_ZEROES across the blktrace infrastructure: Add BLK_TC_WRITE_ZEROES trace category in blktrace_api.h and update BLK_TC_END_V2 marker accordingly Map REQ_OP_WRITE_ZEROES to BLK_TC_WRITE_ZEROES in __blk_add_trace() to ensure proper trace event categorization Update fill_rwbs() to generate "WZ" label for write-zeroes operations in ftrace output, making them easily identifiable Add "write-zeroes" string mapping in act_to_str array for debugfs filter interface Update blk_fill_rwbs() to handle REQ_OP_WRITE_ZEROES for block layer event tracing With this fix, write-zeroes operations are now correctly traced and displayed. =========================================================== BEFORE THIS PATCH =========================================================== blkdiscard -z -o 0 -l 40960 /dev/nvme0n1 blkdiscard-3809 [030] ..... 1212.253701: block_bio_queue: 259,0 NS 0 + 80 [blkdiscard] blkdiscard-3809 [030] ..... 1212.253703: block_getrq: 259,0 NS 0 + 80 [blkdiscard] blkdiscard-3809 [030] ..... 1212.253704: block_io_start: 259,0 NS 40960 () 0 + 80 be,0,4 [blkdiscard] blkdiscard-3809 [030] ..... 1212.253704: block_plug: [blkdiscard] blkdiscard-3809 [030] ..... 1212.253706: block_unplug: [blkdiscard] 1 blkdiscard-3809 [030] ..... 1212.253706: block_rq_insert: 259,0 NS 40960 () 0 + 80 be,0,4 [blkdiscard] kworker/30:1H-566 [030] ..... 1212.253726: block_rq_issue: 259,0 NS 40960 () 0 + 80 be,0,4 [kworker/30:1H] -0 [030] d.h1. 1212.253957: block_rq_complete: 259,0 NS () 0 + 80 be,0,4 [0] -0 [030] dNh1. 1212.253960: block_io_done: 259,0 NS 0 () 0 + 0 none,0,0 [swapper/30] Trace Event Breakdown: Event | Device | Op | Sector | Sectors | Byte Size | Calculation block_bio_queue | 259,0 | NS | 0 | 80 | - | 80 × 512 = 40,960 block_getrq | 259,0 | NS | 0 | 80 | - | 80 × 512 = 40,960 block_io_start | 259,0 | NS | 0 | 80 | 40960 | Direct from trace block_rq_insert | 259,0 | NS | 0 | 80 | 40960 | Direct from trace block_rq_issue | 259,0 | NS | 0 | 80 | 40960 | Direct from trace block_rq_complete | 259,0 | NS | 0 | 80 | - | 80 × 512 = 40,960 block_io_done | 259,0 | NS | 0 | 0 | 0 | Completion (no data) Total Bytes Transferred: Sectors: 80 Bytes: 80 × 512 = 40,960 bytes =========================================================== AFTER THIS PATCH =========================================================== blkdiscard -z -o 0 -l 40960 /dev/nvme0n1 blkdiscard-2477 [020] ..... 960.989131: block_bio_queue: 259,0 WZS 0 + 80 [blkdiscard] blkdiscard-2477 [020] ..... 960.989134: block_getrq: 259,0 WZS 0 + 80 [blkdiscard] blkdiscard-2477 [020] ..... 960.989135: block_io_start: 259,0 WZS 40960 () 0 + 80 be,0,4 [blkdiscard] blkdiscard-2477 [020] ..... 960.989138: block_plug: [blkdiscard] blkdiscard-2477 [020] ..... 960.989140: block_unplug: [blkdiscard] 1 blkdiscard-2477 [020] ..... 960.989141: block_rq_insert: 259,0 WZS 40960 () 0 + 80 be,0,4 [blkdiscard] kworker/20:1H-736 [020] ..... 960.989166: block_rq_issue: 259,0 WZS 40960 () 0 + 80 be,0,4 [kworker/20:1H] -0 [020] d.h1. 960.989476: block_rq_complete: 259,0 WZS () 0 + 80 be,0,4 [0] -0 [020] dNh1. 960.989482: block_io_done: 259,0 WZS 0 () 0 + 0 none,0,0 [swapper/20] Trace Event Breakdown: Event | Device | Op | Sector | Sectors | Byte Size | Calculation block_bio_queue | 259,0 | WZS | 0 | 80 | - | 80 × 512 = 40,960 block_getrq | 259,0 | WZS | 0 | 80 | - | 80 × 512 = 40,960 block_io_start | 259,0 | WZS | 0 | 80 | 40960 | Direct from trace block_rq_insert | 259,0 | WZS | 0 | 80 | 40960 | Direct from trace block_rq_issue | 259,0 | WZS | 0 | 80 | 40960 | Direct from trace block_rq_complete | 259,0 | WZS | 0 | 80 | - | 80 × 512 = 40,960 block_io_done | 259,0 | WZS | 0 | 0 | 0 | Completion (no data) Total Bytes Transferred: Sectors: 80 Bytes: 80 × 512 = 40,960 bytes Tested with ftrace blktrace on NVMe devices using blkdiscard with the -z (write-zeroes) flag. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 30f3d2589365..7c092d9f3aa4 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -35,7 +35,9 @@ enum blktrace_cat { BLK_TC_ZONE_OPEN = 1ull << 20, /* zone open */ BLK_TC_ZONE_CLOSE = 1ull << 21, /* zone close */ - BLK_TC_END_V2 = 1ull << 21, + BLK_TC_WRITE_ZEROES = 1ull << 22, /* write-zeroes */ + + BLK_TC_END_V2 = 1ull << 22, }; #define BLK_TC_SHIFT (16) -- cgit v1.2.3 From 3760342fd6312416491d536144e39297fa5b1950 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Oct 2025 13:20:28 +0100 Subject: nstree: assign fixed ids to the initial namespaces The initial set of namespace comes with fixed inode numbers making it easy for userspace to identify them solely based on that information. This has long preceeded anything here. Similarly, let's assign fixed namespace ids for the initial namespaces. Kill the cookie and use a sequentially increasing number. This has the nice side-effect that the owning user namespace will always have a namespace id that is smaller than any of it's descendant namespaces. Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-15-2e6f823ebdc0@kernel.org Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index e098759ec917..f8bc2aad74d6 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -67,4 +67,18 @@ struct nsfs_file_handle { #define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */ #define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */ +enum init_ns_id { + IPC_NS_INIT_ID = 1ULL, + UTS_NS_INIT_ID = 2ULL, + USER_NS_INIT_ID = 3ULL, + PID_NS_INIT_ID = 4ULL, + CGROUP_NS_INIT_ID = 5ULL, + TIME_NS_INIT_ID = 6ULL, + NET_NS_INIT_ID = 7ULL, + MNT_NS_INIT_ID = 8ULL, +#ifdef __KERNEL__ + NS_LAST_INIT_ID = MNT_NS_INIT_ID, +#endif +}; + #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From 76b6f5dfb3fda76fce1f9990d6fa58adc711122b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Oct 2025 13:20:32 +0100 Subject: nstree: add listns() Add a new listns() system call that allows userspace to iterate through namespaces in the system. This provides a programmatic interface to discover and inspect namespaces, enhancing existing namespace apis. Currently, there is no direct way for userspace to enumerate namespaces in the system. Applications must resort to scanning /proc//ns/ across all processes, which is: 1. Inefficient - requires iterating over all processes 2. Incomplete - misses inactive namespaces that aren't attached to any running process but are kept alive by file descriptors, bind mounts, or parent namespace references 3. Permission-heavy - requires access to /proc for many processes 4. No ordering or ownership. 5. No filtering per namespace type: Must always iterate and check all namespaces. The list goes on. The listns() system call solves these problems by providing direct kernel-level enumeration of namespaces. It is similar to listmount() but obviously tailored to namespaces. /* * @req: Pointer to struct ns_id_req specifying search parameters * @ns_ids: User buffer to receive namespace IDs * @nr_ns_ids: Size of ns_ids buffer (maximum number of IDs to return) * @flags: Reserved for future use (must be 0) */ ssize_t listns(const struct ns_id_req *req, u64 *ns_ids, size_t nr_ns_ids, unsigned int flags); Returns: - On success: Number of namespace IDs written to ns_ids - On error: Negative error code /* * @size: Structure size * @ns_id: Starting point for iteration; use 0 for first call, then * use the last returned ID for subsequent calls to paginate * @ns_type: Bitmask of namespace types to include (from enum ns_type): * 0: Return all namespace types * MNT_NS: Mount namespaces * NET_NS: Network namespaces * USER_NS: User namespaces * etc. Can be OR'd together * @user_ns_id: Filter results to namespaces owned by this user namespace: * 0: Return all namespaces (subject to permission checks) * LISTNS_CURRENT_USER: Namespaces owned by caller's user namespace * Other value: Namespaces owned by the specified user namespace ID */ struct ns_id_req { __u32 size; /* sizeof(struct ns_id_req) */ __u32 spare; /* Reserved, must be 0 */ __u64 ns_id; /* Last seen namespace ID (for pagination) */ __u32 ns_type; /* Filter by namespace type(s) */ __u32 spare2; /* Reserved, must be 0 */ __u64 user_ns_id; /* Filter by owning user namespace */ }; Example 1: List all namespaces void list_all_namespaces(void) { struct ns_id_req req = { .size = sizeof(req), .ns_id = 0, /* Start from beginning */ .ns_type = 0, /* All types */ .user_ns_id = 0, /* All user namespaces */ }; uint64_t ids[100]; ssize_t ret; printf("All namespaces in the system:\n"); do { ret = listns(&req, ids, 100, 0); if (ret < 0) { perror("listns"); break; } for (ssize_t i = 0; i < ret; i++) printf(" Namespace ID: %llu\n", (unsigned long long)ids[i]); /* Continue from last seen ID */ if (ret > 0) req.ns_id = ids[ret - 1]; } while (ret == 100); /* Buffer was full, more may exist */ } Example 2: List network namespaces only void list_network_namespaces(void) { struct ns_id_req req = { .size = sizeof(req), .ns_id = 0, .ns_type = NET_NS, /* Only network namespaces */ .user_ns_id = 0, }; uint64_t ids[100]; ssize_t ret; ret = listns(&req, ids, 100, 0); if (ret < 0) { perror("listns"); return; } printf("Network namespaces: %zd found\n", ret); for (ssize_t i = 0; i < ret; i++) printf(" netns ID: %llu\n", (unsigned long long)ids[i]); } Example 3: List namespaces owned by current user namespace void list_owned_namespaces(void) { struct ns_id_req req = { .size = sizeof(req), .ns_id = 0, .ns_type = 0, /* All types */ .user_ns_id = LISTNS_CURRENT_USER, /* Current userns */ }; uint64_t ids[100]; ssize_t ret; ret = listns(&req, ids, 100, 0); if (ret < 0) { perror("listns"); return; } printf("Namespaces owned by my user namespace: %zd\n", ret); for (ssize_t i = 0; i < ret; i++) printf(" ns ID: %llu\n", (unsigned long long)ids[i]); } Example 4: List multiple namespace types void list_network_and_mount_namespaces(void) { struct ns_id_req req = { .size = sizeof(req), .ns_id = 0, .ns_type = NET_NS | MNT_NS, /* Network and mount */ .user_ns_id = 0, }; uint64_t ids[100]; ssize_t ret; ret = listns(&req, ids, 100, 0); printf("Network and mount namespaces: %zd found\n", ret); } Example 5: Pagination through large namespace sets void list_all_with_pagination(void) { struct ns_id_req req = { .size = sizeof(req), .ns_id = 0, .ns_type = 0, .user_ns_id = 0, }; uint64_t ids[50]; size_t total = 0; ssize_t ret; printf("Enumerating all namespaces with pagination:\n"); while (1) { ret = listns(&req, ids, 50, 0); if (ret < 0) { perror("listns"); break; } if (ret == 0) break; /* No more namespaces */ total += ret; printf(" Batch: %zd namespaces\n", ret); /* Last ID in this batch becomes start of next batch */ req.ns_id = ids[ret - 1]; if (ret < 50) break; /* Partial batch = end of results */ } printf("Total: %zu namespaces\n", total); } Permission Model listns() respects namespace isolation and capabilities: (1) Global listing (user_ns_id = 0): - Requires CAP_SYS_ADMIN in the namespace's owning user namespace - OR the namespace must be in the caller's namespace context (e.g., a namespace the caller is currently using) - User namespaces additionally allow listing if the caller has CAP_SYS_ADMIN in that user namespace itself (2) Owner-filtered listing (user_ns_id != 0): - Requires CAP_SYS_ADMIN in the specified owner user namespace - OR the namespace must be in the caller's namespace context - This allows unprivileged processes to enumerate namespaces they own (3) Visibility: - Only "active" namespaces are listed - A namespace is active if it has a non-zero __ns_ref_active count - This includes namespaces used by running processes, held by open file descriptors, or kept active by bind mounts - Inactive namespaces (kept alive only by internal kernel references) are not visible via listns() Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-19-2e6f823ebdc0@kernel.org Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index f8bc2aad74d6..a25e38d1c874 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -81,4 +81,48 @@ enum init_ns_id { #endif }; +enum ns_type { + TIME_NS = (1ULL << 7), /* CLONE_NEWTIME */ + MNT_NS = (1ULL << 17), /* CLONE_NEWNS */ + CGROUP_NS = (1ULL << 25), /* CLONE_NEWCGROUP */ + UTS_NS = (1ULL << 26), /* CLONE_NEWUTS */ + IPC_NS = (1ULL << 27), /* CLONE_NEWIPC */ + USER_NS = (1ULL << 28), /* CLONE_NEWUSER */ + PID_NS = (1ULL << 29), /* CLONE_NEWPID */ + NET_NS = (1ULL << 30), /* CLONE_NEWNET */ +}; + +/** + * struct ns_id_req - namespace ID request structure + * @size: size of this structure + * @spare: reserved for future use + * @filter: filter mask + * @ns_id: last namespace id + * @user_ns_id: owning user namespace ID + * + * Structure for passing namespace ID and miscellaneous parameters to + * statns(2) and listns(2). + * + * For statns(2) @param represents the request mask. + * For listns(2) @param represents the last listed mount id (or zero). + */ +struct ns_id_req { + __u32 size; + __u32 spare; + __u64 ns_id; + struct /* listns */ { + __u32 ns_type; + __u32 spare2; + __u64 user_ns_id; + }; +}; + +/* + * Special @user_ns_id value that can be passed to listns() + */ +#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */ + +/* List of all ns_id_req versions. */ +#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */ + #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From b36d4b6aa88ef039647228b98c59a875e92f8c8e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Oct 2025 13:20:33 +0100 Subject: arch: hookup listns() system call Add the listns() system call to all architectures. Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-20-2e6f823ebdc0@kernel.org Tested-by: syzbot@syzkaller.appspotmail.com Reviewed-by: Arnd Bergmann Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/uapi/asm-generic/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 04e0077fb4c9..942370b3f5d2 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -857,9 +857,11 @@ __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) __SYSCALL(__NR_file_getattr, sys_file_getattr) #define __NR_file_setattr 469 __SYSCALL(__NR_file_setattr, sys_file_setattr) +#define __NR_listns 470 +__SYSCALL(__NR_listns, sys_listns) #undef __NR_syscalls -#define __NR_syscalls 470 +#define __NR_syscalls 471 /* * 32 bit systems traditionally used different -- cgit v1.2.3 From c18d4b190a46651726c9a952667c74d2deb33c28 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Tue, 28 Oct 2025 20:30:05 +0000 Subject: net: Extend NAPI threaded polling to allow kthread based busy polling Add a new state NAPI_STATE_THREADED_BUSY_POLL to the NAPI state enum to enable and disable threaded busy polling. When threaded busy polling is enabled for a NAPI, enable NAPI_STATE_THREADED also. When the threaded NAPI is scheduled, set NAPI_STATE_IN_BUSY_POLL to signal napi_complete_done not to rearm interrupts. Whenever NAPI_STATE_THREADED_BUSY_POLL is unset, the NAPI_STATE_IN_BUSY_POLL will be unset, napi_complete_done unsets the NAPI_STATE_SCHED_THREADED bit also, which in turn will make the kthread go to sleep. Signed-off-by: Samiullah Khawaja Reviewed-by: Willem de Bruijn Acked-by: Martin Karsten Tested-by: Martin Karsten Link: https://patch.msgid.link/20251028203007.575686-2-skhawaja@google.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 48eb49aa03d4..048c8de1a130 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -80,6 +80,7 @@ enum netdev_qstats_scope { enum netdev_napi_threaded { NETDEV_NAPI_THREADED_DISABLED, NETDEV_NAPI_THREADED_ENABLED, + NETDEV_NAPI_THREADED_BUSY_POLL, }; enum { -- cgit v1.2.3 From e6e93fb01302e9b7a15d17f3b8a00eff8a601654 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 27 Oct 2025 13:27:59 +0100 Subject: ethtool: netlink: add ETHTOOL_MSG_MSE_GET and wire up PHY MSE access Introduce the userspace entry point for PHY MSE diagnostics via ethtool netlink. This exposes the core API added previously and returns both capability information and one or more snapshots. Userspace sends ETHTOOL_MSG_MSE_GET. The reply carries: - ETHTOOL_A_MSE_CAPABILITIES: scale limits and timing information - ETHTOOL_A_MSE_CHANNEL_* nests: one or more snapshots (per-channel if available, otherwise WORST, otherwise LINK) Link down returns -ENETDOWN. Changes: - YAML: add attribute sets (mse, mse-capabilities, mse-snapshot) and the mse-get operation - UAPI (generated): add ETHTOOL_A_MSE_* enums and message IDs, ETHTOOL_MSG_MSE_GET/REPLY - ethtool core: add net/ethtool/mse.c implementing the request, register genl op, and hook into ethnl dispatch - docs: document MSE_GET in ethtool-netlink.rst The include/uapi/linux/ethtool_netlink_generated.h is generated from Documentation/netlink/specs/ethtool.yaml. Signed-off-by: Oleksij Rempel Link: https://patch.msgid.link/20251027122801.982364-3-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink_generated.h | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 0e8ac0d974e2..b71b175df46d 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -803,6 +803,39 @@ enum { ETHTOOL_A_PSE_NTF_MAX = (__ETHTOOL_A_PSE_NTF_CNT - 1) }; +enum { + ETHTOOL_A_MSE_CAPABILITIES_MAX_AVERAGE_MSE = 1, + ETHTOOL_A_MSE_CAPABILITIES_MAX_PEAK_MSE, + ETHTOOL_A_MSE_CAPABILITIES_REFRESH_RATE_PS, + ETHTOOL_A_MSE_CAPABILITIES_NUM_SYMBOLS, + + __ETHTOOL_A_MSE_CAPABILITIES_CNT, + ETHTOOL_A_MSE_CAPABILITIES_MAX = (__ETHTOOL_A_MSE_CAPABILITIES_CNT - 1) +}; + +enum { + ETHTOOL_A_MSE_SNAPSHOT_AVERAGE_MSE = 1, + ETHTOOL_A_MSE_SNAPSHOT_PEAK_MSE, + ETHTOOL_A_MSE_SNAPSHOT_WORST_PEAK_MSE, + + __ETHTOOL_A_MSE_SNAPSHOT_CNT, + ETHTOOL_A_MSE_SNAPSHOT_MAX = (__ETHTOOL_A_MSE_SNAPSHOT_CNT - 1) +}; + +enum { + ETHTOOL_A_MSE_HEADER = 1, + ETHTOOL_A_MSE_CAPABILITIES, + ETHTOOL_A_MSE_CHANNEL_A, + ETHTOOL_A_MSE_CHANNEL_B, + ETHTOOL_A_MSE_CHANNEL_C, + ETHTOOL_A_MSE_CHANNEL_D, + ETHTOOL_A_MSE_WORST_CHANNEL, + ETHTOOL_A_MSE_LINK, + + __ETHTOOL_A_MSE_CNT, + ETHTOOL_A_MSE_MAX = (__ETHTOOL_A_MSE_CNT - 1) +}; + enum { ETHTOOL_MSG_USER_NONE = 0, ETHTOOL_MSG_STRSET_GET = 1, @@ -855,6 +888,7 @@ enum { ETHTOOL_MSG_RSS_SET, ETHTOOL_MSG_RSS_CREATE_ACT, ETHTOOL_MSG_RSS_DELETE_ACT, + ETHTOOL_MSG_MSE_GET, __ETHTOOL_MSG_USER_CNT, ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1) @@ -915,6 +949,7 @@ enum { ETHTOOL_MSG_RSS_CREATE_ACT_REPLY, ETHTOOL_MSG_RSS_CREATE_NTF, ETHTOOL_MSG_RSS_DELETE_NTF, + ETHTOOL_MSG_MSE_GET_REPLY, __ETHTOOL_MSG_KERNEL_CNT, ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) -- cgit v1.2.3 From f16469ee733ac52b2373216803699cbb05e82786 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 31 Oct 2025 14:28:54 -0700 Subject: PCI/IDE: Enumerate Selective Stream IDE capabilities Link encryption is a new PCIe feature enumerated by "PCIe r7.0 section 7.9.26 IDE Extended Capability". It is both a standalone port + endpoint capability, and a building block for the security protocol defined by "PCIe r7.0 section 11 TEE Device Interface Security Protocol (TDISP)". That protocol coordinates device security setup between a platform TSM (TEE Security Manager) and a device DSM (Device Security Manager). While the platform TSM can allocate resources like Stream ID and manage keys, it still requires system software to manage the IDE capability register block. Add register definitions and basic enumeration in preparation for Selective IDE Stream establishment. A follow on change selects the new CONFIG_PCI_IDE symbol. Note that while the IDE specification defines both a point-to-point "Link Stream" and a Root Port to endpoint "Selective Stream", only "Selective Stream" is considered for Linux as that is the predominant mode expected by Trusted Execution Environment Security Managers (TSMs), and it is the security model that limits the number of PCI components within the TCB in a PCIe topology with switches. Co-developed-by: Alexey Kardashevskiy Signed-off-by: Alexey Kardashevskiy Co-developed-by: Xu Yilun Signed-off-by: Xu Yilun Reviewed-by: Jonathan Cameron Reviewed-by: Alexey Kardashevskiy Reviewed-by: Aneesh Kumar K.V Link: https://patch.msgid.link/20251031212902.2256310-3-dan.j.williams@intel.com Signed-off-by: Dan Williams --- include/uapi/linux/pci_regs.h | 81 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 07e06aafec50..05bd22d9e352 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -754,6 +754,7 @@ #define PCI_EXT_CAP_ID_NPEM 0x29 /* Native PCIe Enclosure Management */ #define PCI_EXT_CAP_ID_PL_32GT 0x2A /* Physical Layer 32.0 GT/s */ #define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */ +#define PCI_EXT_CAP_ID_IDE 0x30 /* Integrity and Data Encryption */ #define PCI_EXT_CAP_ID_PL_64GT 0x31 /* Physical Layer 64.0 GT/s */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_64GT @@ -1249,4 +1250,84 @@ #define PCI_DVSEC_CXL_PORT_CTL 0x0c #define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 +/* Integrity and Data Encryption Extended Capability */ +#define PCI_IDE_CAP 0x04 +#define PCI_IDE_CAP_LINK 0x1 /* Link IDE Stream Supported */ +#define PCI_IDE_CAP_SELECTIVE 0x2 /* Selective IDE Streams Supported */ +#define PCI_IDE_CAP_FLOWTHROUGH 0x4 /* Flow-Through IDE Stream Supported */ +#define PCI_IDE_CAP_PARTIAL_HEADER_ENC 0x8 /* Partial Header Encryption Supported */ +#define PCI_IDE_CAP_AGGREGATION 0x10 /* Aggregation Supported */ +#define PCI_IDE_CAP_PCRC 0x20 /* PCRC Supported */ +#define PCI_IDE_CAP_IDE_KM 0x40 /* IDE_KM Protocol Supported */ +#define PCI_IDE_CAP_SEL_CFG 0x80 /* Selective IDE for Config Request Support */ +#define PCI_IDE_CAP_ALG __GENMASK(12, 8) /* Supported Algorithms */ +#define PCI_IDE_CAP_ALG_AES_GCM_256 0 /* AES-GCM 256 key size, 96b MAC */ +#define PCI_IDE_CAP_LINK_TC_NUM __GENMASK(15, 13) /* Link IDE TCs */ +#define PCI_IDE_CAP_SEL_NUM __GENMASK(23, 16) /* Supported Selective IDE Streams */ +#define PCI_IDE_CAP_TEE_LIMITED 0x1000000 /* TEE-Limited Stream Supported */ +#define PCI_IDE_CTL 0x08 +#define PCI_IDE_CTL_FLOWTHROUGH_IDE 0x4 /* Flow-Through IDE Stream Enabled */ + +#define PCI_IDE_LINK_STREAM_0 0xc /* First Link Stream Register Block */ +#define PCI_IDE_LINK_BLOCK_SIZE 8 +/* Link IDE Stream block, up to PCI_IDE_CAP_LINK_TC_NUM */ +#define PCI_IDE_LINK_CTL_0 0x00 /* First Link Control Register Offset in block */ +#define PCI_IDE_LINK_CTL_EN 0x1 /* Link IDE Stream Enable */ +#define PCI_IDE_LINK_CTL_TX_AGGR_NPR __GENMASK(3, 2) /* Tx Aggregation Mode NPR */ +#define PCI_IDE_LINK_CTL_TX_AGGR_PR __GENMASK(5, 4) /* Tx Aggregation Mode PR */ +#define PCI_IDE_LINK_CTL_TX_AGGR_CPL __GENMASK(7, 6) /* Tx Aggregation Mode CPL */ +#define PCI_IDE_LINK_CTL_PCRC_EN 0x100 /* PCRC Enable */ +#define PCI_IDE_LINK_CTL_PART_ENC __GENMASK(13, 10) /* Partial Header Encryption Mode */ +#define PCI_IDE_LINK_CTL_ALG __GENMASK(18, 14) /* Selection from PCI_IDE_CAP_ALG */ +#define PCI_IDE_LINK_CTL_TC __GENMASK(21, 19) /* Traffic Class */ +#define PCI_IDE_LINK_CTL_ID __GENMASK(31, 24) /* Stream ID */ +#define PCI_IDE_LINK_STS_0 0x4 /* First Link Status Register Offset in block */ +#define PCI_IDE_LINK_STS_STATE __GENMASK(3, 0) /* Link IDE Stream State */ +#define PCI_IDE_LINK_STS_IDE_FAIL 0x80000000 /* IDE fail message received */ + +/* Selective IDE Stream block, up to PCI_IDE_CAP_SELECTIVE_STREAMS_NUM */ +/* Selective IDE Stream Capability Register */ +#define PCI_IDE_SEL_CAP 0x00 +#define PCI_IDE_SEL_CAP_ASSOC_NUM __GENMASK(3, 0) +/* Selective IDE Stream Control Register */ +#define PCI_IDE_SEL_CTL 0x04 +#define PCI_IDE_SEL_CTL_EN 0x1 /* Selective IDE Stream Enable */ +#define PCI_IDE_SEL_CTL_TX_AGGR_NPR __GENMASK(3, 2) /* Tx Aggregation Mode NPR */ +#define PCI_IDE_SEL_CTL_TX_AGGR_PR __GENMASK(5, 4) /* Tx Aggregation Mode PR */ +#define PCI_IDE_SEL_CTL_TX_AGGR_CPL __GENMASK(7, 6) /* Tx Aggregation Mode CPL */ +#define PCI_IDE_SEL_CTL_PCRC_EN 0x100 /* PCRC Enable */ +#define PCI_IDE_SEL_CTL_CFG_EN 0x200 /* Selective IDE for Configuration Requests */ +#define PCI_IDE_SEL_CTL_PART_ENC __GENMASK(13, 10) /* Partial Header Encryption Mode */ +#define PCI_IDE_SEL_CTL_ALG __GENMASK(18, 14) /* Selection from PCI_IDE_CAP_ALG */ +#define PCI_IDE_SEL_CTL_TC __GENMASK(21, 19) /* Traffic Class */ +#define PCI_IDE_SEL_CTL_DEFAULT 0x400000 /* Default Stream */ +#define PCI_IDE_SEL_CTL_TEE_LIMITED 0x800000 /* TEE-Limited Stream */ +#define PCI_IDE_SEL_CTL_ID __GENMASK(31, 24) /* Stream ID */ +#define PCI_IDE_SEL_CTL_ID_MAX 255 +/* Selective IDE Stream Status Register */ +#define PCI_IDE_SEL_STS 0x08 +#define PCI_IDE_SEL_STS_STATE __GENMASK(3, 0) /* Selective IDE Stream State */ +#define PCI_IDE_SEL_STS_STATE_INSECURE 0 +#define PCI_IDE_SEL_STS_STATE_SECURE 2 +#define PCI_IDE_SEL_STS_IDE_FAIL 0x80000000 /* IDE fail message received */ +/* IDE RID Association Register 1 */ +#define PCI_IDE_SEL_RID_1 0x0c +#define PCI_IDE_SEL_RID_1_LIMIT __GENMASK(23, 8) +/* IDE RID Association Register 2 */ +#define PCI_IDE_SEL_RID_2 0x10 +#define PCI_IDE_SEL_RID_2_VALID 0x1 +#define PCI_IDE_SEL_RID_2_BASE __GENMASK(23, 8) +#define PCI_IDE_SEL_RID_2_SEG __GENMASK(31, 24) +/* Selective IDE Address Association Register Block, up to PCI_IDE_SEL_CAP_ASSOC_NUM */ +#define PCI_IDE_SEL_ADDR_BLOCK_SIZE 12 +#define PCI_IDE_SEL_ADDR_1(x) (20 + (x) * PCI_IDE_SEL_ADDR_BLOCK_SIZE) +#define PCI_IDE_SEL_ADDR_1_VALID 0x1 +#define PCI_IDE_SEL_ADDR_1_BASE_LOW __GENMASK(19, 8) +#define PCI_IDE_SEL_ADDR_1_LIMIT_LOW __GENMASK(31, 20) +/* IDE Address Association Register 2 is "Memory Limit Upper" */ +#define PCI_IDE_SEL_ADDR_2(x) (24 + (x) * PCI_IDE_SEL_ADDR_BLOCK_SIZE) +/* IDE Address Association Register 3 is "Memory Base Upper" */ +#define PCI_IDE_SEL_ADDR_3(x) (28 + (x) * PCI_IDE_SEL_ADDR_BLOCK_SIZE) +#define PCI_IDE_SEL_BLOCK_SIZE(nr_assoc) (20 + PCI_IDE_SEL_ADDR_BLOCK_SIZE * (nr_assoc)) + #endif /* LINUX_PCI_REGS_H */ -- cgit v1.2.3 From 3225f52cde56f46789a4972d3c54df8a4d75f022 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 31 Oct 2025 14:28:56 -0700 Subject: PCI/TSM: Establish Secure Sessions and Link Encryption The PCIe 7.0 specification, section 11, defines the Trusted Execution Environment (TEE) Device Interface Security Protocol (TDISP). This protocol definition builds upon Component Measurement and Authentication (CMA), and link Integrity and Data Encryption (IDE). It adds support for assigning devices (PCI physical or virtual function) to a confidential VM such that the assigned device is enabled to access guest private memory protected by technologies like Intel TDX, AMD SEV-SNP, RISCV COVE, or ARM CCA. The "TSM" (TEE Security Manager) is a concept in the TDISP specification of an agent that mediates between a "DSM" (Device Security Manager) and system software in both a VMM and a confidential VM. A VMM uses TSM ABIs to setup link security and assign devices. A confidential VM uses TSM ABIs to transition an assigned device into the TDISP "RUN" state and validate its configuration. From a Linux perspective the TSM abstracts many of the details of TDISP, IDE, and CMA. Some of those details leak through at times, but for the most part TDISP is an internal implementation detail of the TSM. CONFIG_PCI_TSM adds an "authenticated" attribute and "tsm/" subdirectory to pci-sysfs. Consider that the TSM driver may itself be a PCI driver. Userspace can watch for the arrival of a "TSM" device, /sys/class/tsm/tsm0/uevent KOBJ_CHANGE, to know when the PCI core has initialized TSM services. The operations that can be executed against a PCI device are split into two mutually exclusive operation sets, "Link" and "Security" (struct pci_tsm_{link,security}_ops). The "Link" operations manage physical link security properties and communication with the device's Device Security Manager firmware. These are the host side operations in TDISP. The "Security" operations coordinate the security state of the assigned virtual device (TDI). These are the guest side operations in TDISP. Only "link" (Secure Session and physical Link Encryption) operations are defined at this stage. There are placeholders for the device security (Trusted Computing Base entry / exit) operations. The locking allows for multiple devices to be executing commands simultaneously, one outstanding command per-device and an rwsem synchronizes the implementation relative to TSM registration/unregistration events. Thanks to Wu Hao for his work on an early draft of this support. Cc: Lukas Wunner Cc: Samuel Ortiz Acked-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Alexey Kardashevskiy Co-developed-by: Xu Yilun Signed-off-by: Xu Yilun Link: https://patch.msgid.link/20251031212902.2256310-5-dan.j.williams@intel.com Signed-off-by: Dan Williams --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 05bd22d9e352..f2759c1097bc 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -503,6 +503,7 @@ #define PCI_EXP_DEVCAP_PWR_VAL 0x03fc0000 /* Slot Power Limit Value */ #define PCI_EXP_DEVCAP_PWR_SCL 0x0c000000 /* Slot Power Limit Scale */ #define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */ +#define PCI_EXP_DEVCAP_TEE 0x40000000 /* TEE I/O (TDISP) Support */ #define PCI_EXP_DEVCTL 0x08 /* Device Control */ #define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ #define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ -- cgit v1.2.3 From c0c1262fbfbafe943dbccd5f97b500b72dbd2205 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 31 Oct 2025 14:28:57 -0700 Subject: PCI: Add PCIe Device 3 Extended Capability enumeration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCIe r7.0 Section 7.7.9 Device 3 Extended Capability Structure, defines the canonical location for determining the Flit Mode of a device. This status is a dependency for PCIe IDE enabling. Add a new fm_enabled flag to 'struct pci_dev'. Cc: Lukas Wunner Cc: Ilpo Järvinen Cc: Bjorn Helgaas Cc: Samuel Ortiz Cc: Alexey Kardashevskiy Cc: Xu Yilun Acked-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20251031212902.2256310-6-dan.j.williams@intel.com Signed-off-by: Dan Williams --- include/uapi/linux/pci_regs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index f2759c1097bc..3add74ae2594 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -755,6 +755,7 @@ #define PCI_EXT_CAP_ID_NPEM 0x29 /* Native PCIe Enclosure Management */ #define PCI_EXT_CAP_ID_PL_32GT 0x2A /* Physical Layer 32.0 GT/s */ #define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */ +#define PCI_EXT_CAP_ID_DEV3 0x2F /* Device 3 Capability/Control/Status */ #define PCI_EXT_CAP_ID_IDE 0x30 /* Integrity and Data Encryption */ #define PCI_EXT_CAP_ID_PL_64GT 0x31 /* Physical Layer 64.0 GT/s */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_64GT @@ -1246,6 +1247,12 @@ /* Deprecated old name, replaced with PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE */ #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE +/* Device 3 Extended Capability */ +#define PCI_DEV3_CAP 0x04 /* Device 3 Capabilities Register */ +#define PCI_DEV3_CTL 0x08 /* Device 3 Control Register */ +#define PCI_DEV3_STA 0x0c /* Device 3 Status Register */ +#define PCI_DEV3_STA_SEGMENT 0x8 /* Segment Captured (end-to-end flit-mode detected) */ + /* Compute Express Link (CXL r3.1, sec 8.1.5) */ #define PCI_DVSEC_CXL_PORT 3 #define PCI_DVSEC_CXL_PORT_CTL 0x0c -- cgit v1.2.3 From d923739e2e356424cc566143a3323c62cd6ed067 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 27 Oct 2025 09:44:26 +0100 Subject: rseq: Simplify the event notification Since commit 0190e4198e47 ("rseq: Deprecate RSEQ_CS_FLAG_NO_RESTART_ON_* flags") the bits in task::rseq_event_mask are meaningless and just extra work in terms of setting them individually. Aside of that the only relevant point where an event has to be raised is context switch. Neither the CPU nor MM CID can change without going through a context switch. Collapse them all into a single boolean which simplifies the code a lot and remove the pointless invocations which have been sprinkled all over the place for no value. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Mathieu Desnoyers Link: https://patch.msgid.link/20251027084306.336978188@linutronix.de --- include/uapi/linux/rseq.h | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index c233aae5eac9..1b76d508400c 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -114,20 +114,13 @@ struct rseq { /* * Restartable sequences flags field. * - * This field should only be updated by the thread which - * registered this data structure. Read by the kernel. - * Mainly used for single-stepping through rseq critical sections - * with debuggers. - * - * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT - * Inhibit instruction sequence block restart on preemption - * for this thread. - * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL - * Inhibit instruction sequence block restart on signal - * delivery for this thread. - * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE - * Inhibit instruction sequence block restart on migration for - * this thread. + * This field was initially intended to allow event masking for + * single-stepping through rseq critical sections with debuggers. + * The kernel does not support this anymore and the relevant bits + * are checked for being always false: + * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT + * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL + * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE */ __u32 flags; -- cgit v1.2.3 From f88191c7f3618405f1fc5c331a94ebfe601c5b08 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sat, 1 Nov 2025 18:56:51 +0100 Subject: mptcp: pm: in-kernel: record fullmesh endp nb Instead of iterating over all endpoints, under RCU read lock, just to check if one of them as the fullmesh flag, we can keep a counter of fullmesh endpoint, similar to what is done with the other flags. This counter is now checked, before iterating over all endpoints. Similar to the other counters, this new one is also exposed. A userspace app can then know when it is being used in a fullmesh mode, with potentially (too) many subflows. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251101-net-next-mptcp-fm-endp-nb-bind-v1-1-b4166772d6bb@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 87cfab874e24..04eea6d1d0a9 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -70,7 +70,8 @@ struct mptcp_info { __u64 mptcpi_bytes_acked; __u8 mptcpi_subflows_total; __u8 mptcpi_endp_laminar_max; - __u8 reserved[2]; + __u8 mptcpi_endp_fullmesh_max; + __u8 reserved; __u32 mptcpi_last_data_sent; __u32 mptcpi_last_data_recv; __u32 mptcpi_last_ack_recv; -- cgit v1.2.3 From 0bf0e2e4666822b62d7ad6473dc37fd6b377b5f1 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 5 Nov 2025 06:22:41 +0900 Subject: block: track zone conditions The function blk_revalidate_zone_cond() already caches the condition of all zones of a zoned block device in the zones_cond array of a gendisk. However, the zone conditions are updated only when the device is scanned or revalidated. Implement tracking of the runtime changes to zone conditions using the new cond field in struct blk_zone_wplug. The size of this structure remains 112 Bytes as the new field replaces the 4 Bytes padding at the end of the structure. Beause zones that do not have a zone write plug can be in the empty, implicit open, explicit open or full condition, the zones_cond array of a disk is used to track the conditions, of zones that do not have a zone write plug. The condition of such zone is updated in the disk zones_cond array when a zone reset, reset all or finish operation is executed, and also when a zone write plug is removed from the disk hash table when the zone becomes full. Since a device may automatically close an implicitly open zone when writing to an empty or closed zone, if the total number of open zones has reached the device limit, the BLK_ZONE_COND_IMP_OPEN and BLK_ZONE_COND_CLOSED zone conditions cannot be precisely tracked. To overcome this, the zone condition BLK_ZONE_COND_ACTIVE is introduced to represent a zone that has the condition BLK_ZONE_COND_IMP_OPEN, BLK_ZONE_COND_EXP_OPEN or BLK_ZONE_COND_CLOSED. This follows the definition of an active zone as defined in the NVMe Zoned Namespace specifications. As such, for a zoned device that has a limit on the maximum number of open zones, we will never have more zones in the BLK_ZONE_COND_ACTIVE condition than the device limit. This is compatible with the SCSI ZBC and ATA ZAC specifications for SMR HDDs as these devices do not have a limit on the number of active zones. The function disk_zone_wplug_set_wp_offset() is modified to use the new helper disk_zone_wplug_update_cond() to update a zone write plug condition whenever a zone write plug write offset is updated on submission or merging of write BIOs to a zone. The functions blk_zone_reset_bio_endio(), blk_zone_reset_all_bio_endio() and blk_zone_finish_bio_endio() are modified to update the condition of the zones targeted by reset, reset_all and finish operations, either using though disk_zone_wplug_set_wp_offset() for zones that have a zone write plug, or using the disk_zone_set_cond() helper to update the zones_cond array of the disk for zones that do not have a zone write plug. When a zone write plug is removed from the disk hash table (when the zone becomes empty or full), the condition of struct blk_zone_wplug is used to update the disk zones_cond array. Conversely, when a zone write plug is added to the disk hash table, the zones_cond array is used to initialize the zone write plug condition. Signed-off-by: Damien Le Moal Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/uapi/linux/blkzoned.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index f85743ef6e7d..5c7662971414 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -48,6 +48,8 @@ enum blk_zone_type { * FINISH ZONE command. * @BLK_ZONE_COND_READONLY: The zone is read-only. * @BLK_ZONE_COND_OFFLINE: The zone is offline (sectors cannot be read/written). + * @BLK_ZONE_COND_ACTIVE: The zone is either implicitly open, explicitly open, + * or closed. * * The Zone Condition state machine in the ZBC/ZAC standards maps the above * deinitions as: @@ -61,6 +63,13 @@ enum blk_zone_type { * * Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should * be considered invalid. + * + * The condition BLK_ZONE_COND_ACTIVE is used only with cached zone reports. + * It is used to report any of the BLK_ZONE_COND_IMP_OPEN, + * BLK_ZONE_COND_EXP_OPEN and BLK_ZONE_COND_CLOSED conditions. Conversely, a + * regular zone report will never report a zone condition using + * BLK_ZONE_COND_ACTIVE and instead use the conditions BLK_ZONE_COND_IMP_OPEN, + * BLK_ZONE_COND_EXP_OPEN or BLK_ZONE_COND_CLOSED as reported by the device. */ enum blk_zone_cond { BLK_ZONE_COND_NOT_WP = 0x0, @@ -71,6 +80,8 @@ enum blk_zone_cond { BLK_ZONE_COND_READONLY = 0xD, BLK_ZONE_COND_FULL = 0xE, BLK_ZONE_COND_OFFLINE = 0xF, + + BLK_ZONE_COND_ACTIVE = 0xFF, }; /** -- cgit v1.2.3 From b30ffcdc0c15a88f8866529d3532454e02571221 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 5 Nov 2025 06:22:45 +0900 Subject: block: introduce BLKREPORTZONESV2 ioctl Introduce the new BLKREPORTZONESV2 ioctl command to allow user applications access to the fast zone report implemented by blkdev_report_zones_cached(). This new ioctl is defined as number 142 and is documented in include/uapi/linux/fs.h. Unlike the existing BLKREPORTZONES ioctl, this new ioctl uses the flags field of struct blk_zone_report also as an input. If the user sets the BLK_ZONE_REP_CACHED flag as an input, then blkdev_report_zones_cached() is used to generate the zone report using cached zone information. If this flag is not set, then BLKREPORTZONESV2 behaves in the same manner as BLKREPORTZONES and the zone report is generated by accessing the zoned device. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/uapi/linux/blkzoned.h | 35 ++++++++++++++++++++++++++++++----- include/uapi/linux/fs.h | 2 +- 2 files changed, 31 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index 5c7662971414..e33f02703350 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -87,10 +87,20 @@ enum blk_zone_cond { /** * enum blk_zone_report_flags - Feature flags of reported zone descriptors. * - * @BLK_ZONE_REP_CAPACITY: Zone descriptor has capacity field. + * @BLK_ZONE_REP_CAPACITY: Output only. Indicates that zone descriptors in a + * zone report have a valid capacity field. + * @BLK_ZONE_REP_CACHED: Input only. Indicates that the zone report should be + * generated using cached zone information. In this case, + * the implicit open, explicit open and closed zone + * conditions are all reported with the + * BLK_ZONE_COND_ACTIVE condition. */ enum blk_zone_report_flags { - BLK_ZONE_REP_CAPACITY = (1 << 0), + /* Output flags */ + BLK_ZONE_REP_CAPACITY = (1U << 0), + + /* Input flags */ + BLK_ZONE_REP_CACHED = (1U << 31), }; /** @@ -133,6 +143,10 @@ struct blk_zone { * @sector: starting sector of report * @nr_zones: IN maximum / OUT actual * @flags: one or more flags as defined by enum blk_zone_report_flags. + * @flags: one or more flags as defined by enum blk_zone_report_flags. + * With BLKREPORTZONE, this field is ignored as an input and is valid + * only as an output. Using BLKREPORTZONEV2, this field is used as both + * input and output. * @zones: Space to hold @nr_zones @zones entries on reply. * * The array of at most @nr_zones must follow this structure in memory. @@ -159,9 +173,19 @@ struct blk_zone_range { /** * Zoned block device ioctl's: * - * @BLKREPORTZONE: Get zone information. Takes a zone report as argument. - * The zone report will start from the zone containing the - * sector specified in the report request structure. + * @BLKREPORTZONE: Get zone information from a zoned device. Takes a zone report + * as argument. The zone report will start from the zone + * containing the sector specified in struct blk_zone_report. + * The flags field of struct blk_zone_report is used as an + * output only and ignored as an input. + * DEPRECATED, use BLKREPORTZONEV2 instead. + * @BLKREPORTZONEV2: Same as @BLKREPORTZONE but uses the flags field of + * struct blk_zone_report as an input, allowing to get a zone + * report using cached zone information if the flag + * BLK_ZONE_REP_CACHED is set. In such case, the zone report + * may include zones with the condition @BLK_ZONE_COND_ACTIVE + * (c.f. the description of this condition above for more + * details). * @BLKRESETZONE: Reset the write pointer of the zones in the specified * sector range. The sector range must be zone aligned. * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors. @@ -180,5 +204,6 @@ struct blk_zone_range { #define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range) #define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range) #define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range) +#define BLKREPORTZONEV2 _IOWR(0x12, 142, struct blk_zone_report) #endif /* _UAPI_BLKZONED_H */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 957ce3343a4f..66ca526cf786 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -298,7 +298,7 @@ struct file_attr { #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) #define BLKGETDISKSEQ _IOR(0x12,128,__u64) -/* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */ +/* 130-136 and 142 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */ /* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */ #define BLKTRACESETUP2 _IOWR(0x12, 142, struct blk_user_trace_setup2) -- cgit v1.2.3 From b4ce5923e780d6896d4aaf19de5a27652b8bf1ea Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Wed, 5 Nov 2025 09:03:59 +0000 Subject: bpf, x86: add new map type: instructions array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On bpf(BPF_PROG_LOAD) syscall user-supplied BPF programs are translated by the verifier into "xlated" BPF programs. During this process the original instructions offsets might be adjusted and/or individual instructions might be replaced by new sets of instructions, or deleted. Add a new BPF map type which is aimed to keep track of how, for a given program, the original instructions were relocated during the verification. Also, besides keeping track of the original -> xlated mapping, make x86 JIT to build the xlated -> jitted mapping for every instruction listed in an instruction array. This is required for every future application of instruction arrays: static keys, indirect jumps and indirect calls. A map of the BPF_MAP_TYPE_INSN_ARRAY type must be created with a u32 keys and value of size 8. The values have different semantics for userspace and for BPF space. For userspace a value consists of two u32 values – xlated and jitted offsets. For BPF side the value is a real pointer to a jitted instruction. On map creation/initialization, before loading the program, each element of the map should be initialized to point to an instruction offset within the program. Before the program load such maps should be made frozen. After the program verification xlated and jitted offsets can be read via the bpf(2) syscall. If a tracked instruction is removed by the verifier, then the xlated offset is set to (u32)-1 which is considered to be too big for a valid BPF program offset. One such a map can, obviously, be used to track one and only one BPF program. If the verification process was unsuccessful, then the same map can be re-used to verify the program with a different log level. However, if the program was loaded fine, then such a map, being frozen in any case, can't be reused by other programs even after the program release. Example. Consider the following original and xlated programs: Original prog: Xlated prog: 0: r1 = 0x0 0: r1 = 0 1: *(u32 *)(r10 - 0x4) = r1 1: *(u32 *)(r10 -4) = r1 2: r2 = r10 2: r2 = r10 3: r2 += -0x4 3: r2 += -4 4: r1 = 0x0 ll 4: r1 = map[id:88] 6: call 0x1 6: r1 += 272 7: r0 = *(u32 *)(r2 +0) 8: if r0 >= 0x1 goto pc+3 9: r0 <<= 3 10: r0 += r1 11: goto pc+1 12: r0 = 0 7: r6 = r0 13: r6 = r0 8: if r6 == 0x0 goto +0x2 14: if r6 == 0x0 goto pc+4 9: call 0x76 15: r0 = 0xffffffff8d2079c0 17: r0 = *(u64 *)(r0 +0) 10: *(u64 *)(r6 + 0x0) = r0 18: *(u64 *)(r6 +0) = r0 11: r0 = 0x0 19: r0 = 0x0 12: exit 20: exit An instruction array map, containing, e.g., instructions [0,4,7,12] will be translated by the verifier to [0,4,13,20]. A map with index 5 (the middle of 16-byte instruction) or indexes greater than 12 (outside the program boundaries) would be rejected. The functionality provided by this patch will be extended in consequent patches to implement BPF Static Keys, indirect jumps, and indirect calls. Signed-off-by: Anton Protopopov Reviewed-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251105090410.1250500-2-a.s.protopopov@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1d73f165394d..f5713f59ac10 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1026,6 +1026,7 @@ enum bpf_map_type { BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, BPF_MAP_TYPE_ARENA, + BPF_MAP_TYPE_INSN_ARRAY, __MAX_BPF_MAP_TYPE }; @@ -7649,4 +7650,24 @@ enum bpf_kfunc_flags { BPF_F_PAD_ZEROS = (1ULL << 0), }; +/* + * Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type. + * + * Before the map is used the orig_off field should point to an + * instruction inside the program being loaded. The other fields + * must be set to 0. + * + * After the program is loaded, the xlated_off will be adjusted + * by the verifier to point to the index of the original instruction + * in the xlated program. If the instruction is deleted, it will + * be set to (u32)-1. The jitted_off will be set to the corresponding + * offset in the jitted image of the program. + */ +struct bpf_insn_array_value { + __u32 orig_off; + __u32 xlated_off; + __u32 jitted_off; + __u32 :32; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 5f20bc206beb902e32b77216cb7935b46ca00b0a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 23 Oct 2025 12:46:14 -0700 Subject: platform/x86: ISST: isst_if.h: fix all kernel-doc warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix all kernel-doc warnings in : - don't use "[]" in the variable name in kernel-doc - add a few missing entries - change "power_domain" to "power_domain_id" in kernel-doc to match the struct member name - add a leading '@' on a few existing kernel-doc lines - use '_' instead of '-' in struct member names Examples (but not all 27 warnings): Warning: include/uapi/linux/isst_if.h:63 struct member 'cpu_map' not described in 'isst_if_cpu_maps' Warning: ../include/uapi/linux/isst_if.h:95 struct member 'req_count' not described in 'isst_if_io_regs' Warning: include/uapi/linux/isst_if.h:132 struct member 'mbox_cmd' not described in 'isst_if_mbox_cmds' Warning: ../include/uapi/linux/isst_if.h:183 struct member 'supported' not described in 'isst_core_power' Warning: ../include/uapi/linux/isst_if.h:206 struct member 'power_domain_id' not described in 'isst_clos_param' Warning: ../include/uapi/linux/isst_if.h:239 struct member 'assoc_info' not described in 'isst_if_clos_assoc_cmds' Warning: ../include/uapi/linux/isst_if.h:286 struct member 'sst_tf_support' not described in 'isst_perf_level_info' Warning: ../include/uapi/linux/isst_if.h:375 struct member 'trl_freq_mhz' not described in 'isst_perf_level_data_info' Warning: ../include/uapi/linux/isst_if.h:475 struct member 'max_buckets' not described in 'isst_turbo_freq_info' Signed-off-by: Randy Dunlap Link: https://patch.msgid.link/20251023194615.180824-1-rdunlap@infradead.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/uapi/linux/isst_if.h | 50 ++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 23 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/isst_if.h b/include/uapi/linux/isst_if.h index 8197a4800604..40aa545101a3 100644 --- a/include/uapi/linux/isst_if.h +++ b/include/uapi/linux/isst_if.h @@ -52,7 +52,7 @@ struct isst_if_cpu_map { /** * struct isst_if_cpu_maps - structure for CPU map IOCTL * @cmd_count: Number of CPU mapping command in cpu_map[] - * @cpu_map[]: Holds one or more CPU map data structure + * @cpu_map: Holds one or more CPU map data structure * * This structure used with ioctl ISST_IF_GET_PHY_ID to send * one or more CPU mapping commands. Here IOCTL return value indicates @@ -82,8 +82,8 @@ struct isst_if_io_reg { /** * struct isst_if_io_regs - structure for IO register commands - * @cmd_count: Number of io reg commands in io_reg[] - * @io_reg[]: Holds one or more io_reg command structure + * @req_count: Number of io reg commands in io_reg[] + * @io_reg: Holds one or more io_reg command structure * * This structure used with ioctl ISST_IF_IO_CMD to send * one or more read/write commands to PUNIT. Here IOCTL return value @@ -120,7 +120,7 @@ struct isst_if_mbox_cmd { /** * struct isst_if_mbox_cmds - structure for mailbox commands * @cmd_count: Number of mailbox commands in mbox_cmd[] - * @mbox_cmd[]: Holds one or more mbox commands + * @mbox_cmd: Holds one or more mbox commands * * This structure used with ioctl ISST_IF_MBOX_COMMAND to send * one or more mailbox commands to PUNIT. Here IOCTL return value @@ -152,7 +152,7 @@ struct isst_if_msr_cmd { /** * struct isst_if_msr_cmds - structure for msr commands * @cmd_count: Number of mailbox commands in msr_cmd[] - * @msr_cmd[]: Holds one or more msr commands + * @msr_cmd: Holds one or more msr commands * * This structure used with ioctl ISST_IF_MSR_COMMAND to send * one or more MSR commands. IOCTL return value indicates number of @@ -167,8 +167,9 @@ struct isst_if_msr_cmds { * struct isst_core_power - Structure to get/set core_power feature * @get_set: 0: Get, 1: Set * @socket_id: Socket/package id - * @power_domain: Power Domain id + * @power_domain_id: Power Domain id * @enable: Feature enable status + * @supported: Power domain supports SST_CP interface * @priority_type: Priority type for the feature (ordered/proportional) * * Structure to get/set core_power feature state using IOCTL @@ -187,11 +188,11 @@ struct isst_core_power { * struct isst_clos_param - Structure to get/set clos praram * @get_set: 0: Get, 1: Set * @socket_id: Socket/package id - * @power_domain: Power Domain id - * clos: Clos ID for the parameters - * min_freq_mhz: Minimum frequency in MHz - * max_freq_mhz: Maximum frequency in MHz - * prop_prio: Proportional priority from 0-15 + * @power_domain_id: Power Domain id + * @clos: Clos ID for the parameters + * @min_freq_mhz: Minimum frequency in MHz + * @max_freq_mhz: Maximum frequency in MHz + * @prop_prio: Proportional priority from 0-15 * * Structure to get/set per clos property using IOCTL * ISST_IF_CLOS_PARAM. @@ -209,7 +210,7 @@ struct isst_clos_param { /** * struct isst_if_clos_assoc - Structure to assign clos to a CPU * @socket_id: Socket/package id - * @power_domain: Power Domain id + * @power_domain_id: Power Domain id * @logical_cpu: CPU number * @clos: Clos ID to assign to the logical CPU * @@ -228,6 +229,7 @@ struct isst_if_clos_assoc { * @get_set: Request is for get or set * @punit_cpu_map: Set to 1 if the CPU number is punit numbering not * Linux CPU number + * @assoc_info: CLOS data for this CPU * * Structure used to get/set associate CPUs to clos using IOCTL * ISST_IF_CLOS_ASSOC. @@ -257,7 +259,7 @@ struct isst_tpmi_instance_count { /** * struct isst_perf_level_info - Structure to get information on SST-PP levels * @socket_id: Socket/package id - * @power_domain: Power Domain id + * @power_domain_id: Power Domain id * @logical_cpu: CPU number * @clos: Clos ID to assign to the logical CPU * @max_level: Maximum performance level supported by the platform @@ -267,8 +269,8 @@ struct isst_tpmi_instance_count { * @feature_state: SST-BF and SST-TF (enabled/disabled) status at current level * @locked: SST-PP performance level change is locked/unlocked * @enabled: SST-PP feature is enabled or not - * @sst-tf_support: SST-TF support status at this level - * @sst-bf_support: SST-BF support status at this level + * @sst_tf_support: SST-TF support status at this level + * @sst_bf_support: SST-BF support status at this level * * Structure to get SST-PP details using IOCTL ISST_IF_PERF_LEVELS. */ @@ -289,7 +291,7 @@ struct isst_perf_level_info { /** * struct isst_perf_level_control - Structure to set SST-PP level * @socket_id: Socket/package id - * @power_domain: Power Domain id + * @power_domain_id: Power Domain id * @level: level to set * * Structure used change SST-PP level using IOCTL ISST_IF_PERF_SET_LEVEL. @@ -303,7 +305,7 @@ struct isst_perf_level_control { /** * struct isst_perf_feature_control - Structure to activate SST-BF/SST-TF * @socket_id: Socket/package id - * @power_domain: Power Domain id + * @power_domain_id: Power Domain id * @feature: bit 0 = SST-BF state, bit 1 = SST-TF state * * Structure used to enable SST-BF/SST-TF using IOCTL ISST_IF_PERF_SET_FEATURE. @@ -320,7 +322,7 @@ struct isst_perf_feature_control { /** * struct isst_perf_level_data_info - Structure to get SST-PP level details * @socket_id: Socket/package id - * @power_domain: Power Domain id + * @power_domain_id: Power Domain id * @level: SST-PP level for which caller wants to get information * @tdp_ratio: TDP Ratio * @base_freq_mhz: Base frequency in MHz @@ -341,8 +343,8 @@ struct isst_perf_feature_control { * @pm_fabric_freq_mhz: Fabric (Uncore) minimum frequency * @max_buckets: Maximum trl buckets * @max_trl_levels: Maximum trl levels - * @bucket_core_counts[TRL_MAX_BUCKETS]: Number of cores per bucket - * @trl_freq_mhz[TRL_MAX_LEVELS][TRL_MAX_BUCKETS]: maximum frequency + * @bucket_core_counts: Number of cores per bucket + * @trl_freq_mhz: maximum frequency * for a bucket and trl level * * Structure used to get information on frequencies and TDP for a SST-PP @@ -402,7 +404,7 @@ struct isst_perf_level_fabric_info { /** * struct isst_perf_level_cpu_mask - Structure to get SST-PP level CPU mask * @socket_id: Socket/package id - * @power_domain: Power Domain id + * @power_domain_id: Power Domain id * @level: SST-PP level for which caller wants to get information * @punit_cpu_map: Set to 1 if the CPU number is punit numbering not * Linux CPU number. If 0 CPU buffer is copied to user space @@ -430,7 +432,7 @@ struct isst_perf_level_cpu_mask { /** * struct isst_base_freq_info - Structure to get SST-BF frequencies * @socket_id: Socket/package id - * @power_domain: Power Domain id + * @power_domain_id: Power Domain id * @level: SST-PP level for which caller wants to get information * @high_base_freq_mhz: High priority CPU base frequency * @low_base_freq_mhz: Low priority CPU base frequency @@ -453,9 +455,11 @@ struct isst_base_freq_info { /** * struct isst_turbo_freq_info - Structure to get SST-TF frequencies * @socket_id: Socket/package id - * @power_domain: Power Domain id + * @power_domain_id: Power Domain id * @level: SST-PP level for which caller wants to get information * @max_clip_freqs: Maximum number of low priority core clipping frequencies + * @max_buckets: Maximum trl buckets + * @max_trl_levels: Maximum trl levels * @lp_clip_freq_mhz: Clip frequencies per trl level * @bucket_core_counts: Maximum number of cores for a bucket * @trl_freq_mhz: Frequencies per trl level for each bucket -- cgit v1.2.3 From c6230446b1a6f3c91effafd99f604de455da52e5 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 3 Nov 2025 12:20:20 +0000 Subject: net: dsa: add tagging driver for MaxLinear GSW1xx switch family Add support for a new DSA tagging protocol driver for the MaxLinear GSW1xx switch family. The GSW1xx switches use a proprietary 8-byte special tag inserted between the source MAC address and the EtherType field to indicate the source and destination ports for frames traversing the CPU port. Implement the tag handling logic to insert the special tag on transmit and parse it on receive. Signed-off-by: Daniel Golle Reviewed-by: Alexander Sverdlin Tested-by: Alexander Sverdlin Link: https://patch.msgid.link/0e973ebfd9433c30c96f50670da9e9449a0d98f2.1762170107.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index cfd200c87e5e..2c93b7b731c8 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -92,6 +92,7 @@ #define ETH_P_ETHERCAT 0x88A4 /* EtherCAT */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ +#define ETH_P_MXLGSW 0x88C3 /* MaxLinear GSW DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */ #define ETH_P_TIPC 0x88CA /* TIPC */ #define ETH_P_LLDP 0x88CC /* Link Layer Discovery Protocol */ -- cgit v1.2.3 From dae4a92399fa8d68aa917db6bb3245f83021e762 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 5 Nov 2025 16:26:02 -0800 Subject: psp: report basic stats from the core Track and report stats common to all psp devices from the core. A 'stale-event' is when the core marks the rx state of an active psp_assoc as incapable of authenticating psp encapsulated data. Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20251106002608.1578518-2-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/psp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/psp.h b/include/uapi/linux/psp.h index 607c42c39ba5..31592760ad79 100644 --- a/include/uapi/linux/psp.h +++ b/include/uapi/linux/psp.h @@ -45,6 +45,15 @@ enum { PSP_A_KEYS_MAX = (__PSP_A_KEYS_MAX - 1) }; +enum { + PSP_A_STATS_DEV_ID = 1, + PSP_A_STATS_KEY_ROTATIONS, + PSP_A_STATS_STALE_EVENTS, + + __PSP_A_STATS_MAX, + PSP_A_STATS_MAX = (__PSP_A_STATS_MAX - 1) +}; + enum { PSP_CMD_DEV_GET = 1, PSP_CMD_DEV_ADD_NTF, @@ -55,6 +64,7 @@ enum { PSP_CMD_KEY_ROTATE_NTF, PSP_CMD_RX_ASSOC, PSP_CMD_TX_ASSOC, + PSP_CMD_GET_STATS, __PSP_CMD_MAX, PSP_CMD_MAX = (__PSP_CMD_MAX - 1) -- cgit v1.2.3 From f05d26198cf2c71f25f6bbe62ca4481c15543922 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 5 Nov 2025 16:26:04 -0800 Subject: psp: add stats from psp spec to driver facing api Provide a driver api for reporting device statistics required by the "Implementation Requirements" section of the PSP Architecture Specification. Use a warning to ensure drivers report stats required by the spec. Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20251106002608.1578518-4-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/psp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/psp.h b/include/uapi/linux/psp.h index 31592760ad79..d8449c043ba1 100644 --- a/include/uapi/linux/psp.h +++ b/include/uapi/linux/psp.h @@ -49,6 +49,14 @@ enum { PSP_A_STATS_DEV_ID = 1, PSP_A_STATS_KEY_ROTATIONS, PSP_A_STATS_STALE_EVENTS, + PSP_A_STATS_RX_PACKETS, + PSP_A_STATS_RX_BYTES, + PSP_A_STATS_RX_AUTH_FAIL, + PSP_A_STATS_RX_ERROR, + PSP_A_STATS_RX_BAD, + PSP_A_STATS_TX_PACKETS, + PSP_A_STATS_TX_BYTES, + PSP_A_STATS_TX_ERROR, __PSP_A_STATS_MAX, PSP_A_STATS_MAX = (__PSP_A_STATS_MAX - 1) -- cgit v1.2.3 From aaf46c6a6df6052881c2e75cba65aeb6f1cfa88a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 23 Oct 2025 22:21:02 -0700 Subject: tee: : - add ending ':' to some struct members as needed for kernel-doc - change struct name in kernel-doc to match the actual struct name (2x) - add a @params: kernel-doc entry multiple times Warning: tee.h:265 struct member 'ret_origin' not described in 'tee_ioctl_open_session_arg' Warning: tee.h:265 struct member 'num_params' not described in 'tee_ioctl_open_session_arg' Warning: tee.h:265 struct member 'params' not described in 'tee_ioctl_open_session_arg' Warning: tee.h:351 struct member 'num_params' not described in 'tee_iocl_supp_recv_arg' Warning: tee.h:351 struct member 'params' not described in 'tee_iocl_supp_recv_arg' Warning: tee.h:372 struct member 'num_params' not described in 'tee_iocl_supp_send_arg' Warning: tee.h:372 struct member 'params' not described in 'tee_iocl_supp_send_arg' Warning: tee.h:298: expecting prototype for struct tee_ioctl_invoke_func_arg. Prototype was for struct tee_ioctl_invoke_arg instead Warning: tee.h:473: expecting prototype for struct tee_ioctl_invoke_func_arg. Prototype was for struct tee_ioctl_object_invoke_arg instead Signed-off-by: Randy Dunlap Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/uapi/linux/tee.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index 386ad36f1a0a..cab5cadca8ef 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -249,8 +249,9 @@ struct tee_ioctl_param { * @cancel_id: [in] Cancellation id, a unique value to identify this request * @session: [out] Session id * @ret: [out] return value - * @ret_origin [out] origin of the return value - * @num_params [in] number of parameters following this struct + * @ret_origin: [out] origin of the return value + * @num_params: [in] number of &struct tee_ioctl_param entries in @params + * @params: array of ioctl parameters */ struct tee_ioctl_open_session_arg { __u8 uuid[TEE_IOCTL_UUID_LEN]; @@ -276,14 +277,14 @@ struct tee_ioctl_open_session_arg { struct tee_ioctl_buf_data) /** - * struct tee_ioctl_invoke_func_arg - Invokes a function in a Trusted - * Application + * struct tee_ioctl_invoke_arg - Invokes a function in a Trusted Application * @func: [in] Trusted Application function, specific to the TA * @session: [in] Session id * @cancel_id: [in] Cancellation id, a unique value to identify this request * @ret: [out] return value - * @ret_origin [out] origin of the return value - * @num_params [in] number of parameters following this struct + * @ret_origin: [out] origin of the return value + * @num_params: [in] number of parameters following this struct + * @params: array of ioctl parameters */ struct tee_ioctl_invoke_arg { __u32 func; @@ -338,7 +339,8 @@ struct tee_ioctl_close_session_arg { /** * struct tee_iocl_supp_recv_arg - Receive a request for a supplicant function * @func: [in] supplicant function - * @num_params [in/out] number of parameters following this struct + * @num_params: [in/out] number of &struct tee_ioctl_param entries in @params + * @params: array of ioctl parameters * * @num_params is the number of params that tee-supplicant has room to * receive when input, @num_params is the number of actual params @@ -363,7 +365,8 @@ struct tee_iocl_supp_recv_arg { /** * struct tee_iocl_supp_send_arg - Send a response to a received request * @ret: [out] return value - * @num_params [in] number of parameters following this struct + * @num_params: [in] number of &struct tee_ioctl_param entries in @params + * @params: array of ioctl parameters */ struct tee_iocl_supp_send_arg { __u32 ret; @@ -454,11 +457,13 @@ struct tee_ioctl_shm_register_fd_data { */ /** - * struct tee_ioctl_invoke_func_arg - Invokes an object in a Trusted Application + * struct tee_ioctl_object_invoke_arg - Invokes an object in a + * Trusted Application * @id: [in] Object id * @op: [in] Object operation, specific to the object * @ret: [out] return value * @num_params: [in] number of parameters following this struct + * @params: array of ioctl parameters */ struct tee_ioctl_object_invoke_arg { __u64 id; -- cgit v1.2.3 From 62ed1b58224636185fa689db81224b8c8af46473 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 3 Nov 2025 20:57:57 +0800 Subject: md: allow configuring logical block size Previously, raid array used the maximum logical block size (LBS) of all member disks. Adding a larger LBS disk at runtime could unexpectedly increase RAID's LBS, risking corruption of existing partitions. This can be reproduced by: ``` # LBS of sd[de] is 512 bytes, sdf is 4096 bytes. mdadm -CRq /dev/md0 -l1 -n3 /dev/sd[de] missing --assume-clean # LBS is 512 cat /sys/block/md0/queue/logical_block_size # create partition md0p1 parted -s /dev/md0 mklabel gpt mkpart primary 1MiB 100% lsblk | grep md0p1 # LBS becomes 4096 after adding sdf mdadm --add -q /dev/md0 /dev/sdf cat /sys/block/md0/queue/logical_block_size # partition lost partprobe /dev/md0 lsblk | grep md0p1 ``` Simply restricting larger-LBS disks is inflexible. In some scenarios, only disks with 512 bytes LBS are available currently, but later, disks with 4KB LBS may be added to the array. Making LBS configurable is the best way to solve this scenario. After this patch, the raid will: - store LBS in disk metadata - add a read-write sysfs 'mdX/logical_block_size' Future mdadm should support setting LBS via metadata field during RAID creation and the new sysfs. Though the kernel allows runtime LBS changes, users should avoid modifying it after creating partitions or filesystems to prevent compatibility issues. Only 1.x metadata supports configurable LBS. 0.90 metadata inits all fields to default values at auto-detect. Supporting 0.90 would require more extensive changes and no such use case has been observed. Note that many RAID paths rely on PAGE_SIZE alignment, including for metadata I/O. A larger LBS than PAGE_SIZE will result in metadata read/write failures. So this config should be prevented. Link: https://lore.kernel.org/linux-raid/20251103125757.1405796-6-linan666@huaweicloud.com Signed-off-by: Li Nan Reviewed-by: Xiao Ni Signed-off-by: Yu Kuai --- include/uapi/linux/raid/md_p.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index ac74133a4768..310068bb2a1d 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -291,7 +291,8 @@ struct mdp_superblock_1 { __le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */ __le32 sb_csum; /* checksum up to devs[max_dev] */ __le32 max_dev; /* size of devs[] array to consider */ - __u8 pad3[64-32]; /* set to 0 when writing */ + __le32 logical_block_size; /* same as q->limits->logical_block_size */ + __u8 pad3[64-36]; /* set to 0 when writing */ /* device state information. Indexed by dev_number. * 2 bytes per device -- cgit v1.2.3 From 0e535824d0bcf7c9bb0532d902283c31c78cd6f3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 7 Nov 2025 23:04:02 -0800 Subject: devlink: Introduce switchdev_inactive eswitch mode Adds DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE attribute to UAPI and documentation. Before having traffic flow through an eswitch, a user may want to have the ability to block traffic towards the FDB until FDB is fully programmed and the user is ready to send traffic to it. For example: when two eswitches are present for vports in a multi-PF setup, one eswitch may take over the traffic from the other when the user chooses. Before this take over, a user may want to first program the inactive eswitch and then once ready redirect traffic to this new eswitch. switchdev modes transition semantics: legacy->switchdev_inactive: Create switchdev mode normally, traffic not allowed to flow yet. switchdev_inactive->switchdev: Enable traffic to flow. switchdev->switchdev_inactive: Block traffic on the FDB, FDB and representros state and content is preserved. When eswitch is configured to this mode, traffic is ignored/dropped on this eswitch FDB, while current configuration is kept, e.g FDB rules and netdev representros are kept available, FDB programming is allowed. Example: # start inactive switchdev devlink dev eswitch set pci/0000:08:00.1 mode switchdev_inactive # setup TC rules, representors etc .. # activate devlink dev eswitch set pci/0000:08:00.1 mode switchdev Signed-off-by: Saeed Mahameed Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20251108070404.1551708-2-saeed@kernel.org Signed-off-by: Paolo Abeni --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index bcad11a787a5..157f11d3fb72 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -181,6 +181,7 @@ enum devlink_sb_threshold_type { enum devlink_eswitch_mode { DEVLINK_ESWITCH_MODE_LEGACY, DEVLINK_ESWITCH_MODE_SWITCHDEV, + DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE, }; enum devlink_eswitch_inline_mode { -- cgit v1.2.3 From 1602bad16d7df82faca6d7c70821117684a66f49 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Nov 2025 09:12:58 -0500 Subject: vfs: expose delegation support to userland Now that support for recallable directory delegations is available, expose this functionality to userland with new F_SETDELEG and F_GETDELEG commands for fcntl(). Note that this also allows userland to request a FL_DELEG type lease on files too. Userland applications that do will get signalled when there are metadata changes in addition to just data changes (which is a limitation of FL_LEASE leases). These commands accept a new "struct delegation" argument that contains a flags field for future expansion. Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20251111-dir-deleg-ro-v6-17-52f3feebb2f2@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 3741ea1b73d8..008fac15e573 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -79,6 +79,17 @@ */ #define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET +/* Set/Get delegations */ +#define F_GETDELEG (F_LINUX_SPECIFIC_BASE + 15) +#define F_SETDELEG (F_LINUX_SPECIFIC_BASE + 16) + +/* Argument structure for F_GETDELEG and F_SETDELEG */ +struct delegation { + uint32_t d_flags; /* Must be 0 */ + uint16_t d_type; /* F_RDLCK, F_WRLCK, F_UNLCK */ + uint16_t __pad; /* Must be 0 */ +}; + /* * Types of directory notifications that may be requested. */ -- cgit v1.2.3 From ad9c62bd8946621ed02ac94131a921222508a8bc Mon Sep 17 00:00:00 2001 From: Jiaqi Yan Date: Mon, 13 Oct 2025 18:59:01 +0000 Subject: KVM: arm64: VM exit to userspace to handle SEA When APEI fails to handle a stage-2 synchronous external abort (SEA), today KVM injects an asynchronous SError to the VCPU then resumes it, which usually results in unpleasant guest kernel panic. One major situation of guest SEA is when vCPU consumes recoverable uncorrected memory error (UER). Although SError and guest kernel panic effectively stops the propagation of corrupted memory, guest may re-use the corrupted memory if auto-rebooted; in worse case, guest boot may run into poisoned memory. So there is room to recover from an UER in a more graceful manner. Alternatively KVM can redirect the synchronous SEA event to VMM to - Reduce blast radius if possible. VMM can inject a SEA to VCPU via KVM's existing KVM_SET_VCPU_EVENTS API. If the memory poison consumption or fault is not from guest kernel, blast radius can be limited to the triggering thread in guest userspace, so VM can keep running. - Allow VMM to protect from future memory poison consumption by unmapping the page from stage-2, or to interrupt guest of the poisoned page so guest kernel can unmap it from stage-1 page table. - Allow VMM to track SEA events that VM customers care about, to restart VM when certain number of distinct poison events have happened, to provide observability to customers in log management UI. Introduce an userspace-visible feature to enable VMM handle SEA: - KVM_CAP_ARM_SEA_TO_USER. As the alternative fallback behavior when host APEI fails to claim a SEA, userspace can opt in this new capability to let KVM exit to userspace during SEA if it is not owned by host. - KVM_EXIT_ARM_SEA. A new exit reason is introduced for this. KVM fills kvm_run.arm_sea with as much as possible information about the SEA, enabling VMM to emulate SEA to guest by itself. - Sanitized ESR_EL2. The general rule is to keep only the bits useful for userspace and relevant to guest memory. - Flags indicating if faulting guest physical address is valid. - Faulting guest physical and virtual addresses if valid. Signed-off-by: Jiaqi Yan Co-developed-by: Oliver Upton Signed-off-by: Oliver Upton Link: https://msgid.link/20251013185903.1372553-2-jiaqiyan@google.com Signed-off-by: Oliver Upton --- include/uapi/linux/kvm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 52f6000ab020..1e541193e98d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -179,6 +179,7 @@ struct kvm_xen_exit { #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 #define KVM_EXIT_TDX 40 +#define KVM_EXIT_ARM_SEA 41 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -473,6 +474,14 @@ struct kvm_run { } setup_event_notify; }; } tdx; + /* KVM_EXIT_ARM_SEA */ + struct { +#define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID (1ULL << 0) + __u64 flags; + __u64 esr; + __u64 gva; + __u64 gpa; + } arm_sea; /* Fix the size of the union. */ char padding[256]; }; @@ -963,6 +972,7 @@ struct kvm_enable_cap { #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 #define KVM_CAP_GUEST_MEMFD_FLAGS 244 +#define KVM_CAP_ARM_SEA_TO_USER 245 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From 78f0e33cd6c939a555aa80dbed2fec6b333a7660 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 11 Nov 2025 06:28:15 +0000 Subject: fs/namespace: correctly handle errors returned by grab_requested_mnt_ns grab_requested_mnt_ns was changed to return error codes on failure, but its callers were not updated to check for error pointers, still checking only for a NULL return value. This commit updates the callers to use IS_ERR() or IS_ERR_OR_NULL() and PTR_ERR() to correctly check for and propagate errors. This also makes sure that the logic actually works and mount namespace file descriptors can be used to refere to mounts. Christian Brauner says: Rework the patch to be more ergonomic and in line with our overall error handling patterns. Fixes: 7b9d14af8777 ("fs: allow mount namespace fd") Cc: Christian Brauner Signed-off-by: Andrei Vagin Link: https://patch.msgid.link/20251111062815.2546189-1-avagin@google.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 7fa67c2031a5..5d3f8c9e3a62 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -197,7 +197,7 @@ struct statmount { */ struct mnt_id_req { __u32 size; - __u32 spare; + __u32 mnt_ns_fd; __u64 mnt_id; __u64 param; __u64 mnt_ns_id; -- cgit v1.2.3 From 2647e2ecc096d2330d6b6a34a3a1f0a99828c14c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 13 Nov 2025 10:48:57 +0000 Subject: io_uring/query: introduce zcrx query Add a new query type IO_URING_QUERY_ZCRX returning the user some basic information about the interface, which includes allowed flags for areas and registration and supported IORING_REGISTER_ZCRX_CTRL subcodes. There is also a chicken-egg problem with user provided refill queue memory, where offsets and size information is returned after registration, but to properly allocate memory you need to know it beforehand, which is why the userspace currently has to guess the RQ headers size and severely overestimates it. Return the size information. It's split into "size" and "alignment" fields because for default placement modes the user is interested in the aligned size, however if it gets support for more flexible placement, it'll need to only know the actual header size. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/query.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h index 3539ccbfd064..fc0cb1580e47 100644 --- a/include/uapi/linux/io_uring/query.h +++ b/include/uapi/linux/io_uring/query.h @@ -18,6 +18,7 @@ struct io_uring_query_hdr { enum { IO_URING_QUERY_OPCODES = 0, + IO_URING_QUERY_ZCRX = 1, __IO_URING_QUERY_MAX, }; @@ -41,4 +42,19 @@ struct io_uring_query_opcode { __u32 __pad; }; +struct io_uring_query_zcrx { + /* Bitmask of supported ZCRX_REG_* flags, */ + __u64 register_flags; + /* Bitmask of all supported IORING_ZCRX_AREA_* flags */ + __u64 area_flags; + /* The number of supported ZCRX_CTRL_* opcodes */ + __u32 nr_ctrl_opcodes; + __u32 __resv1; + /* The refill ring header size */ + __u32 rq_hdr_size; + /* The alignment for the header */ + __u32 rq_hdr_alignment; + __u64 __resv2; +}; + #endif -- cgit v1.2.3 From 4aaa9bc4d5921363490d95fe66c4db086a915799 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 13 Nov 2025 10:48:58 +0000 Subject: io_uring/query: introduce rings info query Same problem as with zcrx in the previous patch, the user needs to know SQ/CQ header sizes to allocated memory before setup to use it for user provided rings, i.e. IORING_SETUP_NO_MMAP, however that information is only returned after registration, hence the user is guessing kernel implementation details. Return the header size and alignment, which is split with the same motivation, to allow the user to know the real structure size without alignment in case there will be more flexible placement schemes in the future. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/query.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h index fc0cb1580e47..2456e6c5ebb5 100644 --- a/include/uapi/linux/io_uring/query.h +++ b/include/uapi/linux/io_uring/query.h @@ -19,6 +19,7 @@ struct io_uring_query_hdr { enum { IO_URING_QUERY_OPCODES = 0, IO_URING_QUERY_ZCRX = 1, + IO_URING_QUERY_SCQ = 2, __IO_URING_QUERY_MAX, }; @@ -57,4 +58,11 @@ struct io_uring_query_zcrx { __u64 __resv2; }; +struct io_uring_query_scq { + /* The SQ/CQ rings header size */ + __u64 hdr_size; + /* The alignment for the header */ + __u64 hdr_alignment; +}; + #endif -- cgit v1.2.3 From d663976dad68de9b2e3df59cc31f0a24ee4c4511 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 13 Nov 2025 10:46:12 +0000 Subject: io_uring/zcrx: introduce IORING_REGISTER_ZCRX_CTRL It'll be annoying and take enough of boilerplate code to implement new zcrx features as separate io_uring register opcode. Introduce IORING_REGISTER_ZCRX_CTRL that will multiplex such calls to zcrx. Note, there are no real users of the opcode in this patch. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index e96080db3e4d..0e1d353fab1d 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -697,6 +697,9 @@ enum io_uring_register_op { /* query various aspects of io_uring, see linux/io_uring/query.h */ IORING_REGISTER_QUERY = 35, + /* auxiliary zcrx configuration, see enum zcrx_ctrl_op */ + IORING_REGISTER_ZCRX_CTRL = 36, + /* this goes last */ IORING_REGISTER_LAST, @@ -1078,6 +1081,16 @@ struct io_uring_zcrx_ifq_reg { __u64 __resv[3]; }; +enum zcrx_ctrl_op { + __ZCRX_CTRL_LAST, +}; + +struct zcrx_ctrl { + __u32 zcrx_id; + __u32 op; /* see enum zcrx_ctrl_op */ + __u64 __resv[8]; +}; + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 475eb39b00478b1898bc9080344dcd8e86c53c7a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 13 Nov 2025 10:46:13 +0000 Subject: io_uring/zcrx: add sync refill queue flushing Add an zcrx interface via IORING_REGISTER_ZCRX_CTRL that forces the kernel to flush / consume entries from the refill queue. Just as with the IORING_REGISTER_ZCRX_REFILL attempt, the motivation is to address cases where the refill queue becomes full, and the user can't return buffers and needs to stash them. It's still a slow path, and the user should size refill queue appropriately, but it should be helpful for handling temporary traffic spikes and other unpredictable conditions. The interface is simpler comparing to ZCRX_REFILL as it doesn't need temporary refill entry arrays and gives natural batching, whereas ZCRX_REFILL requires even more user logic to be somewhat efficient. Also, add a structure for the operation. It's not currently used but can serve for future improvements like limiting the number of buffers to process, etc. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 0e1d353fab1d..db47fced2cc6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -1082,13 +1082,21 @@ struct io_uring_zcrx_ifq_reg { }; enum zcrx_ctrl_op { + ZCRX_CTRL_FLUSH_RQ, + __ZCRX_CTRL_LAST, }; +struct zcrx_ctrl_flush_rq { + __u64 __resv[6]; +}; + struct zcrx_ctrl { __u32 zcrx_id; __u32 op; /* see enum zcrx_ctrl_op */ - __u64 __resv[8]; + __u64 __resv[2]; + + struct zcrx_ctrl_flush_rq zc_flush; }; #ifdef __cplusplus -- cgit v1.2.3 From d7af80b213e5675664b14f12240cb282e81773d5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 13 Nov 2025 10:46:16 +0000 Subject: io_uring/zcrx: export zcrx via a file Add an option to wrap a zcrx instance into a file and expose it to the user space. Currently, users can't do anything meaningful with the file, but it'll be used in a next patch to import it into another io_uring instance. It's implemented as a new op called ZCRX_CTRL_EXPORT for the IORING_REGISTER_ZCRX_CTRL registration opcode. Signed-off-by: David Wei Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index db47fced2cc6..4bedc0310a55 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -1083,6 +1083,7 @@ struct io_uring_zcrx_ifq_reg { enum zcrx_ctrl_op { ZCRX_CTRL_FLUSH_RQ, + ZCRX_CTRL_EXPORT, __ZCRX_CTRL_LAST, }; @@ -1091,12 +1092,20 @@ struct zcrx_ctrl_flush_rq { __u64 __resv[6]; }; +struct zcrx_ctrl_export { + __u32 zcrx_fd; + __u32 __resv1[11]; +}; + struct zcrx_ctrl { __u32 zcrx_id; __u32 op; /* see enum zcrx_ctrl_op */ __u64 __resv[2]; - struct zcrx_ctrl_flush_rq zc_flush; + union { + struct zcrx_ctrl_export zc_export; + struct zcrx_ctrl_flush_rq zc_flush; + }; }; #ifdef __cplusplus -- cgit v1.2.3 From 00d91481279fb2df8c46d19090578afd523ca630 Mon Sep 17 00:00:00 2001 From: David Wei Date: Thu, 13 Nov 2025 10:46:18 +0000 Subject: io_uring/zcrx: share an ifq between rings Add a way to share an ifq from a src ring that is real (i.e. bound to a HW RX queue) with other rings. This is done by passing a new flag IORING_ZCRX_IFQ_REG_IMPORT in the registration struct io_uring_zcrx_ifq_reg, alongside the fd of an exported zcrx ifq. Signed-off-by: David Wei Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4bedc0310a55..deb772222b6d 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -1063,6 +1063,10 @@ struct io_uring_zcrx_area_reg { __u64 __resv2[2]; }; +enum zcrx_reg_flags { + ZCRX_REG_IMPORT = 1, +}; + /* * Argument for IORING_REGISTER_ZCRX_IFQ */ -- cgit v1.2.3 From e36dbd1cf3dfc4ce18e9f7a80183b53cae257e30 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 4 Jul 2025 17:43:19 +0200 Subject: media: uapi: Introduce V4L2 generic ISP types Introduce v4l2-isp.h in the Linux kernel uAPI. The header includes types for generic ISP configuration parameters and will be extended in the future with support for generic ISP statistics formats. Generic ISP parameters support is provided by introducing two new types that represent an extensible and versioned buffer of ISP configuration parameters. The v4l2_params_buffer represents the container for the ISP configuration data block. The generic type is defined with a 0-sized data member that the ISP driver implementations shall properly size according to their capabilities. The v4l2_params_block_header structure represents the header to be prepend to each ISP configuration block. Signed-off-by: Daniel Scally Reviewed-by: Daniel Scally Reviewed-by: Laurent Pinchart Reviewed-by: Michael Riesch Acked-by: Sakari Ailus Tested-by: Lad Prabhakar Signed-off-by: Jacopo Mondi Signed-off-by: Hans Verkuil --- include/uapi/linux/media/v4l2-isp.h | 102 ++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 include/uapi/linux/media/v4l2-isp.h (limited to 'include/uapi') diff --git a/include/uapi/linux/media/v4l2-isp.h b/include/uapi/linux/media/v4l2-isp.h new file mode 100644 index 000000000000..779168f9058e --- /dev/null +++ b/include/uapi/linux/media/v4l2-isp.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Video4Linux2 generic ISP parameters and statistics support + * + * Copyright (C) 2025 Ideas On Board Oy + * Author: Jacopo Mondi + */ + +#ifndef _UAPI_V4L2_ISP_H_ +#define _UAPI_V4L2_ISP_H_ + +#include +#include + +/** + * enum v4l2_isp_params_version - V4L2 ISP parameters versioning + * + * @V4L2_ISP_PARAMS_VERSION_V0: First version of the V4L2 ISP parameters format + * (for compatibility) + * @V4L2_ISP_PARAMS_VERSION_V1: First version of the V4L2 ISP parameters format + * + * V0 and V1 are identical in order to support drivers compatible with the V4L2 + * ISP parameters format already upstreamed which use either 0 or 1 as their + * versioning identifier. Both V0 and V1 refers to the first version of the + * V4L2 ISP parameters format. + * + * Future revisions of the V4L2 ISP parameters format should start from the + * value of 2. + */ +enum v4l2_isp_params_version { + V4L2_ISP_PARAMS_VERSION_V0 = 0, + V4L2_ISP_PARAMS_VERSION_V1 +}; + +#define V4L2_ISP_PARAMS_FL_BLOCK_DISABLE (1U << 0) +#define V4L2_ISP_PARAMS_FL_BLOCK_ENABLE (1U << 1) + +/* + * Reserve the first 8 bits for V4L2_ISP_PARAMS_FL_* flag. + * + * Driver-specific flags should be defined as: + * #define DRIVER_SPECIFIC_FLAG0 ((1U << V4L2_ISP_PARAMS_FL_DRIVER_FLAGS(0)) + * #define DRIVER_SPECIFIC_FLAG1 ((1U << V4L2_ISP_PARAMS_FL_DRIVER_FLAGS(1)) + */ +#define V4L2_ISP_PARAMS_FL_DRIVER_FLAGS(n) ((n) + 8) + +/** + * struct v4l2_isp_params_block_header - V4L2 extensible parameters block header + * @type: The parameters block type (driver-specific) + * @flags: A bitmask of block flags (driver-specific) + * @size: Size (in bytes) of the parameters block, including this header + * + * This structure represents the common part of all the ISP configuration + * blocks. Each parameters block shall embed an instance of this structure type + * as its first member, followed by the block-specific configuration data. + * + * The @type field is an ISP driver-specific value that identifies the block + * type. The @size field specifies the size of the parameters block. + * + * The @flags field is a bitmask of per-block flags V4L2_PARAMS_ISP_FL_* and + * driver-specific flags specified by the driver header. + */ +struct v4l2_isp_params_block_header { + __u16 type; + __u16 flags; + __u32 size; +} __attribute__((aligned(8))); + +/** + * struct v4l2_isp_params_buffer - V4L2 extensible parameters configuration + * @version: The parameters buffer version (driver-specific) + * @data_size: The configuration data effective size, excluding this header + * @data: The configuration data + * + * This structure contains the configuration parameters of the ISP algorithms, + * serialized by userspace into a data buffer. Each configuration parameter + * block is represented by a block-specific structure which contains a + * :c:type:`v4l2_isp_params_block_header` entry as first member. Userspace + * populates the @data buffer with configuration parameters for the blocks that + * it intends to configure. As a consequence, the data buffer effective size + * changes according to the number of ISP blocks that userspace intends to + * configure and is set by userspace in the @data_size field. + * + * The parameters buffer is versioned by the @version field to allow modifying + * and extending its definition. Userspace shall populate the @version field to + * inform the driver about the version it intends to use. The driver will parse + * and handle the @data buffer according to the data layout specific to the + * indicated version and return an error if the desired version is not + * supported. + * + * For each ISP block that userspace wants to configure, a block-specific + * structure is appended to the @data buffer, one after the other without gaps + * in between. Userspace shall populate the @data_size field with the effective + * size, in bytes, of the @data buffer. + */ +struct v4l2_isp_params_buffer { + __u32 version; + __u32 data_size; + __u8 data[] __counted_by(data_size); +}; + +#endif /* _UAPI_V4L2_ISP_H_ */ -- cgit v1.2.3 From 1e8152db64bdee9f13e84e516c2b8a9bb10f025e Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Mon, 7 Jul 2025 14:13:53 +0200 Subject: media: uapi: Convert RkISP1 to V4L2 extensible params With the introduction of common types for extensible parameters format, convert the rkisp1-config.h header to use the new types. Factor out the documentation that is now part of the common header and only keep the driver-specific on in place. The conversion to use common types doesn't impact userspace as the new types are either identical to the ones already existing in the RkISP1 uAPI or are 1-to-1 type convertible. Reviewed-by: Daniel Scally Reviewed-by: Laurent Pinchart Reviewed-by: Michael Riesch Acked-by: Sakari Ailus Tested-by: Lad Prabhakar Signed-off-by: Jacopo Mondi Signed-off-by: Hans Verkuil --- include/uapi/linux/rkisp1-config.h | 107 +++++++++---------------------------- 1 file changed, 24 insertions(+), 83 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h index 3b060ea6eed7..b2d2a71f7baf 100644 --- a/include/uapi/linux/rkisp1-config.h +++ b/include/uapi/linux/rkisp1-config.h @@ -7,8 +7,13 @@ #ifndef _UAPI_RKISP1_CONFIG_H #define _UAPI_RKISP1_CONFIG_H +#ifdef __KERNEL__ +#include +#endif /* __KERNEL__ */ #include +#include + /* Defect Pixel Cluster Detection */ #define RKISP1_CIF_ISP_MODULE_DPCC (1U << 0) /* Black Level Subtraction */ @@ -1158,79 +1163,26 @@ enum rkisp1_ext_params_block_type { RKISP1_EXT_PARAMS_BLOCK_TYPE_WDR, }; -#define RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE (1U << 0) -#define RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE (1U << 1) +/* For backward compatibility */ +#define RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE V4L2_ISP_PARAMS_FL_BLOCK_DISABLE +#define RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE V4L2_ISP_PARAMS_FL_BLOCK_ENABLE /* A bitmask of parameters blocks supported on the current hardware. */ #define RKISP1_CID_SUPPORTED_PARAMS_BLOCKS (V4L2_CID_USER_RKISP1_BASE + 0x01) /** - * struct rkisp1_ext_params_block_header - RkISP1 extensible parameters block - * header + * rkisp1_ext_params_block_header - RkISP1 extensible parameters block header * * This structure represents the common part of all the ISP configuration - * blocks. Each parameters block shall embed an instance of this structure type - * as its first member, followed by the block-specific configuration data. The - * driver inspects this common header to discern the block type and its size and - * properly handle the block content by casting it to the correct block-specific - * type. + * blocks and is identical to :c:type:`v4l2_isp_params_block_header`. * - * The @type field is one of the values enumerated by + * The type field is one of the values enumerated by * :c:type:`rkisp1_ext_params_block_type` and specifies how the data should be - * interpreted by the driver. The @size field specifies the size of the - * parameters block and is used by the driver for validation purposes. - * - * The @flags field is a bitmask of per-block flags RKISP1_EXT_PARAMS_FL_*. - * - * When userspace wants to configure and enable an ISP block it shall fully - * populate the block configuration and set the - * RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE bit in the @flags field. - * - * When userspace simply wants to disable an ISP block the - * RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE bit should be set in @flags field. The - * driver ignores the rest of the block configuration structure in this case. - * - * If a new configuration of an ISP block has to be applied userspace shall - * fully populate the ISP block configuration and omit setting the - * RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE and RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE bits - * in the @flags field. - * - * Setting both the RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE and - * RKISP1_EXT_PARAMS_FL_BLOCK_DISABLE bits in the @flags field is not allowed - * and not accepted by the driver. - * - * Userspace is responsible for correctly populating the parameters block header - * fields (@type, @flags and @size) and the block-specific parameters. - * - * For example: + * interpreted by the driver. * - * .. code-block:: c - * - * void populate_bls(struct rkisp1_ext_params_block_header *block) { - * struct rkisp1_ext_params_bls_config *bls = - * (struct rkisp1_ext_params_bls_config *)block; - * - * bls->header.type = RKISP1_EXT_PARAMS_BLOCK_ID_BLS; - * bls->header.flags = RKISP1_EXT_PARAMS_FL_BLOCK_ENABLE; - * bls->header.size = sizeof(*bls); - * - * bls->config.enable_auto = 0; - * bls->config.fixed_val.r = blackLevelRed_; - * bls->config.fixed_val.gr = blackLevelGreenR_; - * bls->config.fixed_val.gb = blackLevelGreenB_; - * bls->config.fixed_val.b = blackLevelBlue_; - * } - * - * @type: The parameters block type, see - * :c:type:`rkisp1_ext_params_block_type` - * @flags: A bitmask of block flags - * @size: Size (in bytes) of the parameters block, including this header + * The flags field is a bitmask of per-block flags RKISP1_EXT_PARAMS_FL_*. */ -struct rkisp1_ext_params_block_header { - __u16 type; - __u16 flags; - __u32 size; -}; +#define rkisp1_ext_params_block_header v4l2_isp_params_block_header /** * struct rkisp1_ext_params_bls_config - RkISP1 extensible params BLS config @@ -1588,27 +1540,14 @@ struct rkisp1_ext_params_wdr_config { * @RKISP1_EXT_PARAM_BUFFER_V1: First version of RkISP1 extensible parameters */ enum rksip1_ext_param_buffer_version { - RKISP1_EXT_PARAM_BUFFER_V1 = 1, + RKISP1_EXT_PARAM_BUFFER_V1 = V4L2_ISP_PARAMS_VERSION_V1, }; /** * struct rkisp1_ext_params_cfg - RkISP1 extensible parameters configuration * - * This struct contains the configuration parameters of the RkISP1 ISP - * algorithms, serialized by userspace into a data buffer. Each configuration - * parameter block is represented by a block-specific structure which contains a - * :c:type:`rkisp1_ext_params_block_header` entry as first member. Userspace - * populates the @data buffer with configuration parameters for the blocks that - * it intends to configure. As a consequence, the data buffer effective size - * changes according to the number of ISP blocks that userspace intends to - * configure and is set by userspace in the @data_size field. - * - * The parameters buffer is versioned by the @version field to allow modifying - * and extending its definition. Userspace shall populate the @version field to - * inform the driver about the version it intends to use. The driver will parse - * and handle the @data buffer according to the data layout specific to the - * indicated version and return an error if the desired version is not - * supported. + * This is the driver-specific implementation of + * :c:type:`v4l2_isp_params_buffer`. * * Currently the single RKISP1_EXT_PARAM_BUFFER_V1 version is supported. * When a new format version will be added, a mechanism for userspace to query @@ -1624,11 +1563,6 @@ enum rksip1_ext_param_buffer_version { * the maximum value represents the blocks supported by the kernel driver, * independently of the device instance. * - * For each ISP block that userspace wants to configure, a block-specific - * structure is appended to the @data buffer, one after the other without gaps - * in between nor overlaps. Userspace shall populate the @data_size field with - * the effective size, in bytes, of the @data buffer. - * * The expected memory layout of the parameters buffer is:: * * +-------------------- struct rkisp1_ext_params_cfg -------------------+ @@ -1678,4 +1612,11 @@ struct rkisp1_ext_params_cfg { __u8 data[RKISP1_EXT_PARAMS_MAX_SIZE]; }; +#ifdef __KERNEL__ +/* Make sure the header is type-convertible to the generic v4l2 params one */ +static_assert((sizeof(struct rkisp1_ext_params_cfg) - + RKISP1_EXT_PARAMS_MAX_SIZE) == + sizeof(struct v4l2_isp_params_buffer)); +#endif /* __KERNEL__ */ + #endif /* _UAPI_RKISP1_CONFIG_H */ -- cgit v1.2.3 From 45662082855c6acd1719c11e077388cbccf3baf2 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Mon, 7 Jul 2025 14:18:52 +0200 Subject: media: uapi: Convert Amlogic C3 to V4L2 extensible params With the introduction of common types for extensible parameters format, convert the c3-isp-config.h header to use the new types. Factor-out the documentation that is now part of the common header and only keep the driver-specific on in place. The conversion to use common types doesn't impact userspace as the new types are either identical to the ones already existing in the C3 ISP uAPI or are 1-to-1 type convertible. Reviewed-by: Daniel Scally Reviewed-by: Keke Li Reviewed-by: Laurent Pinchart Acked-by: Sakari Ailus Tested-by: Lad Prabhakar Signed-off-by: Jacopo Mondi Signed-off-by: Hans Verkuil --- include/uapi/linux/media/amlogic/c3-isp-config.h | 92 +++++++----------------- 1 file changed, 24 insertions(+), 68 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/media/amlogic/c3-isp-config.h b/include/uapi/linux/media/amlogic/c3-isp-config.h index ed085ea62a57..0a3c1cc55ccb 100644 --- a/include/uapi/linux/media/amlogic/c3-isp-config.h +++ b/include/uapi/linux/media/amlogic/c3-isp-config.h @@ -6,8 +6,13 @@ #ifndef _UAPI_C3_ISP_CONFIG_H_ #define _UAPI_C3_ISP_CONFIG_H_ +#ifdef __KERNEL__ +#include +#endif /* __KERNEL__ */ #include +#include + /* * Frames are split into zones of almost equal width and height - a zone is a * rectangular tile of a frame. The metering blocks within the ISP collect @@ -141,7 +146,7 @@ struct c3_isp_stats_info { * @C3_ISP_PARAMS_BUFFER_V0: First version of C3 ISP parameters block */ enum c3_isp_params_buffer_version { - C3_ISP_PARAMS_BUFFER_V0, + C3_ISP_PARAMS_BUFFER_V0 = V4L2_ISP_PARAMS_VERSION_V0, }; /** @@ -176,62 +181,23 @@ enum c3_isp_params_block_type { C3_ISP_PARAMS_BLOCK_SENTINEL }; -#define C3_ISP_PARAMS_BLOCK_FL_DISABLE (1U << 0) -#define C3_ISP_PARAMS_BLOCK_FL_ENABLE (1U << 1) +/* For backward compatibility */ +#define C3_ISP_PARAMS_BLOCK_FL_DISABLE V4L2_ISP_PARAMS_FL_BLOCK_DISABLE +#define C3_ISP_PARAMS_BLOCK_FL_ENABLE V4L2_ISP_PARAMS_FL_BLOCK_ENABLE /** * struct c3_isp_params_block_header - C3 ISP parameter block header * * This structure represents the common part of all the ISP configuration - * blocks. Each parameters block shall embed an instance of this structure type - * as its first member, followed by the block-specific configuration data. The - * driver inspects this common header to discern the block type and its size and - * properly handle the block content by casting it to the correct block-specific - * type. + * blocks and is identical to :c:type:`v4l2_isp_params_block_header`. * - * The @type field is one of the values enumerated by + * The type field is one of the values enumerated by * :c:type:`c3_isp_params_block_type` and specifies how the data should be - * interpreted by the driver. The @size field specifies the size of the - * parameters block and is used by the driver for validation purposes. The - * @flags field is a bitmask of per-block flags C3_ISP_PARAMS_FL*. - * - * When userspace wants to disable an ISP block the - * C3_ISP_PARAMS_BLOCK_FL_DISABLED bit should be set in the @flags field. In - * this case userspace may optionally omit the remainder of the configuration - * block, which will be ignored by the driver. - * - * When a new configuration of an ISP block needs to be applied userspace - * shall fully populate the ISP block and omit setting the - * C3_ISP_PARAMS_BLOCK_FL_DISABLED bit in the @flags field. - * - * Userspace is responsible for correctly populating the parameters block header - * fields (@type, @flags and @size) and the block-specific parameters. - * - * For example: - * - * .. code-block:: c + * interpreted by the driver. * - * void populate_pst_gamma(struct c3_isp_params_block_header *block) { - * struct c3_isp_params_pst_gamma *gamma = - * (struct c3_isp_params_pst_gamma *)block; - * - * gamma->header.type = C3_ISP_PARAMS_BLOCK_PST_GAMMA; - * gamma->header.flags = C3_ISP_PARAMS_BLOCK_FL_ENABLE; - * gamma->header.size = sizeof(*gamma); - * - * for (unsigned int i = 0; i < 129; i++) - * gamma->pst_gamma_lut[i] = i; - * } - * - * @type: The parameters block type from :c:type:`c3_isp_params_block_type` - * @flags: A bitmask of block flags - * @size: Size (in bytes) of the parameters block, including this header + * The flags field is a bitmask of per-block flags C3_ISP_PARAMS_FL_*. */ -struct c3_isp_params_block_header { - __u16 type; - __u16 flags; - __u32 size; -}; +#define c3_isp_params_block_header v4l2_isp_params_block_header /** * struct c3_isp_params_awb_gains - Gains for auto-white balance @@ -498,26 +464,10 @@ struct c3_isp_params_blc { /** * struct c3_isp_params_cfg - C3 ISP configuration parameters * - * This struct contains the configuration parameters of the C3 ISP - * algorithms, serialized by userspace into an opaque data buffer. Each - * configuration parameter block is represented by a block-specific structure - * which contains a :c:type:`c3_isp_param_block_header` entry as first - * member. Userspace populates the @data buffer with configuration parameters - * for the blocks that it intends to configure. As a consequence, the data - * buffer effective size changes according to the number of ISP blocks that - * userspace intends to configure. - * - * The parameters buffer is versioned by the @version field to allow modifying - * and extending its definition. Userspace should populate the @version field to - * inform the driver about the version it intends to use. The driver will parse - * and handle the @data buffer according to the data layout specific to the - * indicated revision and return an error if the desired revision is not - * supported. - * - * For each ISP block that userspace wants to configure, a block-specific - * structure is appended to the @data buffer, one after the other without gaps - * in between nor overlaps. Userspace shall populate the @total_size field with - * the effective size, in bytes, of the @data buffer. + * This is the driver-specific implementation of + * :c:type:`v4l2_isp_params_buffer`. + * + * Currently only C3_ISP_PARAM_BUFFER_V0 is supported. * * The expected memory layout of the parameters buffer is:: * @@ -561,4 +511,10 @@ struct c3_isp_params_cfg { __u8 data[C3_ISP_PARAMS_MAX_SIZE]; }; +#ifdef __KERNEL__ +/* Make sure the header is type-convertible to the generic v4l2 params one */ +static_assert((sizeof(struct c3_isp_params_cfg) - C3_ISP_PARAMS_MAX_SIZE) == + sizeof(struct v4l2_isp_params_buffer)); +#endif /* __KERNEL__ */ + #endif -- cgit v1.2.3 From ec4ac3cb7198070611987a6e91829fce0f4ce6d0 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Tue, 11 Nov 2025 16:15:45 +0000 Subject: media: uapi: Add MEDIA_BUS_FMT_RGB202020_1X60 format code The Mali-C55 ISP by ARM requires 20-bits per colour channel input on the bus. Add a new media bus format code to represent it. Reviewed-by: Lad Prabhakar Tested-by: Lad Prabhakar Reviewed-by: Laurent Pinchart Acked-by: Nayden Kanchev Co-developed-by: Jacopo Mondi Signed-off-by: Jacopo Mondi Signed-off-by: Daniel Scally Signed-off-by: Hans Verkuil --- include/uapi/linux/media-bus-format.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index ff62056feed5..62ad82fd285a 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -34,7 +34,7 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x1028 */ +/* RGB - next is 0x1029 */ #define MEDIA_BUS_FMT_RGB444_1X12 0x1016 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 @@ -74,6 +74,7 @@ #define MEDIA_BUS_FMT_RGB888_1X36_CPADLO 0x1021 #define MEDIA_BUS_FMT_RGB121212_1X36 0x1019 #define MEDIA_BUS_FMT_RGB161616_1X48 0x101a +#define MEDIA_BUS_FMT_RGB202020_1X60 0x1028 /* YUV (including grey) - next is 0x202f */ #define MEDIA_BUS_FMT_Y8_1X8 0x2001 -- cgit v1.2.3 From 2477ab037621632c3ec167187dc9e7afac2ba7f2 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Tue, 11 Nov 2025 16:15:46 +0000 Subject: media: uapi: Add 20-bit bayer formats The Mali-C55 requires input data be in 20-bit format, MSB aligned. Add some new media bus format macros to represent that input format. Reviewed-by: Lad Prabhakar Tested-by: Lad Prabhakar Reviewed-by: Laurent Pinchart Co-developed-by: Jacopo Mondi Signed-off-by: Jacopo Mondi Signed-off-by: Daniel Scally Signed-off-by: Hans Verkuil --- include/uapi/linux/media-bus-format.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index 62ad82fd285a..6005f033e62c 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -124,7 +124,7 @@ #define MEDIA_BUS_FMT_YUV16_1X48 0x202a #define MEDIA_BUS_FMT_UYYVYY16_0_5X48 0x202b -/* Bayer - next is 0x3021 */ +/* Bayer - next is 0x3025 */ #define MEDIA_BUS_FMT_SBGGR8_1X8 0x3001 #define MEDIA_BUS_FMT_SGBRG8_1X8 0x3013 #define MEDIA_BUS_FMT_SGRBG8_1X8 0x3002 @@ -157,6 +157,10 @@ #define MEDIA_BUS_FMT_SGBRG16_1X16 0x301e #define MEDIA_BUS_FMT_SGRBG16_1X16 0x301f #define MEDIA_BUS_FMT_SRGGB16_1X16 0x3020 +#define MEDIA_BUS_FMT_SBGGR20_1X20 0x3021 +#define MEDIA_BUS_FMT_SGBRG20_1X20 0x3022 +#define MEDIA_BUS_FMT_SGRBG20_1X20 0x3023 +#define MEDIA_BUS_FMT_SRGGB20_1X20 0x3024 /* JPEG compressed formats - next is 0x4002 */ #define MEDIA_BUS_FMT_JPEG_1X8 0x4001 -- cgit v1.2.3 From 8d0bbed21ef737195277c0af8c30511fb72e608b Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Tue, 11 Nov 2025 16:15:48 +0000 Subject: media: uapi: Add controls for Mali-C55 ISP Add definitions and documentation for the custom control that will be needed by the Mali-C55 ISP driver. This will be a read only bitmask of the driver's capabilities, informing userspace of which blocks are fitted and which are absent. Tested-by: Lad Prabhakar Reviewed-by: Lad Prabhakar Reviewed-by: Jacopo Mondi Signed-off-by: Daniel Scally Signed-off-by: Jacopo Mondi Signed-off-by: Hans Verkuil --- include/uapi/linux/media/arm/mali-c55-config.h | 26 ++++++++++++++++++++++++++ include/uapi/linux/v4l2-controls.h | 6 ++++++ 2 files changed, 32 insertions(+) create mode 100644 include/uapi/linux/media/arm/mali-c55-config.h (limited to 'include/uapi') diff --git a/include/uapi/linux/media/arm/mali-c55-config.h b/include/uapi/linux/media/arm/mali-c55-config.h new file mode 100644 index 000000000000..7fddece54ada --- /dev/null +++ b/include/uapi/linux/media/arm/mali-c55-config.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * ARM Mali-C55 ISP Driver - Userspace API + * + * Copyright (C) 2023 Ideas on Board Oy + */ + +#ifndef __UAPI_MALI_C55_CONFIG_H +#define __UAPI_MALI_C55_CONFIG_H + +#include + +#define V4L2_CID_MALI_C55_CAPABILITIES (V4L2_CID_USER_MALI_C55_BASE + 0x0) +#define MALI_C55_GPS_PONG (1U << 0) +#define MALI_C55_GPS_WDR (1U << 1) +#define MALI_C55_GPS_COMPRESSION (1U << 2) +#define MALI_C55_GPS_TEMPER (1U << 3) +#define MALI_C55_GPS_SINTER_LITE (1U << 4) +#define MALI_C55_GPS_SINTER (1U << 5) +#define MALI_C55_GPS_IRIDIX_LTM (1U << 6) +#define MALI_C55_GPS_IRIDIX_GTM (1U << 7) +#define MALI_C55_GPS_CNR (1U << 8) +#define MALI_C55_GPS_FRSCALER (1U << 9) +#define MALI_C55_GPS_DS_PIPE (1U << 10) + +#endif /* __UAPI_MALI_C55_CONFIG_H */ diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 2d30107e047e..f84ed133a6c9 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -228,6 +228,12 @@ enum v4l2_colorfx { */ #define V4L2_CID_USER_RKISP1_BASE (V4L2_CID_USER_BASE + 0x1220) +/* + * The base for the Arm Mali-C55 ISP driver controls. + * We reserve 16 controls for this driver + */ +#define V4L2_CID_USER_MALI_C55_BASE (V4L2_CID_USER_BASE + 0x1230) + /* MPEG-class control IDs */ /* The MPEG controls are applicable to all codec controls * and the 'MPEG' part of the define is historical */ -- cgit v1.2.3 From 4d36f732366aeb32bf3486545e597500a3bf0994 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Tue, 11 Nov 2025 16:15:52 +0000 Subject: media: Add MALI_C55_3A_STATS meta format Add a new meta format for the Mali-C55 ISP's 3A Statistics along with a new descriptor entry. Tested-by: Lad Prabhakar Reviewed-by: Laurent Pinchart Acked-by: Nayden Kanchev Co-developed-by: Jacopo Mondi Signed-off-by: Jacopo Mondi Signed-off-by: Daniel Scally Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index becd08fdbddb..cba4b1311667 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -884,6 +884,9 @@ struct v4l2_pix_format { #define V4L2_META_FMT_RPI_FE_CFG v4l2_fourcc('R', 'P', 'F', 'C') /* PiSP FE configuration */ #define V4L2_META_FMT_RPI_FE_STATS v4l2_fourcc('R', 'P', 'F', 'S') /* PiSP FE stats */ +/* Vendor specific - used for Arm Mali-C55 ISP */ +#define V4L2_META_FMT_MALI_C55_STATS v4l2_fourcc('C', '5', '5', 'S') /* ARM Mali-C55 3A Statistics */ + #ifdef __KERNEL__ /* * Line-based metadata formats. Remember to update v4l_fill_fmtdesc() when -- cgit v1.2.3 From c7f832f6f8129bb666346cb4805805ad056059b7 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Tue, 11 Nov 2025 16:15:53 +0000 Subject: media: uapi: Add 3a stats buffer for mali-c55 Describe the format of the 3A statistics buffers in the userspace API header for the mali-c55 ISP. Tested-by: Lad Prabhakar Reviewed-by: Laurent Pinchart Acked-by: Nayden Kanchev Co-developed-by: Jacopo Mondi Signed-off-by: Jacopo Mondi Signed-off-by: Daniel Scally Signed-off-by: Hans Verkuil --- include/uapi/linux/media/arm/mali-c55-config.h | 170 +++++++++++++++++++++++++ 1 file changed, 170 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/media/arm/mali-c55-config.h b/include/uapi/linux/media/arm/mali-c55-config.h index 7fddece54ada..e31fb8ffa10a 100644 --- a/include/uapi/linux/media/arm/mali-c55-config.h +++ b/include/uapi/linux/media/arm/mali-c55-config.h @@ -8,6 +8,7 @@ #ifndef __UAPI_MALI_C55_CONFIG_H #define __UAPI_MALI_C55_CONFIG_H +#include #include #define V4L2_CID_MALI_C55_CAPABILITIES (V4L2_CID_USER_MALI_C55_BASE + 0x0) @@ -23,4 +24,173 @@ #define MALI_C55_GPS_FRSCALER (1U << 9) #define MALI_C55_GPS_DS_PIPE (1U << 10) +/* + * Frames are split into zones of almost equal width and height - a zone is a + * rectangular tile of a frame. The metering blocks within the ISP collect + * aggregated statistics per zone. A maximum of 15x15 zones can be configured, + * and so the statistics buffer within the hardware is sized to accommodate + * that. + * + * The utilised number of zones is runtime configurable. + */ +#define MALI_C55_MAX_ZONES (15 * 15) + +/** + * struct mali_c55_ae_1024bin_hist - Auto Exposure 1024-bin histogram statistics + * + * @bins: 1024 element array of 16-bit pixel counts. + * + * The 1024-bin histogram module collects image-global but zone-weighted + * intensity distributions of pixels in fixed-width bins. The modules can be + * configured into different "plane modes" which affect the contents of the + * collected statistics. In plane mode 0, pixel intensities are taken regardless + * of colour plane into a single 1024-bin histogram with a bin width of 4. In + * plane mode 1, four 256-bin histograms with a bin width of 16 are collected - + * one for each CFA colour plane. In plane modes 4, 5, 6 and 7 two 512-bin + * histograms with a bin width of 8 are collected - in each mode one of the + * colour planes is collected into the first histogram and all the others are + * combined into the second. The histograms are stored consecutively in the bins + * array. + * + * The 16-bit pixel counts are stored as a 4-bit exponent in the most + * significant bits followed by a 12-bit mantissa. Conversion to a usable + * format can be done according to the following pseudo-code:: + * + * if (e == 0) { + * bin = m * 2; + * } else { + * bin = (m + 4096) * 2^e + * } + * + * where + * e is the exponent value in range 0..15 + * m is the mantissa value in range 0..4095 + * + * The pixels used in calculating the statistics can be masked using three + * methods: + * + * 1. Pixels can be skipped in X and Y directions independently. + * 2. Minimum/Maximum intensities can be configured + * 3. Zones can be differentially weighted, including 0 weighted to mask them + * + * The data for this histogram can be collected from different tap points in the + * ISP depending on configuration - after the white balance or digital gain + * blocks, or immediately after the input crossbar. + */ +struct mali_c55_ae_1024bin_hist { + __u16 bins[1024]; +} __attribute__((packed)); + +/** + * struct mali_c55_ae_5bin_hist - Auto Exposure 5-bin histogram statistics + * + * @hist0: 16-bit normalised pixel count for the 0th intensity bin + * @hist1: 16-bit normalised pixel count for the 1st intensity bin + * @hist3: 16-bit normalised pixel count for the 3rd intensity bin + * @hist4: 16-bit normalised pixel count for the 4th intensity bin + * + * The ISP generates a 5-bin histogram of normalised pixel counts within bins of + * pixel intensity for each of 225 possible zones within a frame. The centre bin + * of the histogram for each zone is not available from the hardware and must be + * calculated by subtracting the values of hist0, hist1, hist3 and hist4 from + * 0xffff as in the following equation: + * + * hist2 = 0xffff - (hist0 + hist1 + hist3 + hist4) + */ +struct mali_c55_ae_5bin_hist { + __u16 hist0; + __u16 hist1; + __u16 hist3; + __u16 hist4; +} __attribute__((packed)); + +/** + * struct mali_c55_awb_average_ratios - Auto White Balance colour ratios + * + * @avg_rg_gr: Average R/G or G/R ratio in Q4.8 format. + * @avg_bg_br: Average B/G or B/R ratio in Q4.8 format. + * @num_pixels: The number of pixels used in the AWB calculation + * + * The ISP calculates and collects average colour ratios for each zone in an + * image and stores them in Q4.8 format (the lowest 8 bits are fractional, with + * bits [11:8] representing the integer). The exact ratios collected (either + * R/G, B/G or G/R, B/R) are configurable through the parameters buffer. The + * value of the 4 high bits is undefined. + */ +struct mali_c55_awb_average_ratios { + __u16 avg_rg_gr; + __u16 avg_bg_br; + __u32 num_pixels; +} __attribute__((packed)); + +/** + * struct mali_c55_af_statistics - Auto Focus edge and intensity statistics + * + * @intensity_stats: Packed mantissa and exponent value for pixel intensity + * @edge_stats: Packed mantissa and exponent values for edge intensity + * + * The ISP collects the squared sum of pixel intensities for each zone within a + * configurable Region of Interest on the frame. Additionally, the same data are + * collected after being passed through a bandpass filter which removes high and + * low frequency components - these are referred to as the edge statistics. + * + * The intensity and edge statistics for a zone can be used to calculate the + * contrast information for a zone + * + * C = E2 / I2 + * + * Where I2 is the intensity statistic for a zone and E2 is the edge statistic + * for that zone. Optimum focus is reached when C is at its maximum. + * + * The intensity and edge statistics are stored packed into a non-standard 16 + * bit floating point format, where the 7 most significant bits represent the + * exponent and the 9 least significant bits the mantissa. This format can be + * unpacked with the following pseudocode:: + * + * if (e == 0) { + * x = m; + * } else { + * x = 2^e-1 * (m + 2^9) + * } + * + * where + * e is the exponent value in range 0..127 + * m is the mantissa value in range 0..511 + */ +struct mali_c55_af_statistics { + __u16 intensity_stats; + __u16 edge_stats; +} __attribute__((packed)); + +/** + * struct mali_c55_stats_buffer - 3A statistics for the mali-c55 ISP + * + * @ae_1024bin_hist: 1024-bin frame-global pixel intensity histogram + * @iridix_1024bin_hist: Post-Iridix block 1024-bin histogram + * @ae_5bin_hists: 5-bin pixel intensity histograms for AEC + * @reserved1: Undefined buffer space + * @awb_ratios: Color balance ratios for Auto White Balance + * @reserved2: Undefined buffer space + * @af_statistics: Pixel intensity statistics for Auto Focus + * @reserved3: Undefined buffer space + * + * This struct describes the metering statistics space in the Mali-C55 ISP's + * hardware in its entirety. The space between each defined area is marked as + * "unknown" and may not be 0, but should not be used. The @ae_5bin_hists, + * @awb_ratios and @af_statistics members are arrays of statistics per-zone. + * The zones are arranged in the array in raster order starting from the top + * left corner of the image. + */ + +struct mali_c55_stats_buffer { + struct mali_c55_ae_1024bin_hist ae_1024bin_hist; + struct mali_c55_ae_1024bin_hist iridix_1024bin_hist; + struct mali_c55_ae_5bin_hist ae_5bin_hists[MALI_C55_MAX_ZONES]; + __u32 reserved1[14]; + struct mali_c55_awb_average_ratios awb_ratios[MALI_C55_MAX_ZONES]; + __u32 reserved2[14]; + struct mali_c55_af_statistics af_statistics[MALI_C55_MAX_ZONES]; + __u32 reserved3[15]; +} __attribute__((packed)); + #endif /* __UAPI_MALI_C55_CONFIG_H */ -- cgit v1.2.3 From 1ab3cb233d61131b2d02650f8ed9e4e077fd4508 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Tue, 11 Nov 2025 16:15:56 +0000 Subject: media: mali-c55: Add image formats for Mali-C55 parameters buffer Add a new V4L2 meta format code for the Mali-C55 parameters. Tested-by: Lad Prabhakar Reviewed-by: Laurent Pinchart Acked-by: Nayden Kanchev Signed-off-by: Jacopo Mondi Signed-off-by: Daniel Scally Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index cba4b1311667..add08188f068 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -885,6 +885,7 @@ struct v4l2_pix_format { #define V4L2_META_FMT_RPI_FE_STATS v4l2_fourcc('R', 'P', 'F', 'S') /* PiSP FE stats */ /* Vendor specific - used for Arm Mali-C55 ISP */ +#define V4L2_META_FMT_MALI_C55_PARAMS v4l2_fourcc('C', '5', '5', 'P') /* ARM Mali-C55 Parameters */ #define V4L2_META_FMT_MALI_C55_STATS v4l2_fourcc('C', '5', '5', 'S') /* ARM Mali-C55 3A Statistics */ #ifdef __KERNEL__ -- cgit v1.2.3 From 08a99369f44eeb63eacc56fe42f4c67a6c7dbc37 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Tue, 11 Nov 2025 16:15:57 +0000 Subject: media: uapi: Add parameters structs to mali-c55-config.h Add structures describing the ISP parameters to mali-c55-config.h Tested-by: Lad Prabhakar Acked-by: Nayden Kanchev Co-developed-by: Jacopo Mondi Signed-off-by: Jacopo Mondi Signed-off-by: Daniel Scally Signed-off-by: Hans Verkuil --- include/uapi/linux/media/arm/mali-c55-config.h | 598 +++++++++++++++++++++++++ 1 file changed, 598 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/media/arm/mali-c55-config.h b/include/uapi/linux/media/arm/mali-c55-config.h index e31fb8ffa10a..109082c5694f 100644 --- a/include/uapi/linux/media/arm/mali-c55-config.h +++ b/include/uapi/linux/media/arm/mali-c55-config.h @@ -10,6 +10,7 @@ #include #include +#include #define V4L2_CID_MALI_C55_CAPABILITIES (V4L2_CID_USER_MALI_C55_BASE + 0x0) #define MALI_C55_GPS_PONG (1U << 0) @@ -193,4 +194,601 @@ struct mali_c55_stats_buffer { __u32 reserved3[15]; } __attribute__((packed)); +/** + * enum mali_c55_param_buffer_version - Mali-C55 parameters block versioning + * + * @MALI_C55_PARAM_BUFFER_V1: First version of Mali-C55 parameters block + */ +enum mali_c55_param_buffer_version { + MALI_C55_PARAM_BUFFER_V1, +}; + +/** + * enum mali_c55_param_block_type - Enumeration of Mali-C55 parameter blocks + * + * This enumeration defines the types of Mali-C55 parameters block. Each block + * configures a specific processing block of the Mali-C55 ISP. The block + * type allows the driver to correctly interpret the parameters block data. + * + * It is the responsibility of userspace to correctly set the type of each + * parameters block. + * + * @MALI_C55_PARAM_BLOCK_SENSOR_OFFS: Sensor pre-shading black level offset + * @MALI_C55_PARAM_BLOCK_AEXP_HIST: Auto-exposure 1024-bin histogram + * configuration + * @MALI_C55_PARAM_BLOCK_AEXP_IHIST: Post-Iridix auto-exposure 1024-bin + * histogram configuration + * @MALI_C55_PARAM_BLOCK_AEXP_HIST_WEIGHTS: Auto-exposure 1024-bin histogram + * weighting + * @MALI_C55_PARAM_BLOCK_AEXP_IHIST_WEIGHTS: Post-Iridix auto-exposure 1024-bin + * histogram weighting + * @MALI_C55_PARAM_BLOCK_DIGITAL_GAIN: Digital gain + * @MALI_C55_PARAM_BLOCK_AWB_GAINS: Auto-white balance gains + * @MALI_C55_PARAM_BLOCK_AWB_CONFIG: Auto-white balance statistics config + * @MALI_C55_PARAM_BLOCK_AWB_GAINS_AEXP: Auto-white balance gains for AEXP-0 tap + * @MALI_C55_PARAM_MESH_SHADING_CONFIG : Mesh shading tables configuration + * @MALI_C55_PARAM_MESH_SHADING_SELECTION: Mesh shading table selection + */ +enum mali_c55_param_block_type { + MALI_C55_PARAM_BLOCK_SENSOR_OFFS, + MALI_C55_PARAM_BLOCK_AEXP_HIST, + MALI_C55_PARAM_BLOCK_AEXP_IHIST, + MALI_C55_PARAM_BLOCK_AEXP_HIST_WEIGHTS, + MALI_C55_PARAM_BLOCK_AEXP_IHIST_WEIGHTS, + MALI_C55_PARAM_BLOCK_DIGITAL_GAIN, + MALI_C55_PARAM_BLOCK_AWB_GAINS, + MALI_C55_PARAM_BLOCK_AWB_CONFIG, + MALI_C55_PARAM_BLOCK_AWB_GAINS_AEXP, + MALI_C55_PARAM_MESH_SHADING_CONFIG, + MALI_C55_PARAM_MESH_SHADING_SELECTION, +}; + +/** + * struct mali_c55_params_sensor_off_preshading - offset subtraction for each + * color channel + * + * Provides removal of the sensor black level from the sensor data. Separate + * offsets are provided for each of the four Bayer component color channels + * which are defaulted to R, Gr, Gb, B. + * + * header.type should be set to MALI_C55_PARAM_BLOCK_SENSOR_OFFS from + * :c:type:`mali_c55_param_block_type` for this block. + * + * @header: The Mali-C55 parameters block header + * @chan00: Offset for color channel 00 (default: R) + * @chan01: Offset for color channel 01 (default: Gr) + * @chan10: Offset for color channel 10 (default: Gb) + * @chan11: Offset for color channel 11 (default: B) + */ +struct mali_c55_params_sensor_off_preshading { + struct v4l2_isp_params_block_header header; + __u32 chan00; + __u32 chan01; + __u32 chan10; + __u32 chan11; +}; + +/** + * enum mali_c55_aexp_hist_tap_points - Tap points for the AEXP histogram + * @MALI_C55_AEXP_HIST_TAP_WB: After static white balance + * @MALI_C55_AEXP_HIST_TAP_FS: After WDR Frame Stitch + * @MALI_C55_AEXP_HIST_TAP_TPG: After the test pattern generator + */ +enum mali_c55_aexp_hist_tap_points { + MALI_C55_AEXP_HIST_TAP_WB = 0, + MALI_C55_AEXP_HIST_TAP_FS, + MALI_C55_AEXP_HIST_TAP_TPG, +}; + +/** + * enum mali_c55_aexp_skip_x - Horizontal pixel skipping + * @MALI_C55_AEXP_SKIP_X_EVERY_2ND: Collect every 2nd pixel horizontally + * @MALI_C55_AEXP_SKIP_X_EVERY_3RD: Collect every 3rd pixel horizontally + * @MALI_C55_AEXP_SKIP_X_EVERY_4TH: Collect every 4th pixel horizontally + * @MALI_C55_AEXP_SKIP_X_EVERY_5TH: Collect every 5th pixel horizontally + * @MALI_C55_AEXP_SKIP_X_EVERY_8TH: Collect every 8th pixel horizontally + * @MALI_C55_AEXP_SKIP_X_EVERY_9TH: Collect every 9th pixel horizontally + */ +enum mali_c55_aexp_skip_x { + MALI_C55_AEXP_SKIP_X_EVERY_2ND, + MALI_C55_AEXP_SKIP_X_EVERY_3RD, + MALI_C55_AEXP_SKIP_X_EVERY_4TH, + MALI_C55_AEXP_SKIP_X_EVERY_5TH, + MALI_C55_AEXP_SKIP_X_EVERY_8TH, + MALI_C55_AEXP_SKIP_X_EVERY_9TH +}; + +/** + * enum mali_c55_aexp_skip_y - Vertical pixel skipping + * @MALI_C55_AEXP_SKIP_Y_ALL: Collect every single pixel vertically + * @MALI_C55_AEXP_SKIP_Y_EVERY_2ND: Collect every 2nd pixel vertically + * @MALI_C55_AEXP_SKIP_Y_EVERY_3RD: Collect every 3rd pixel vertically + * @MALI_C55_AEXP_SKIP_Y_EVERY_4TH: Collect every 4th pixel vertically + * @MALI_C55_AEXP_SKIP_Y_EVERY_5TH: Collect every 5th pixel vertically + * @MALI_C55_AEXP_SKIP_Y_EVERY_8TH: Collect every 8th pixel vertically + * @MALI_C55_AEXP_SKIP_Y_EVERY_9TH: Collect every 9th pixel vertically + */ +enum mali_c55_aexp_skip_y { + MALI_C55_AEXP_SKIP_Y_ALL, + MALI_C55_AEXP_SKIP_Y_EVERY_2ND, + MALI_C55_AEXP_SKIP_Y_EVERY_3RD, + MALI_C55_AEXP_SKIP_Y_EVERY_4TH, + MALI_C55_AEXP_SKIP_Y_EVERY_5TH, + MALI_C55_AEXP_SKIP_Y_EVERY_8TH, + MALI_C55_AEXP_SKIP_Y_EVERY_9TH +}; + +/** + * enum mali_c55_aexp_row_column_offset - Start from the first or second row or + * column + * @MALI_C55_AEXP_FIRST_ROW_OR_COL: Start from the first row / column + * @MALI_C55_AEXP_SECOND_ROW_OR_COL: Start from the second row / column + */ +enum mali_c55_aexp_row_column_offset { + MALI_C55_AEXP_FIRST_ROW_OR_COL = 1, + MALI_C55_AEXP_SECOND_ROW_OR_COL = 2, +}; + +/** + * enum mali_c55_aexp_hist_plane_mode - Mode for the AEXP Histograms + * @MALI_C55_AEXP_HIST_COMBINED: All color planes in one 1024-bin histogram + * @MALI_C55_AEXP_HIST_SEPARATE: Each color plane in one 256-bin histogram with a bin width of 16 + * @MALI_C55_AEXP_HIST_FOCUS_00: Top left plane in the first bank, rest in second bank + * @MALI_C55_AEXP_HIST_FOCUS_01: Top right plane in the first bank, rest in second bank + * @MALI_C55_AEXP_HIST_FOCUS_10: Bottom left plane in the first bank, rest in second bank + * @MALI_C55_AEXP_HIST_FOCUS_11: Bottom right plane in the first bank, rest in second bank + * + * In the "focus" modes statistics are collected into two 512-bin histograms + * with a bin width of 8. One colour plane is in the first histogram with the + * remainder combined into the second. The four options represent which of the + * four positions in a bayer pattern are the focused plane. + */ +enum mali_c55_aexp_hist_plane_mode { + MALI_C55_AEXP_HIST_COMBINED = 0, + MALI_C55_AEXP_HIST_SEPARATE = 1, + MALI_C55_AEXP_HIST_FOCUS_00 = 4, + MALI_C55_AEXP_HIST_FOCUS_01 = 5, + MALI_C55_AEXP_HIST_FOCUS_10 = 6, + MALI_C55_AEXP_HIST_FOCUS_11 = 7, +}; + +/** + * struct mali_c55_params_aexp_hist - configuration for AEXP metering hists + * + * This struct allows users to configure the 1024-bin AEXP histograms. Broadly + * speaking the parameters allow you to mask particular regions of the image and + * to select different kinds of histogram. + * + * The skip_x, offset_x, skip_y and offset_y fields allow users to ignore or + * mask pixels in the frame by their position relative to the top left pixel. + * First, the skip_y, offset_x and offset_y fields define which of the pixels + * within each 2x2 region will be counted in the statistics. + * + * If skip_y == 0 then two pixels from each covered region will be counted. If + * both offset_x and offset_y are zero, then the two left-most pixels in each + * 2x2 pixel region will be counted. Setting offset_x = 1 will discount the top + * left pixel and count the top right pixel. Setting offset_y = 1 will discount + * the bottom left pixel and count the bottom right pixel. + * + * If skip_y != 0 then only a single pixel from each region covered by the + * pattern will be counted. In this case offset_x controls whether the pixel + * that's counted is in the left (if offset_x == 0) or right (if offset_x == 1) + * column and offset_y controls whether the pixel that's counted is in the top + * (if offset_y == 0) or bottom (if offset_y == 1) row. + * + * The skip_x and skip_y fields control how the 2x2 pixel region is repeated + * across the image data. The first instance of the region is always in the top + * left of the image data. The skip_x field controls how many pixels are ignored + * in the x direction before the pixel masking region is repeated. The skip_y + * field controls how many pixels are ignored in the y direction before the + * pixel masking region is repeated. + * + * These fields can be used to reduce the number of pixels counted for the + * statistics, but it's important to be careful to configure them correctly. + * Some combinations of values will result in colour components from the input + * data being ignored entirely, for example in the following configuration: + * + * skip_x = 0 + * offset_x = 0 + * skip_y = 0 + * offset_y = 0 + * + * Only the R and Gb components of RGGB data that was input would be collected. + * Similarly in the following configuration: + * + * skip_x = 0 + * offset_x = 0 + * skip_y = 1 + * offset_y = 1 + * + * Only the Gb component of RGGB data that was input would be collected. To + * correct things such that all 4 colour components were included it would be + * necessary to set the skip_x and skip_y fields in a way that resulted in all + * four colour components being collected: + * + * skip_x = 1 + * offset_x = 0 + * skip_y = 1 + * offset_y = 1 + * + * header.type should be set to one of either MALI_C55_PARAM_BLOCK_AEXP_HIST or + * MALI_C55_PARAM_BLOCK_AEXP_IHIST from :c:type:`mali_c55_param_block_type`. + * + * @header: The Mali-C55 parameters block header + * @skip_x: Horizontal decimation. See enum mali_c55_aexp_skip_x + * @offset_x: Skip the first column, or not. See enum mali_c55_aexp_row_column_offset + * @skip_y: Vertical decimation. See enum mali_c55_aexp_skip_y + * @offset_y: Skip the first row, or not. See enum mali_c55_aexp_row_column_offset + * @scale_bottom: Scale pixels in bottom half of intensity range: 0=1x ,1=2x, 2=4x, 4=8x, 4=16x + * @scale_top: scale pixels in top half of intensity range: 0=1x ,1=2x, 2=4x, 4=8x, 4=16x + * @plane_mode: Plane separation mode. See enum mali_c55_aexp_hist_plane_mode + * @tap_point: Tap point for histogram from enum mali_c55_aexp_hist_tap_points. + * This parameter is unused for the post-Iridix Histogram + */ +struct mali_c55_params_aexp_hist { + struct v4l2_isp_params_block_header header; + __u8 skip_x; + __u8 offset_x; + __u8 skip_y; + __u8 offset_y; + __u8 scale_bottom; + __u8 scale_top; + __u8 plane_mode; + __u8 tap_point; +}; + +/** + * struct mali_c55_params_aexp_weights - Array of weights for AEXP metering + * + * This struct allows users to configure the weighting for both of the 1024-bin + * AEXP histograms. The pixel data collected for each zone is multiplied by the + * corresponding weight from this array, which may be zero if the intention is + * to mask off the zone entirely. + * + * header.type should be set to one of either MALI_C55_PARAM_BLOCK_AEXP_HIST_WEIGHTS + * or MALI_C55_PARAM_BLOCK_AEXP_IHIST_WEIGHTS from :c:type:`mali_c55_param_block_type`. + * + * @header: The Mali-C55 parameters block header + * @nodes_used_horiz: Number of active zones horizontally [0..15] + * @nodes_used_vert: Number of active zones vertically [0..15] + * @zone_weights: Zone weighting. Index is row*col where 0,0 is the top + * left zone continuing in raster order. Each zone can be + * weighted in the range [0..15]. The number of rows and + * columns is defined by @nodes_used_vert and + * @nodes_used_horiz + */ +struct mali_c55_params_aexp_weights { + struct v4l2_isp_params_block_header header; + __u8 nodes_used_horiz; + __u8 nodes_used_vert; + __u8 zone_weights[MALI_C55_MAX_ZONES]; +}; + +/** + * struct mali_c55_params_digital_gain - Digital gain value + * + * This struct carries a digital gain value to set in the ISP. + * + * header.type should be set to MALI_C55_PARAM_BLOCK_DIGITAL_GAIN from + * :c:type:`mali_c55_param_block_type` for this block. + * + * @header: The Mali-C55 parameters block header + * @gain: The digital gain value to apply, in Q5.8 format. + */ +struct mali_c55_params_digital_gain { + struct v4l2_isp_params_block_header header; + __u16 gain; +}; + +/** + * enum mali_c55_awb_stats_mode - Statistics mode for AWB + * @MALI_C55_AWB_MODE_GRBR: Statistics collected as Green/Red and Blue/Red ratios + * @MALI_C55_AWB_MODE_RGBG: Statistics collected as Red/Green and Blue/Green ratios + */ +enum mali_c55_awb_stats_mode { + MALI_C55_AWB_MODE_GRBR = 0, + MALI_C55_AWB_MODE_RGBG, +}; + +/** + * struct mali_c55_params_awb_gains - Gain settings for auto white balance + * + * This struct allows users to configure the gains for auto-white balance. There + * are four gain settings corresponding to each colour channel in the bayer + * domain. Although named generically, the association between the gain applied + * and the colour channel is done automatically within the ISP depending on the + * input format, and so the following mapping always holds true:: + * + * gain00 = R + * gain01 = Gr + * gain10 = Gb + * gain11 = B + * + * All of the gains are stored in Q4.8 format. + * + * header.type should be set to one of either MALI_C55_PARAM_BLOCK_AWB_GAINS or + * MALI_C55_PARAM_BLOCK_AWB_GAINS_AEXP from :c:type:`mali_c55_param_block_type`. + * + * @header: The Mali-C55 parameters block header + * @gain00: Multiplier for colour channel 00 + * @gain01: Multiplier for colour channel 01 + * @gain10: Multiplier for colour channel 10 + * @gain11: Multiplier for colour channel 11 + */ +struct mali_c55_params_awb_gains { + struct v4l2_isp_params_block_header header; + __u16 gain00; + __u16 gain01; + __u16 gain10; + __u16 gain11; +}; + +/** + * enum mali_c55_params_awb_tap_points - Tap points for the AWB statistics + * @MALI_C55_AWB_STATS_TAP_PF: Immediately after the Purple Fringe block + * @MALI_C55_AWB_STATS_TAP_CNR: Immediately after the CNR block + */ +enum mali_c55_params_awb_tap_points { + MALI_C55_AWB_STATS_TAP_PF = 0, + MALI_C55_AWB_STATS_TAP_CNR, +}; + +/** + * struct mali_c55_params_awb_config - Stats settings for auto-white balance + * + * This struct allows the configuration of the statistics generated for auto + * white balance. Pixel intensity limits can be set to exclude overly bright or + * dark regions of an image from the statistics entirely. Colour ratio minima + * and maxima can be set to discount pixels who's ratios fall outside the + * defined boundaries; there are two sets of registers to do this - the + * "min/max" ratios which bound a region and the "high/low" ratios which further + * trim the upper and lower ratios. For example with the boundaries configured + * as follows, only pixels whos colour ratios falls into the region marked "A" + * would be counted:: + * + * cr_high + * 2.0 | | + * | cb_max --> _________________________v_____ + * 1.8 | | \ | + * | | \ | + * 1.6 | | \ | + * | | \ | + * c 1.4 | cb_low -->|\ A \|<-- cb_high + * b | | \ | + * 1.2 | | \ | + * r | | \ | + * a 1.0 | cb_min --> |____\_________________________| + * t | ^ ^ ^ + * i 0.8 | | | | + * o | cr_min | cr_max + * s 0.6 | | + * | cr_low + * 0.4 | + * | + * 0.2 | + * | + * 0.0 |_______________________________________________________________ + * 0.0 0.2 0.4 0.6 0.8 1.0 1.2 1.4 1.6 1.8 2.0 + * cr ratios + * + * header.type should be set to MALI_C55_PARAM_BLOCK_AWB_CONFIG from + * :c:type:`mali_c55_param_block_type` for this block. + * + * @header: The Mali-C55 parameters block header + * @tap_point: The tap point from enum mali_c55_params_awb_tap_points + * @stats_mode: AWB statistics collection mode, see :c:type:`mali_c55_awb_stats_mode` + * @white_level: Upper pixel intensity (I.E. raw pixel values) limit + * @black_level: Lower pixel intensity (I.E. raw pixel values) limit + * @cr_max: Maximum R/G ratio (Q4.8 format) + * @cr_min: Minimum R/G ratio (Q4.8 format) + * @cb_max: Maximum B/G ratio (Q4.8 format) + * @cb_min: Minimum B/G ratio (Q4.8 format) + * @nodes_used_horiz: Number of active zones horizontally [0..15] + * @nodes_used_vert: Number of active zones vertically [0..15] + * @cr_high: R/G ratio trim high (Q4.8 format) + * @cr_low: R/G ratio trim low (Q4.8 format) + * @cb_high: B/G ratio trim high (Q4.8 format) + * @cb_low: B/G ratio trim low (Q4.8 format) + */ +struct mali_c55_params_awb_config { + struct v4l2_isp_params_block_header header; + __u8 tap_point; + __u8 stats_mode; + __u16 white_level; + __u16 black_level; + __u16 cr_max; + __u16 cr_min; + __u16 cb_max; + __u16 cb_min; + __u8 nodes_used_horiz; + __u8 nodes_used_vert; + __u16 cr_high; + __u16 cr_low; + __u16 cb_high; + __u16 cb_low; +}; + +#define MALI_C55_NUM_MESH_SHADING_ELEMENTS 3072 + +/** + * struct mali_c55_params_mesh_shading_config - Mesh shading configuration + * + * The mesh shading correction module allows programming a separate table of + * either 16x16 or 32x32 node coefficients for 3 different light sources. The + * final correction coefficients applied are computed by blending the + * coefficients from two tables together. + * + * A page of 1024 32-bit integers is associated to each colour channel, with + * pages stored consecutively in memory. Each 32-bit integer packs 3 8-bit + * correction coefficients for a single node, one for each of the three light + * sources. The 8 most significant bits are unused. The following table + * describes the layout:: + * + * +----------- Page (Colour Plane) 0 -------------+ + * | @mesh[i] | Mesh Point | Bits | Light Source | + * +-----------+------------+-------+--------------+ + * | 0 | 0,0 | 16,23 | LS2 | + * | | | 08-15 | LS1 | + * | | | 00-07 | LS0 | + * +-----------+------------+-------+--------------+ + * | 1 | 0,1 | 16,23 | LS2 | + * | | | 08-15 | LS1 | + * | | | 00-07 | LS0 | + * +-----------+------------+-------+--------------+ + * | ... | ... | ... | ... | + * +-----------+------------+-------+--------------+ + * | 1023 | 31,31 | 16,23 | LS2 | + * | | | 08-15 | LS1 | + * | | | 00-07 | LS0 | + * +----------- Page (Colour Plane) 1 -------------+ + * | @mesh[i] | Mesh Point | Bits | Light Source | + * +-----------+------------+-------+--------------+ + * | 1024 | 0,0 | 16,23 | LS2 | + * | | | 08-15 | LS1 | + * | | | 00-07 | LS0 | + * +-----------+------------+-------+--------------+ + * | 1025 | 0,1 | 16,23 | LS2 | + * | | | 08-15 | LS1 | + * | | | 00-07 | LS0 | + * +-----------+------------+-------+--------------+ + * | ... | ... | ... | ... | + * +-----------+------------+-------+--------------+ + * | 2047 | 31,31 | 16,23 | LS2 | + * | | | 08-15 | LS1 | + * | | | 00-07 | LS0 | + * +----------- Page (Colour Plane) 2 -------------+ + * | @mesh[i] | Mesh Point | Bits | Light Source | + * +-----------+------------+-------+--------------+ + * | 2048 | 0,0 | 16,23 | LS2 | + * | | | 08-15 | LS1 | + * | | | 00-07 | LS0 | + * +-----------+------------+-------+--------------+ + * | 2049 | 0,1 | 16,23 | LS2 | + * | | | 08-15 | LS1 | + * | | | 00-07 | LS0 | + * +-----------+------------+-------+--------------+ + * | ... | ... | ... | ... | + * +-----------+------------+-------+--------------+ + * | 3071 | 31,31 | 16,23 | LS2 | + * | | | 08-15 | LS1 | + * | | | 00-07 | LS0 | + * +-----------+------------+-------+--------------+ + * + * The @mesh_scale member determines the precision and minimum and maximum gain. + * For example if @mesh_scale is 0 and therefore selects 0 - 2x gain, a value of + * 0 in a coefficient means 0.0 gain, a value of 128 means 1.0 gain and 255 + * means 2.0 gain. + * + * header.type should be set to MALI_C55_PARAM_MESH_SHADING_CONFIG from + * :c:type:`mali_c55_param_block_type` for this block. + * + * @header: The Mali-C55 parameters block header + * @mesh_show: Output the mesh data rather than image data + * @mesh_scale: Set the precision and maximum gain range of mesh shading + * - 0 = 0-2x gain + * - 1 = 0-4x gain + * - 2 = 0-8x gain + * - 3 = 0-16x gain + * - 4 = 1-2x gain + * - 5 = 1-3x gain + * - 6 = 1-5x gain + * - 7 = 1-9x gain + * @mesh_page_r: Mesh page select for red colour plane [0..2] + * @mesh_page_g: Mesh page select for green colour plane [0..2] + * @mesh_page_b: Mesh page select for blue colour plane [0..2] + * @mesh_width: Number of horizontal nodes minus 1 [15,31] + * @mesh_height: Number of vertical nodes minus 1 [15,31] + * @mesh: Mesh shading correction tables + */ +struct mali_c55_params_mesh_shading_config { + struct v4l2_isp_params_block_header header; + __u8 mesh_show; + __u8 mesh_scale; + __u8 mesh_page_r; + __u8 mesh_page_g; + __u8 mesh_page_b; + __u8 mesh_width; + __u8 mesh_height; + __u32 mesh[MALI_C55_NUM_MESH_SHADING_ELEMENTS]; +}; + +/** enum mali_c55_params_mesh_alpha_bank - Mesh shading table bank selection + * @MALI_C55_MESH_ALPHA_BANK_LS0_AND_LS1 - Select Light Sources 0 and 1 + * @MALI_C55_MESH_ALPHA_BANK_LS1_AND_LS2 - Select Light Sources 1 and 2 + * @MALI_C55_MESH_ALPHA_BANK_LS0_AND_LS2 - Select Light Sources 0 and 2 + */ +enum mali_c55_params_mesh_alpha_bank { + MALI_C55_MESH_ALPHA_BANK_LS0_AND_LS1 = 0, + MALI_C55_MESH_ALPHA_BANK_LS1_AND_LS2 = 1, + MALI_C55_MESH_ALPHA_BANK_LS0_AND_LS2 = 4 +}; + +/** + * struct mali_c55_params_mesh_shading_selection - Mesh table selection + * + * The module computes the final correction coefficients by blending the ones + * from two light source tables, which are selected (independently for each + * colour channel) by the @mesh_alpha_bank_r/g/b fields. + * + * The final blended coefficients for each node are calculated using the + * following equation: + * + * Final coefficient = (a * LS\ :sub:`b`\ + (256 - a) * LS\ :sub:`a`\) / 256 + * + * Where a is the @mesh_alpha_r/g/b value, and LS\ :sub:`a`\ and LS\ :sub:`b`\ + * are the node cofficients for the two tables selected by the + * @mesh_alpha_bank_r/g/b value. + * + * The scale of the applied correction may also be controlled by tuning the + * @mesh_strength member. This is a modifier to the final coefficients which can + * be used to globally reduce the gains applied. + * + * header.type should be set to MALI_C55_PARAM_MESH_SHADING_SELECTION from + * :c:type:`mali_c55_param_block_type` for this block. + * + * @header: The Mali-C55 parameters block header + * @mesh_alpha_bank_r: Red mesh table select (c:type:`enum mali_c55_params_mesh_alpha_bank`) + * @mesh_alpha_bank_g: Green mesh table select (c:type:`enum mali_c55_params_mesh_alpha_bank`) + * @mesh_alpha_bank_b: Blue mesh table select (c:type:`enum mali_c55_params_mesh_alpha_bank`) + * @mesh_alpha_r: Blend coefficient for R [0..255] + * @mesh_alpha_g: Blend coefficient for G [0..255] + * @mesh_alpha_b: Blend coefficient for B [0..255] + * @mesh_strength: Mesh strength in Q4.12 format [0..4096] + */ +struct mali_c55_params_mesh_shading_selection { + struct v4l2_isp_params_block_header header; + __u8 mesh_alpha_bank_r; + __u8 mesh_alpha_bank_g; + __u8 mesh_alpha_bank_b; + __u8 mesh_alpha_r; + __u8 mesh_alpha_g; + __u8 mesh_alpha_b; + __u16 mesh_strength; +}; + +/** + * define MALI_C55_PARAMS_MAX_SIZE - Maximum size of all Mali C55 Parameters + * + * Though the parameters for the Mali-C55 are passed as optional blocks, the + * driver still needs to know the absolute maximum size so that it can allocate + * a buffer sized appropriately to accommodate userspace attempting to set all + * possible parameters in a single frame. + * + * Some structs are in this list multiple times. Where that's the case, it just + * reflects the fact that the same struct can be used with multiple different + * header types from :c:type:`mali_c55_param_block_type`. + */ +#define MALI_C55_PARAMS_MAX_SIZE \ + (sizeof(struct mali_c55_params_sensor_off_preshading) + \ + sizeof(struct mali_c55_params_aexp_hist) + \ + sizeof(struct mali_c55_params_aexp_weights) + \ + sizeof(struct mali_c55_params_aexp_hist) + \ + sizeof(struct mali_c55_params_aexp_weights) + \ + sizeof(struct mali_c55_params_digital_gain) + \ + sizeof(struct mali_c55_params_awb_gains) + \ + sizeof(struct mali_c55_params_awb_config) + \ + sizeof(struct mali_c55_params_awb_gains) + \ + sizeof(struct mali_c55_params_mesh_shading_config) + \ + sizeof(struct mali_c55_params_mesh_shading_selection)) + #endif /* __UAPI_MALI_C55_CONFIG_H */ -- cgit v1.2.3 From f91bc8f61abf0e1d23108ae9871c60d7612a09b2 Mon Sep 17 00:00:00 2001 From: Magnus Kulke Date: Thu, 6 Nov 2025 14:13:31 -0800 Subject: mshv: Allow mappings that overlap in uaddr Currently the MSHV driver rejects mappings that would overlap in userspace. Some VMMs require the same memory to be mapped to different parts of the guest's address space, and so working around this restriction is difficult. The hypervisor itself doesn't prohibit mappings that overlap in uaddr, (really in SPA; system physical addresses), so supporting this in the driver doesn't require any extra work: only the checks need to be removed. Since no userspace code until now has been able to overlap regions in userspace, relaxing this constraint can't break any existing code. Signed-off-by: Magnus Kulke Signed-off-by: Nuno Das Neves Reviewed-by: Michael Kelley Signed-off-by: Wei Liu --- include/uapi/linux/mshv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index 876bfe4e4227..374f75e198bc 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -89,7 +89,7 @@ enum { * @rsvd: MBZ * * Map or unmap a region of userspace memory to Guest Physical Addresses (GPA). - * Mappings can't overlap in GPA space or userspace. + * Mappings can't overlap in GPA space. * To unmap, these fields must match an existing mapping. */ struct mshv_user_mem_region { -- cgit v1.2.3 From c91fe5f162f278d4aa960d06d2dbc42f9857593a Mon Sep 17 00:00:00 2001 From: Muminul Islam Date: Thu, 13 Nov 2025 11:45:33 -0800 Subject: mshv: Extend create partition ioctl to support cpu features The existing mshv create partition ioctl does not provide a way to specify which cpu features are enabled in the guest. Instead, it attempts to enable all features and those that are not supported are silently disabled by the hypervisor. This was done to reduce unnecessary complexity and is sufficient for many cases. However, new scenarios require fine-grained control over these features. Define a new mshv_create_partition_v2 structure which supports passing the disabled processor and xsave feature bits through to the create partition hypercall directly. Introduce a new flag MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES which enables the new structure. If unset, the original mshv_create_partition struct is used, with the old behavior of enabling all features. Co-developed-by: Jinank Jain Signed-off-by: Jinank Jain Signed-off-by: Muminul Islam Signed-off-by: Nuno Das Neves Reviewed-by: Michael Kelley Signed-off-by: Wei Liu --- include/uapi/linux/mshv.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index 374f75e198bc..b645d17cc531 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -26,6 +26,7 @@ enum { MSHV_PT_BIT_LAPIC, MSHV_PT_BIT_X2APIC, MSHV_PT_BIT_GPA_SUPER_PAGES, + MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES, MSHV_PT_BIT_COUNT, }; @@ -41,6 +42,8 @@ enum { * @pt_flags: Bitmask of 1 << MSHV_PT_BIT_* * @pt_isolation: MSHV_PT_ISOLATION_* * + * This is the initial/v1 version for backward compatibility. + * * Returns a file descriptor to act as a handle to a guest partition. * At this point the partition is not yet initialized in the hypervisor. * Some operations must be done with the partition in this state, e.g. setting @@ -52,6 +55,37 @@ struct mshv_create_partition { __u64 pt_isolation; }; +#define MSHV_NUM_CPU_FEATURES_BANKS 2 + +/** + * struct mshv_create_partition_v2 + * + * This is extended version of the above initial MSHV_CREATE_PARTITION + * ioctl and allows for following additional parameters: + * + * @pt_num_cpu_fbanks: Must be set to MSHV_NUM_CPU_FEATURES_BANKS. + * @pt_cpu_fbanks: Disabled processor feature banks array. + * @pt_disabled_xsave: Disabled xsave feature bits. + * + * pt_cpu_fbanks and pt_disabled_xsave are passed through as-is to the create + * partition hypercall. + * + * Returns : same as above original mshv_create_partition + */ +struct mshv_create_partition_v2 { + __u64 pt_flags; + __u64 pt_isolation; + __u16 pt_num_cpu_fbanks; + __u8 pt_rsvd[6]; /* MBZ */ + __u64 pt_cpu_fbanks[MSHV_NUM_CPU_FEATURES_BANKS]; + __u64 pt_rsvd1[2]; /* MBZ */ +#if defined(__x86_64__) + __u64 pt_disabled_xsave; +#else + __u64 pt_rsvd2; /* MBZ */ +#endif +} __packed; + /* /dev/mshv */ #define MSHV_CREATE_PARTITION _IOW(MSHV_IOCTL, 0x00, struct mshv_create_partition) -- cgit v1.2.3 From dd9896d41fdf1050934d6a46a1c5ca2164284e72 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Sat, 15 Nov 2025 19:06:26 +0100 Subject: ASoC: Intel: avs: Allow the topology to carry NHLT data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typically the hardware configuration for I2S and DMIC devices resides in the Non-HDAudio Link Table (NHLT) that is part of the ACPI tree. As the NHLTs existing in the field are not always perfect, workaround mechanisms are provided to patch them. Currently the avs-driver is utilizing the ->blob_fmt override (see topology.h and struct avs_tplg_modcfg_ext) when there is a valid entry within a NHLT to configure the hardware for specific format but its descriptor (header) is invalid. A separate case is when there is no correct hardware configuration at all within the NHLT available in the system. Patching the header won't help and forcing ad-hoc BIOS updates for dated system is not feasible. Allowing the topology to carry the data is the solution of choice as replacing a userspace file that is part of /lib/firmware/intel/ is less invasive than BIOS update and solves the problem. Co-developed-by: Amadeusz Sławiński Signed-off-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20251115180627.3589520-2-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/uapi/sound/intel/avs/tokens.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/sound/intel/avs/tokens.h b/include/uapi/sound/intel/avs/tokens.h index f3ff6aae09a9..f7cbbfb00227 100644 --- a/include/uapi/sound/intel/avs/tokens.h +++ b/include/uapi/sound/intel/avs/tokens.h @@ -21,6 +21,7 @@ enum avs_tplg_token { AVS_TKN_MANIFEST_NUM_BINDINGS_U32 = 8, AVS_TKN_MANIFEST_NUM_CONDPATH_TMPLS_U32 = 9, AVS_TKN_MANIFEST_NUM_INIT_CONFIGS_U32 = 10, + AVS_TKN_MANIFEST_NUM_NHLT_CONFIGS_U32 = 11, /* struct avs_tplg_library */ AVS_TKN_LIBRARY_ID_U32 = 101, @@ -160,6 +161,10 @@ enum avs_tplg_token { AVS_TKN_INIT_CONFIG_ID_U32 = 2401, AVS_TKN_INIT_CONFIG_PARAM_U8 = 2402, AVS_TKN_INIT_CONFIG_LENGTH_U32 = 2403, + + /* struct avs_tplg_nhlt_config */ + AVS_TKN_NHLT_CONFIG_ID_U32 = 2501, + AVS_TKN_NHLT_CONFIG_SIZE_U32 = 2502, }; #endif -- cgit v1.2.3 From d5c8b7902a41625ea328b52c78ebe750fbf6fef7 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Sat, 15 Nov 2025 19:06:27 +0100 Subject: ASoC: Intel: avs: Honor NHLT override when setting up a path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case topology provides NHLT configuration, use it instead of relying on the table in ACPI tree. Only gateway-related modules e.g.: Copier care about the process. For those the order of fetching for hardware configuration becomes: 1) check if NHLT override is set, 2) check if NHLT descriptor override is set, 3) use NHLT from ACPI directly Such approach ensures no conflicts exist between 1) and 2) and that 1) always takes precedence. Co-developed-by: Amadeusz Sławiński Signed-off-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20251115180627.3589520-3-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/uapi/sound/intel/avs/tokens.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/sound/intel/avs/tokens.h b/include/uapi/sound/intel/avs/tokens.h index f7cbbfb00227..3ff6d9150822 100644 --- a/include/uapi/sound/intel/avs/tokens.h +++ b/include/uapi/sound/intel/avs/tokens.h @@ -125,6 +125,7 @@ enum avs_tplg_token { AVS_TKN_MOD_KCONTROL_ID_U32 = 1707, AVS_TKN_MOD_INIT_CONFIG_NUM_IDS_U32 = 1708, AVS_TKN_MOD_INIT_CONFIG_ID_U32 = 1709, + AVS_TKN_MOD_NHLT_CONFIG_ID_U32 = 1710, /* struct avs_tplg_path_template */ AVS_TKN_PATH_TMPL_ID_U32 = 1801, -- cgit v1.2.3 From ae8966b7b5bd69b86209cc34bcca1ba9f18b68e6 Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Thu, 6 Nov 2025 21:45:34 +1000 Subject: Input: rename INPUT_PROP_HAPTIC_TOUCHPAD to INPUT_PROP_PRESSUREPAD And expand it to encompass all pressure pads. Definition: "pressure pad" as used here as includes all touchpads that use physical pressure to convert to click, without physical hinges. Also called haptic touchpads in general parlance, Synaptics calls them ForcePads. Most (all?) pressure pads are currently advertised as INPUT_PROP_BUTTONPAD. The suggestion to identify them as pressure pads by defining the resolution on ABS_MT_PRESSURE has been in the docs since commit 20ccc8dd38a3 ("Documentation: input: define ABS_PRESSURE/ABS_MT_PRESSURE resolution as grams") but few devices provide this information. In userspace it's thus impossible to determine whether a device is a true pressure pad (pressure equals pressure) or a normal clickpad with (pressure equals finger size). Commit 7075ae4ac9db ("Input: add INPUT_PROP_HAPTIC_TOUCHPAD") introduces INPUT_PROP_HAPTIC_TOUCHPAD but restricted it to those touchpads that have support for userspace-controlled effects. Let's expand and rename that definition to include all pressure pad touchpads since those that do support FF effects can be identified by the presence of the FF_HAPTIC bit. This means: - clickpad: INPUT_PROP_BUTTONPAD - pressurepad: INPUT_PROP_BUTTONPAD + INPUT_PROP_PRESSUREPAD - pressurepad with configurable haptics: INPUT_PROP_BUTTONPAD + INPUT_PROP_PRESSUREPAD + FF_HAPTIC Signed-off-by: Peter Hutterer Acked-by: Benjamin Tissoires Link: https://patch.msgid.link/20251106114534.GA405512@tassie Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 9cd89bcc1d9c..30f3c9eaafaa 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -27,7 +27,7 @@ #define INPUT_PROP_TOPBUTTONPAD 0x04 /* softbuttons at top of pad */ #define INPUT_PROP_POINTING_STICK 0x05 /* is a pointing stick */ #define INPUT_PROP_ACCELEROMETER 0x06 /* has accelerometer */ -#define INPUT_PROP_HAPTIC_TOUCHPAD 0x07 /* is a haptic touchpad */ +#define INPUT_PROP_PRESSUREPAD 0x07 /* pressure triggers clicks */ #define INPUT_PROP_MAX 0x1f #define INPUT_PROP_CNT (INPUT_PROP_MAX + 1) -- cgit v1.2.3 From 491c5dc98b848c4781addd514caed95039e5366c Mon Sep 17 00:00:00 2001 From: Yael Chemla Date: Wed, 19 Nov 2025 22:48:15 +0200 Subject: net: ethtool: Add support for 1600Gbps speed Add support for 1600Gbps link modes based on 200Gbps per lane [1]. This includes the adopted IEEE 802.3dj copper and optical PMDs that use 200G/lane signaling [2]. Add the following PMD types: - KR8 (backplane) - CR8 (copper cable) - DR8 (SMF 500m) - DR8-2 (SMF 2km) These modes are defined in the 802.3dj specifications. References: [1] https://www.ieee802.org/3/dj/public/23_03/opsasnick_3dj_01a_2303.pdf [2] https://www.ieee802.org/3/dj/projdoc/objectives_P802d3dj_240314.pdf Signed-off-by: Yael Chemla Reviewed-by: Shahar Shitrit Signed-off-by: Tariq Toukan Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/1763585297-1243980-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 8bd5ea5469d9..eb7ff2602fbb 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2077,6 +2077,10 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT = 118, ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT = 119, ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT = 120, + ETHTOOL_LINK_MODE_1600000baseCR8_Full_BIT = 121, + ETHTOOL_LINK_MODE_1600000baseKR8_Full_BIT = 122, + ETHTOOL_LINK_MODE_1600000baseDR8_Full_BIT = 123, + ETHTOOL_LINK_MODE_1600000baseDR8_2_Full_BIT = 124, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS @@ -2190,6 +2194,7 @@ enum ethtool_link_mode_bit_indices { #define SPEED_200000 200000 #define SPEED_400000 400000 #define SPEED_800000 800000 +#define SPEED_1600000 1600000 #define SPEED_UNKNOWN -1 -- cgit v1.2.3 From 2a367002ed321e884276c3d7232a362ddd1bf7d6 Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Tue, 18 Nov 2025 18:50:33 -0800 Subject: devlink: support default values for param-get and param-set Support querying and resetting to default param values. Introduce two new devlink netlink attrs: DEVLINK_ATTR_PARAM_VALUE_DEFAULT and DEVLINK_ATTR_PARAM_RESET_DEFAULT. The former is used to contain an optional parameter value inside of the param_value nested attribute. The latter is used in param-set requests from userspace to indicate that the driver should reset the param to its default value. To implement this, two new functions are added to the devlink driver api: devlink_param::get_default() and devlink_param::reset_default(). These callbacks allow drivers to implement default param actions for runtime and permanent cmodes. For driverinit params, the core latches the last value set by a driver via devl_param_driverinit_value_set(), and uses that as the default value for a param. Because default parameter values are optional, it would be impossible to discern whether or not a param of type bool has default value of false or not provided if the default value is encoded using a netlink flag type. For this reason, when a DEVLINK_PARAM_TYPE_BOOL has an associated default value, the default value is encoded using a u8 type. Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20251119025038.651131-4-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 157f11d3fb72..e7d6b6d13470 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -639,6 +639,9 @@ enum devlink_attr { DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD, /* u64 */ + DEVLINK_ATTR_PARAM_VALUE_DEFAULT, /* dynamic */ + DEVLINK_ATTR_PARAM_RESET_DEFAULT, /* flag */ + /* Add new attributes above here, update the spec in * Documentation/netlink/specs/devlink.yaml and re-generate * net/devlink/netlink_gen.c. -- cgit v1.2.3 From 5d74781ebc86c5fa9e9d6934024c505412de9b52 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 20 Nov 2025 11:28:29 +0200 Subject: vfio/pci: Add dma-buf export support for MMIO regions Add support for exporting PCI device MMIO regions through dma-buf, enabling safe sharing of non-struct page memory with controlled lifetime management. This allows RDMA and other subsystems to import dma-buf FDs and build them into memory regions for PCI P2P operations. The implementation provides a revocable attachment mechanism using dma-buf move operations. MMIO regions are normally pinned as BARs don't change physical addresses, but access is revoked when the VFIO device is closed or a PCI reset is issued. This ensures kernel self-defense against potentially hostile userspace. Currently VFIO can take MMIO regions from the device's BAR and map them into a PFNMAP VMA with special PTEs. This mapping type ensures the memory cannot be used with things like pin_user_pages(), hmm, and so on. In practice only the user process CPU and KVM can safely make use of these VMA. When VFIO shuts down these VMAs are cleaned by unmap_mapping_range() to prevent any UAF of the MMIO beyond driver unbind. However, VFIO type 1 has an insecure behavior where it uses follow_pfnmap_*() to fish a MMIO PFN out of a VMA and program it back into the IOMMU. This has a long history of enabling P2P DMA inside VMs, but has serious lifetime problems by allowing a UAF of the MMIO after the VFIO driver has been unbound. Introduce DMABUF as a new safe way to export a FD based handle for the MMIO regions. This can be consumed by existing DMABUF importers like RDMA or DRM without opening an UAF. A following series will add an importer to iommufd to obsolete the type 1 code and allow safe UAF-free MMIO P2P in VM cases. DMABUF has a built in synchronous invalidation mechanism called move_notify. VFIO keeps track of all drivers importing its MMIO and can invoke a synchronous invalidation callback to tell the importing drivers to DMA unmap and forget about the MMIO pfns. This process is being called revoke. This synchronous invalidation fully prevents any lifecycle problems. VFIO will do this before unbinding its driver ensuring there is no UAF of the MMIO beyond the driver lifecycle. Further, VFIO has additional behavior to block access to the MMIO during things like Function Level Reset. This is because some poor platforms may experience a MCE type crash when touching MMIO of a PCI device that is undergoing a reset. Today this is done by using unmap_mapping_range() on the VMAs. Extend that into the DMABUF world and temporarily revoke the MMIO from the DMABUF importers during FLR as well. This will more robustly prevent an errant P2P from possibly upsetting the platform. A DMABUF FD is a preferred handle for MMIO compared to using something like a pgmap because: - VFIO is supported, including its P2P feature, on archs that don't support pgmap - PCI devices have all sorts of BAR sizes, including ones smaller than a section so a pgmap cannot always be created - It is undesirable to waste a lot of memory for struct pages, especially for a case like a GPU with ~100GB of BAR size - We want a synchronous revoke semantic to support FLR with light hardware requirements Use the P2P subsystem to help generate the DMA mapping. This is a significant upgrade over the abuse of dma_map_resource() that has historically been used by DMABUF exporters. Experience with an OOT version of this patch shows that real systems do need this. This approach deals with all the P2P scenarios: - Non-zero PCI bus_offset - ACS flags routing traffic to the IOMMU - ACS flags that bypass the IOMMU - though vfio noiommu is required to hit this. There will be further work to formalize the revoke semantic in DMABUF. For now this acts like a move_notify dynamic exporter where importer fault handling will get a failure when they attempt to map. This means that only fully restartable fault capable importers can import the VFIO DMABUFs. A future revoke semantic should open this up to more HW as the HW only needs to invalidate, not handle restartable faults. Signed-off-by: Jason Gunthorpe Signed-off-by: Vivek Kasireddy Reviewed-by: Kevin Tian Signed-off-by: Leon Romanovsky Acked-by: Ankit Agrawal Link: https://lore.kernel.org/r/20251120-dmabuf-vfio-v9-10-d7f71607f371@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 75100bf009ba..ac2329f24141 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -14,6 +14,7 @@ #include #include +#include #define VFIO_API_VERSION 0 @@ -1478,6 +1479,33 @@ struct vfio_device_feature_bus_master { }; #define VFIO_DEVICE_FEATURE_BUS_MASTER 10 +/** + * Upon VFIO_DEVICE_FEATURE_GET create a dma_buf fd for the + * regions selected. + * + * open_flags are the typical flags passed to open(2), eg O_RDWR, O_CLOEXEC, + * etc. offset/length specify a slice of the region to create the dmabuf from. + * nr_ranges is the total number of (P2P DMA) ranges that comprise the dmabuf. + * + * flags should be 0. + * + * Return: The fd number on success, -1 and errno is set on failure. + */ +#define VFIO_DEVICE_FEATURE_DMA_BUF 11 + +struct vfio_region_dma_range { + __u64 offset; + __u64 length; +}; + +struct vfio_device_feature_dma_buf { + __u32 region_index; + __u32 open_flags; + __u32 flags; + __u32 nr_ranges; + struct vfio_region_dma_range dma_ranges[] __counted_by(nr_ranges); +}; + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From 8e8678e740ecde2ae4a0404fd9b4ed2b726e236d Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 8 Jul 2025 12:57:57 +0000 Subject: KVM: s390: Add capability that forwards operation exceptions Setting KVM_CAP_S390_USER_OPEREXEC will forward all operation exceptions to user space. This also includes the 0x0000 instructions managed by KVM_CAP_S390_USER_INSTR0. It's helpful if user space wants to emulate instructions which do not (yet) have an opcode. While we're at it refine the documentation for KVM_CAP_S390_USER_INSTR0. Signed-off-by: Janosch Frank Reviewed-by: Claudio Imbrenda Acked-by: Christian Borntraeger Signed-off-by: Janosch Frank --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 52f6000ab020..8ab07396ce3b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -963,6 +963,7 @@ struct kvm_enable_cap { #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 #define KVM_CAP_GUEST_MEMFD_FLAGS 244 +#define KVM_CAP_S390_USER_OPEREXEC 245 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From a67df6d1b939ca98e1ad403f53e3ee57299b8c44 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 11 Nov 2025 14:46:10 +0100 Subject: uapi: cdc.h: cleanly provide for more interfaces and countries The spec requires at least one interface respectively country. It allows multiple ones. This needs to be clearly said in the UAPI. This is subject to sanity checking in cdc_parse_cdc_header(), thus we can trust the length. Signed-off-by: Oliver Neukum Link: https://patch.msgid.link/20251111134641.4118827-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/cdc.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/usb/cdc.h b/include/uapi/linux/usb/cdc.h index 1924cf665448..7bd5d12d8b26 100644 --- a/include/uapi/linux/usb/cdc.h +++ b/include/uapi/linux/usb/cdc.h @@ -104,8 +104,10 @@ struct usb_cdc_union_desc { __u8 bDescriptorSubType; __u8 bMasterInterface0; - __u8 bSlaveInterface0; - /* ... and there could be other slave interfaces */ + union { + __u8 bSlaveInterface0; + __DECLARE_FLEX_ARRAY(__u8, bSlaveInterfaces); + }; } __attribute__ ((packed)); /* "Country Selection Functional Descriptor" from CDC spec 5.2.3.9 */ @@ -115,8 +117,10 @@ struct usb_cdc_country_functional_desc { __u8 bDescriptorSubType; __u8 iCountryCodeRelDate; - __le16 wCountyCode0; - /* ... and there can be a lot of country codes */ + union { + __le16 wCountryCode0; + __DECLARE_FLEX_ARRAY(__le16, wCountryCodes); + }; } __attribute__ ((packed)); /* "Network Channel Terminal Functional Descriptor" from CDC spec 5.2.3.11 */ -- cgit v1.2.3 From cbbfba4847b8a5299d36e002bf864b21bb83295d Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 11 Nov 2025 11:37:55 +0000 Subject: perf: Add perf_event_attr::config4 Arm FEAT_SPE_FDS adds the ability to filter on the data source of a packet using another 64-bits of event filtering control. As the existing perf_event_attr::configN fields are all used up for SPE PMU, an additional field is needed. Add a new 'config4' field. Reviewed-by: Leo Yan Tested-by: Leo Yan Reviewed-by: Ian Rogers Acked-by: Peter Zijlstra (Intel) Signed-off-by: James Clark Signed-off-by: Will Deacon --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 78a362b80027..0d0ed85ad8cb 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -382,6 +382,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */ #define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */ #define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */ +#define PERF_ATTR_SIZE_VER9 144 /* add: config4 */ /* * 'struct perf_event_attr' contains various attributes that define @@ -543,6 +544,7 @@ struct perf_event_attr { __u64 sig_data; __u64 config3; /* extension of config2 */ + __u64 config4; /* extension of config3 */ }; /* -- cgit v1.2.3 From e6ab504633e4c06e35377ecf3c8cbc304de79858 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Mon, 17 Nov 2025 15:40:21 +0100 Subject: staging: gpib: Destage gpib Move the gpib drivers out of staging and into the "real" part of the kernel. This entails: - Remove the gpib Kconfig menu and Makefile build rule from staging. - Remove gpib/uapi from the header file search path in subdir-ccflags of the gpib Makefile - move the gpib/uapi files to include/uapi/linux - Move the gpib tree out of staging to drivers. - Remove the word "Linux" from the gpib Kconfig file. - Add the gpib Kconfig menu and Makefile build rule to drivers Signed-off-by: Dave Penkler Link: https://patch.msgid.link/20251117144021.23569-5-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/gpib.h | 104 +++++++++++++++++++++++++ include/uapi/linux/gpib_ioctl.h | 167 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 271 insertions(+) create mode 100644 include/uapi/linux/gpib.h create mode 100644 include/uapi/linux/gpib_ioctl.h (limited to 'include/uapi') diff --git a/include/uapi/linux/gpib.h b/include/uapi/linux/gpib.h new file mode 100644 index 000000000000..2a7f5eeb9777 --- /dev/null +++ b/include/uapi/linux/gpib.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +/*************************************************************************** + * copyright : (C) 2002 by Frank Mori Hess + ***************************************************************************/ + +#ifndef _GPIB_H +#define _GPIB_H + +#define GPIB_MAX_NUM_BOARDS 16 +#define GPIB_MAX_NUM_DESCRIPTORS 0x1000 + +enum ibsta_bit_numbers { + DCAS_NUM = 0, + DTAS_NUM = 1, + LACS_NUM = 2, + TACS_NUM = 3, + ATN_NUM = 4, + CIC_NUM = 5, + REM_NUM = 6, + LOK_NUM = 7, + CMPL_NUM = 8, + EVENT_NUM = 9, + SPOLL_NUM = 10, + RQS_NUM = 11, + SRQI_NUM = 12, + END_NUM = 13, + TIMO_NUM = 14, + ERR_NUM = 15 +}; + +/* IBSTA status bits (returned by all functions) */ +enum ibsta_bits { + DCAS = (1 << DCAS_NUM), /* device clear state */ + DTAS = (1 << DTAS_NUM), /* device trigger state */ + LACS = (1 << LACS_NUM), /* GPIB interface is addressed as Listener */ + TACS = (1 << TACS_NUM), /* GPIB interface is addressed as Talker */ + ATN = (1 << ATN_NUM), /* Attention is asserted */ + CIC = (1 << CIC_NUM), /* GPIB interface is Controller-in-Charge */ + REM = (1 << REM_NUM), /* remote state */ + LOK = (1 << LOK_NUM), /* lockout state */ + CMPL = (1 << CMPL_NUM), /* I/O is complete */ + EVENT = (1 << EVENT_NUM), /* DCAS, DTAS, or IFC has occurred */ + SPOLL = (1 << SPOLL_NUM), /* board serial polled by busmaster */ + RQS = (1 << RQS_NUM), /* Device requesting service */ + SRQI = (1 << SRQI_NUM), /* SRQ is asserted */ + END = (1 << END_NUM), /* EOI or EOS encountered */ + TIMO = (1 << TIMO_NUM), /* Time limit on I/O or wait function exceeded */ + ERR = (1 << ERR_NUM), /* Function call terminated on error */ + + device_status_mask = ERR | TIMO | END | CMPL | RQS, + board_status_mask = ERR | TIMO | END | CMPL | SPOLL | + EVENT | LOK | REM | CIC | ATN | TACS | LACS | DTAS | DCAS | SRQI, +}; + +/* End-of-string (EOS) modes for use with ibeos */ + +enum eos_flags { + EOS_MASK = 0x1c00, + REOS = 0x0400, /* Terminate reads on EOS */ + XEOS = 0x800, /* assert EOI when EOS char is sent */ + BIN = 0x1000 /* Do 8-bit compare on EOS */ +}; + +/* GPIB Bus Control Lines bit vector */ +enum bus_control_line { + VALID_DAV = 0x01, + VALID_NDAC = 0x02, + VALID_NRFD = 0x04, + VALID_IFC = 0x08, + VALID_REN = 0x10, + VALID_SRQ = 0x20, + VALID_ATN = 0x40, + VALID_EOI = 0x80, + VALID_ALL = 0xff, + BUS_DAV = 0x0100, /* DAV line status bit */ + BUS_NDAC = 0x0200, /* NDAC line status bit */ + BUS_NRFD = 0x0400, /* NRFD line status bit */ + BUS_IFC = 0x0800, /* IFC line status bit */ + BUS_REN = 0x1000, /* REN line status bit */ + BUS_SRQ = 0x2000, /* SRQ line status bit */ + BUS_ATN = 0x4000, /* ATN line status bit */ + BUS_EOI = 0x8000 /* EOI line status bit */ +}; + +enum ppe_bits { + PPC_DISABLE = 0x10, + PPC_SENSE = 0x8, /* parallel poll sense bit */ + PPC_DIO_MASK = 0x7 +}; + +enum { + request_service_bit = 0x40, +}; + +enum gpib_events { + EVENT_NONE = 0, + EVENT_DEV_TRG = 1, + EVENT_DEV_CLR = 2, + EVENT_IFC = 3 +}; + +#endif /* _GPIB_H */ + diff --git a/include/uapi/linux/gpib_ioctl.h b/include/uapi/linux/gpib_ioctl.h new file mode 100644 index 000000000000..d544d8e4362c --- /dev/null +++ b/include/uapi/linux/gpib_ioctl.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +/*************************************************************************** + * copyright : (C) 2002 by Frank Mori Hess + ***************************************************************************/ + +#ifndef _GPIB_IOCTL_H +#define _GPIB_IOCTL_H + +#include +#include + +#define GPIB_CODE 160 + +struct gpib_board_type_ioctl { + char name[100]; +}; + +/* argument for read/write/command ioctls */ +struct gpib_read_write_ioctl { + __u64 buffer_ptr; + __u32 requested_transfer_count; + __u32 completed_transfer_count; + __s32 end; /* end flag return for reads, end io suppression request for cmd*/ + __s32 handle; +}; + +struct gpib_open_dev_ioctl { + __u32 handle; + __u32 pad; + __s32 sad; + __u32 is_board; +}; + +struct gpib_close_dev_ioctl { + __u32 handle; +}; + +struct gpib_serial_poll_ioctl { + __u32 pad; + __s32 sad; + __u8 status_byte; + __u8 padding[3]; /* align to 32 bit boundary */ +}; + +struct gpib_eos_ioctl { + __s32 eos; + __s32 eos_flags; +}; + +struct gpib_wait_ioctl { + __s32 handle; + __s32 wait_mask; + __s32 clear_mask; + __s32 set_mask; + __s32 ibsta; + __s32 pad; + __s32 sad; + __u32 usec_timeout; +}; + +struct gpib_online_ioctl { + __u64 init_data_ptr; + __s32 init_data_length; + __s32 online; +}; + +struct gpib_spoll_bytes_ioctl { + __u32 num_bytes; + __u32 pad; + __s32 sad; +}; + +struct gpib_board_info_ioctl { + __u32 pad; + __s32 sad; + __s32 parallel_poll_configuration; + __s32 autopolling; + __s32 is_system_controller; + __u32 t1_delay; + unsigned ist : 1; + unsigned no_7_bit_eos : 1; + unsigned padding :30; /* align to 32 bit boundary */ +}; + +struct gpib_select_pci_ioctl { + __s32 pci_bus; + __s32 pci_slot; +}; + +struct gpib_ppoll_config_ioctl { + __u8 config; + unsigned set_ist : 1; + unsigned clear_ist : 1; + unsigned padding :22; /* align to 32 bit boundary */ +}; + +struct gpib_pad_ioctl { + __u32 handle; + __u32 pad; +}; + +struct gpib_sad_ioctl { + __u32 handle; + __s32 sad; +}; + +/* select a piece of hardware to attach by its sysfs device path */ +struct gpib_select_device_path_ioctl { + char device_path[0x1000]; +}; + +/* update status byte and request service */ +struct gpib_request_service2 { + __u8 status_byte; + __u8 padding[3]; /* align to 32 bit boundary */ + __s32 new_reason_for_service; +}; + +/* Standard functions. */ +enum gpib_ioctl { + IBRD = _IOWR(GPIB_CODE, 100, struct gpib_read_write_ioctl), + IBWRT = _IOWR(GPIB_CODE, 101, struct gpib_read_write_ioctl), + IBCMD = _IOWR(GPIB_CODE, 102, struct gpib_read_write_ioctl), + IBOPENDEV = _IOWR(GPIB_CODE, 3, struct gpib_open_dev_ioctl), + IBCLOSEDEV = _IOW(GPIB_CODE, 4, struct gpib_close_dev_ioctl), + IBWAIT = _IOWR(GPIB_CODE, 5, struct gpib_wait_ioctl), + IBRPP = _IOWR(GPIB_CODE, 6, __u8), + + IBSIC = _IOW(GPIB_CODE, 9, __u32), + IBSRE = _IOW(GPIB_CODE, 10, __s32), + IBGTS = _IO(GPIB_CODE, 11), + IBCAC = _IOW(GPIB_CODE, 12, __s32), + IBLINES = _IOR(GPIB_CODE, 14, __s16), + IBPAD = _IOW(GPIB_CODE, 15, struct gpib_pad_ioctl), + IBSAD = _IOW(GPIB_CODE, 16, struct gpib_sad_ioctl), + IBTMO = _IOW(GPIB_CODE, 17, __u32), + IBRSP = _IOWR(GPIB_CODE, 18, struct gpib_serial_poll_ioctl), + IBEOS = _IOW(GPIB_CODE, 19, struct gpib_eos_ioctl), + IBRSV = _IOW(GPIB_CODE, 20, __u8), + CFCBASE = _IOW(GPIB_CODE, 21, __u64), + CFCIRQ = _IOW(GPIB_CODE, 22, __u32), + CFCDMA = _IOW(GPIB_CODE, 23, __u32), + CFCBOARDTYPE = _IOW(GPIB_CODE, 24, struct gpib_board_type_ioctl), + + IBMUTEX = _IOW(GPIB_CODE, 26, __s32), + IBSPOLL_BYTES = _IOWR(GPIB_CODE, 27, struct gpib_spoll_bytes_ioctl), + IBPPC = _IOW(GPIB_CODE, 28, struct gpib_ppoll_config_ioctl), + IBBOARD_INFO = _IOR(GPIB_CODE, 29, struct gpib_board_info_ioctl), + + IBQUERY_BOARD_RSV = _IOR(GPIB_CODE, 31, __s32), + IBSELECT_PCI = _IOWR(GPIB_CODE, 32, struct gpib_select_pci_ioctl), + IBEVENT = _IOR(GPIB_CODE, 33, __s16), + IBRSC = _IOW(GPIB_CODE, 34, __s32), + IB_T1_DELAY = _IOW(GPIB_CODE, 35, __u32), + IBLOC = _IO(GPIB_CODE, 36), + + IBAUTOSPOLL = _IOW(GPIB_CODE, 38, __s16), + IBONL = _IOW(GPIB_CODE, 39, struct gpib_online_ioctl), + IBPP2_SET = _IOW(GPIB_CODE, 40, __s16), + IBPP2_GET = _IOR(GPIB_CODE, 41, __s16), + IBSELECT_DEVICE_PATH = _IOW(GPIB_CODE, 43, struct gpib_select_device_path_ioctl), + /* 44 was IBSELECT_SERIAL_NUMBER */ + IBRSV2 = _IOW(GPIB_CODE, 45, struct gpib_request_service2) +}; + +#endif /* _GPIB_IOCTL_H */ -- cgit v1.2.3 From 6b1ac78dd0f29fe66421c460c12ec15e45af38c3 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 13 Oct 2025 10:22:04 +1030 Subject: btrfs: implement shutdown ioctl The shutdown ioctl should follow the XFS one, which use magic number 'X', and ioctl number 125, with a uint32 as flags. For now btrfs don't distinguish DEFAULT and LOGFLUSH flags (just like f2fs), both will freeze the fs first (implies committing the current transaction), setting the SHUTDOWN flag and finally thaw the fs. For NOLOGFLUSH flag, the freeze/thaw part is skipped thus the current transaction is aborted. The new shutdown ioctl is hidden behind experimental features for more testing. Reviewed-by: Johannes Thumshirn Reviewed-by: Anand Jain Tested-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 8e710bbb688e..e8fd92789423 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -1099,6 +1099,12 @@ enum btrfs_err_code { BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET, }; +/* Flags for IOC_SHUTDOWN, must match XFS_FSOP_GOING_FLAGS_* flags. */ +#define BTRFS_SHUTDOWN_FLAGS_DEFAULT 0x0 +#define BTRFS_SHUTDOWN_FLAGS_LOGFLUSH 0x1 +#define BTRFS_SHUTDOWN_FLAGS_NOLOGFLUSH 0x2 +#define BTRFS_SHUTDOWN_FLAGS_LAST 0x3 + #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -1220,6 +1226,9 @@ enum btrfs_err_code { #define BTRFS_IOC_SUBVOL_SYNC_WAIT _IOW(BTRFS_IOCTL_MAGIC, 65, \ struct btrfs_ioctl_subvol_wait) +/* Shutdown ioctl should follow XFS's interfaces, thus not using btrfs magic. */ +#define BTRFS_IOC_SHUTDOWN _IOR('X', 125, __u32) + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 37d369fa97cc0774ea4eab726d16bcb5fbe3a104 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 23 Nov 2025 22:05:15 +0000 Subject: fs: Add uoff_t In a recent commit, I inadvertently changed a comparison from being an unsigned comparison (on 64-bit systems) to being a signed comparison (which it had always been on 32-bit systems). This led to a sporadic fstests failure. To make sure this comparison is always unsigned, introduce a new type, uoff_t which is the unsigned version of loff_t. Generally file sizes are restricted to being a signed integer, but in these two places it is convenient to pass -1 to indicate "up to the end of the file". Signed-off-by: Matthew Wilcox (Oracle) Link: https://patch.msgid.link/20251123220518.1447261-1-willy@infradead.org Signed-off-by: Christian Brauner --- include/uapi/asm-generic/posix_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/posix_types.h b/include/uapi/asm-generic/posix_types.h index b5f7594eee7a..0a90ad92dbf3 100644 --- a/include/uapi/asm-generic/posix_types.h +++ b/include/uapi/asm-generic/posix_types.h @@ -86,6 +86,7 @@ typedef struct { */ typedef __kernel_long_t __kernel_off_t; typedef long long __kernel_loff_t; +typedef unsigned long long __kernel_uoff_t; typedef __kernel_long_t __kernel_old_time_t; #ifndef __KERNEL__ typedef __kernel_long_t __kernel_time_t; -- cgit v1.2.3 From 1c6a92a5a5de7ebf94526dee7068926e6d5b1b01 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 24 Nov 2025 18:28:34 -0800 Subject: wifi: nl80211: vendor-cmd: intel: fix a blank kernel-doc line warning Delete an empty line prevent a kernel-doc warning: Warning: ../include/uapi/linux/nl80211-vnd-intel.h:86 bad line: Fixes: 3d2a2544eae9 ("nl80211: vendor-cmd: add Intel vendor commands for iwlmei usage") Signed-off-by: Randy Dunlap Link: https://patch.msgid.link/20251125022834.3171742-1-rdunlap@infradead.org Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211-vnd-intel.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211-vnd-intel.h b/include/uapi/linux/nl80211-vnd-intel.h index 4ed7d0b24512..79ccc9401d50 100644 --- a/include/uapi/linux/nl80211-vnd-intel.h +++ b/include/uapi/linux/nl80211-vnd-intel.h @@ -84,7 +84,6 @@ enum iwl_vendor_auth_akm_mode { * * @NUM_IWL_MVM_VENDOR_ATTR: number of vendor attributes * @MAX_IWL_MVM_VENDOR_ATTR: highest vendor attribute number - */ enum iwl_mvm_vendor_attr { __IWL_MVM_VENDOR_ATTR_INVALID = 0x00, -- cgit v1.2.3 From 68e83f3472667aac18d577587102f4bf77d0bd06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Thu, 20 Nov 2025 17:44:27 +0000 Subject: tools: ynl-gen: add regeneration comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a comment on regeneration to the generated files. The comment is placed after the YNL-GEN line[1], as to not interfere with ynl-regen.sh's detection logic. [1] and after the optional YNL-ARG line. Link: https://lore.kernel.org/r/aR5m174O7pklKrMR@zx2c4.com/ Suggested-by: Jason A. Donenfeld Signed-off-by: Asbjørn Sloth Tønnesen Acked-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251120174429.390574-3-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/uapi/linux/android/binder_netlink.h | 1 + include/uapi/linux/dpll.h | 1 + include/uapi/linux/ethtool_netlink_generated.h | 1 + include/uapi/linux/fou.h | 1 + include/uapi/linux/handshake.h | 1 + include/uapi/linux/if_team.h | 1 + include/uapi/linux/lockd_netlink.h | 1 + include/uapi/linux/mptcp_pm.h | 1 + include/uapi/linux/net_shaper.h | 1 + include/uapi/linux/netdev.h | 1 + include/uapi/linux/nfsd_netlink.h | 1 + include/uapi/linux/ovpn.h | 1 + include/uapi/linux/psp.h | 1 + 13 files changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/android/binder_netlink.h b/include/uapi/linux/android/binder_netlink.h index b218f96d6668..bf69833c9a19 100644 --- a/include/uapi/linux/android/binder_netlink.h +++ b/include/uapi/linux/android/binder_netlink.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/binder.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_ANDROID_BINDER_NETLINK_H #define _UAPI_LINUX_ANDROID_BINDER_NETLINK_H diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index 69d35570ac4f..b7ff9c44f9aa 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/dpll.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_DPLL_H #define _UAPI_LINUX_DPLL_H diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index b71b175df46d..556a0c834df5 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/ethtool.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h index b5cd3e7b3775..bb6bef74d2d1 100644 --- a/include/uapi/linux/fou.h +++ b/include/uapi/linux/fou.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_FOU_H #define _UAPI_LINUX_FOU_H diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h index 662e7de46c54..d7e40f594888 100644 --- a/include/uapi/linux/handshake.h +++ b/include/uapi/linux/handshake.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/handshake.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_HANDSHAKE_H #define _UAPI_LINUX_HANDSHAKE_H diff --git a/include/uapi/linux/if_team.h b/include/uapi/linux/if_team.h index a5c06243a435..f4cd839ae725 100644 --- a/include/uapi/linux/if_team.h +++ b/include/uapi/linux/if_team.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/team.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_IF_TEAM_H #define _UAPI_LINUX_IF_TEAM_H diff --git a/include/uapi/linux/lockd_netlink.h b/include/uapi/linux/lockd_netlink.h index 21c65aec3bc6..2d766a0fa6ea 100644 --- a/include/uapi/linux/lockd_netlink.h +++ b/include/uapi/linux/lockd_netlink.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/lockd.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_LOCKD_NETLINK_H #define _UAPI_LINUX_LOCKD_NETLINK_H diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h index bf44a5cf5b5a..c97d060ee90b 100644 --- a/include/uapi/linux/mptcp_pm.h +++ b/include/uapi/linux/mptcp_pm.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/mptcp_pm.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_MPTCP_PM_H #define _UAPI_LINUX_MPTCP_PM_H diff --git a/include/uapi/linux/net_shaper.h b/include/uapi/linux/net_shaper.h index d8834b59f7d7..3dd22c2930d9 100644 --- a/include/uapi/linux/net_shaper.h +++ b/include/uapi/linux/net_shaper.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/net_shaper.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_NET_SHAPER_H #define _UAPI_LINUX_NET_SHAPER_H diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 048c8de1a130..e0b579a1df4f 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_NETDEV_H #define _UAPI_LINUX_NETDEV_H diff --git a/include/uapi/linux/nfsd_netlink.h b/include/uapi/linux/nfsd_netlink.h index 887cbd12b695..e157e2009ea8 100644 --- a/include/uapi/linux/nfsd_netlink.h +++ b/include/uapi/linux/nfsd_netlink.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/nfsd.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_NFSD_NETLINK_H #define _UAPI_LINUX_NFSD_NETLINK_H diff --git a/include/uapi/linux/ovpn.h b/include/uapi/linux/ovpn.h index 680d1522dc87..959b41def61f 100644 --- a/include/uapi/linux/ovpn.h +++ b/include/uapi/linux/ovpn.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/ovpn.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_OVPN_H #define _UAPI_LINUX_OVPN_H diff --git a/include/uapi/linux/psp.h b/include/uapi/linux/psp.h index d8449c043ba1..a3a336488dc3 100644 --- a/include/uapi/linux/psp.h +++ b/include/uapi/linux/psp.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/psp.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_PSP_H #define _UAPI_LINUX_PSP_H -- cgit v1.2.3 From 60f511f443e552ef5b5cd79ec2b881f4323e19c9 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 26 Nov 2025 11:16:05 +0100 Subject: can: netlink: add CAN_CTRLMODE_RESTRICTED ISO 11898-1:2024 adds a new restricted operation mode. This mode is added as a mandatory feature for nodes which support CAN XL and is retrofitted as optional for legacy nodes (i.e. the ones which only support Classical CAN and CAN FD). The restricted operation mode is nearly the same as the listen only mode: the node can not send data frames or remote frames and can not send dominant bits if an error occurs. The only exception is that the node shall still send the acknowledgment bit. A second niche exception is that the node may still send a data frame containing a time reference message if the node is a primary time provider, but because the time provider feature is not yet implemented in the kernel, this second exception is not relevant to us at the moment. Add the CAN_CTRLMODE_RESTRICTED control mode flag and update the can_dev_dropped_skb() helper function accordingly. Finally, bail out if both CAN_CTRLMODE_LISTENONLY and CAN_CTRLMODE_RESTRICTED are provided. Signed-off-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://patch.msgid.link/20251126-canxl-v8-4-e7e3eb74f889@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index ef62f56eaaef..fafd1cce4798 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -103,6 +103,7 @@ struct can_ctrlmode { #define CAN_CTRLMODE_CC_LEN8_DLC 0x100 /* Classic CAN DLC option */ #define CAN_CTRLMODE_TDC_AUTO 0x200 /* FD transceiver automatically calculates TDCV */ #define CAN_CTRLMODE_TDC_MANUAL 0x400 /* FD TDCV is manually set up by user */ +#define CAN_CTRLMODE_RESTRICTED 0x800 /* Restricted operation mode */ /* * CAN device statistics -- cgit v1.2.3 From e63281614747c73f25b708c75bc696c4e76f5588 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 26 Nov 2025 11:16:06 +0100 Subject: can: netlink: add initial CAN XL support CAN XL uses bittiming parameters different from Classical CAN and CAN FD. Thus, all the data bittiming parameters, including TDC, need to be duplicated for CAN XL. Add the CAN XL netlink interface for all the features which are common with CAN FD. Any new CAN XL specific features are added later on. The first time CAN XL is activated, the MTU is set by default to CANXL_MAX_MTU. The user may then configure a custom MTU within the CANXL_MIN_MTU to CANXL_MAX_MTU range, in which case, the custom MTU value will be kept as long as CAN XL remains active. Signed-off-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://patch.msgid.link/20251126-canxl-v8-5-e7e3eb74f889@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/netlink.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index fafd1cce4798..c2c96c5978a8 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -104,6 +104,9 @@ struct can_ctrlmode { #define CAN_CTRLMODE_TDC_AUTO 0x200 /* FD transceiver automatically calculates TDCV */ #define CAN_CTRLMODE_TDC_MANUAL 0x400 /* FD TDCV is manually set up by user */ #define CAN_CTRLMODE_RESTRICTED 0x800 /* Restricted operation mode */ +#define CAN_CTRLMODE_XL 0x1000 /* CAN XL mode */ +#define CAN_CTRLMODE_XL_TDC_AUTO 0x2000 /* XL transceiver automatically calculates TDCV */ +#define CAN_CTRLMODE_XL_TDC_MANUAL 0x4000 /* XL TDCV is manually set up by user */ /* * CAN device statistics @@ -139,6 +142,10 @@ enum { IFLA_CAN_BITRATE_MAX, IFLA_CAN_TDC, /* FD */ IFLA_CAN_CTRLMODE_EXT, + IFLA_CAN_XL_DATA_BITTIMING, + IFLA_CAN_XL_DATA_BITTIMING_CONST, + IFLA_CAN_XL_DATA_BITRATE_CONST, + IFLA_CAN_XL_TDC, /* add new constants above here */ __IFLA_CAN_MAX, -- cgit v1.2.3 From 233134af208689c2d5d40896f5740473a74e3cb2 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 26 Nov 2025 11:16:07 +0100 Subject: can: netlink: add CAN_CTRLMODE_XL_TMS flag The Transceiver Mode Switching (TMS) indicates whether the CAN XL controller shall use the PWM or NRZ encoding during the data phase. The term "transceiver mode switching" is used in both ISO 11898-1 and CiA 612-2 (although only the latter one uses the abbreviation TMS). We adopt the same naming convention here for consistency. Add the CAN_CTRLMODE_XL_TMS flag to the list of the CAN control modes. Add can_validate_xl_flags() to check the coherency of the TMS flag. That function will be reused in upcoming changes to validate the other CAN XL flags. Signed-off-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://patch.msgid.link/20251126-canxl-v8-6-e7e3eb74f889@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index c2c96c5978a8..ebafb091d80f 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -107,6 +107,7 @@ struct can_ctrlmode { #define CAN_CTRLMODE_XL 0x1000 /* CAN XL mode */ #define CAN_CTRLMODE_XL_TDC_AUTO 0x2000 /* XL transceiver automatically calculates TDCV */ #define CAN_CTRLMODE_XL_TDC_MANUAL 0x4000 /* XL TDCV is manually set up by user */ +#define CAN_CTRLMODE_XL_TMS 0x8000 /* Transceiver Mode Switching */ /* * CAN device statistics -- cgit v1.2.3 From 46552323fa6779beb1ea558254dfd56021174c93 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 26 Nov 2025 11:16:12 +0100 Subject: can: netlink: add PWM netlink interface When the TMS is switched on, the node uses PWM (Pulse Width Modulation) during the data phase instead of the classic NRZ (Non Return to Zero) encoding. PWM is configured by three parameters: - PWMS: Pulse Width Modulation Short phase - PWML: Pulse Width Modulation Long phase - PWMO: Pulse Width Modulation Offset time For each of these parameters, define three IFLA symbols: - IFLA_CAN_PWM_PWM*_MIN: the minimum allowed value. - IFLA_CAN_PWM_PWM*_MAX: the maximum allowed value. - IFLA_CAN_PWM_PWM*: the runtime value. This results in a total of nine IFLA symbols which are all nested in a parent IFLA_CAN_XL_PWM symbol. IFLA_CAN_PWM_PWM*_MIN and IFLA_CAN_PWM_PWM*_MAX define the range of allowed values and will match the value statically configured by the device in struct can_pwm_const. IFLA_CAN_PWM_PWM* match the runtime values stored in struct can_pwm. Those parameters may only be configured when the tms mode is on. If the PWMS, PWML and PWMO parameters are provided, check that all the needed parameters are present using can_validate_pwm(), then check their value using can_validate_pwm_bittiming(). PWMO defaults to zero if omitted. Otherwise, if CAN_CTRLMODE_XL_TMS is true but none of the PWM parameters are provided, calculate them using can_calc_pwm(). Signed-off-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://patch.msgid.link/20251126-canxl-v8-11-e7e3eb74f889@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/netlink.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index ebafb091d80f..c30d16746159 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -5,6 +5,7 @@ * Definitions for the CAN netlink interface * * Copyright (c) 2009 Wolfgang Grandegger + * Copyright (c) 2021-2025 Vincent Mailhol * * This program is free software; you can redistribute it and/or modify * it under the terms of the version 2 of the GNU General Public License @@ -147,6 +148,7 @@ enum { IFLA_CAN_XL_DATA_BITTIMING_CONST, IFLA_CAN_XL_DATA_BITRATE_CONST, IFLA_CAN_XL_TDC, + IFLA_CAN_XL_PWM, /* add new constants above here */ __IFLA_CAN_MAX, @@ -188,6 +190,29 @@ enum { IFLA_CAN_CTRLMODE_MAX = __IFLA_CAN_CTRLMODE - 1 }; +/* + * CAN FD/XL Pulse-Width Modulation (PWM) + * + * Please refer to struct can_pwm_const and can_pwm in + * include/linux/can/bittiming.h for further details. + */ +enum { + IFLA_CAN_PWM_UNSPEC, + IFLA_CAN_PWM_PWMS_MIN, /* u32 */ + IFLA_CAN_PWM_PWMS_MAX, /* u32 */ + IFLA_CAN_PWM_PWML_MIN, /* u32 */ + IFLA_CAN_PWM_PWML_MAX, /* u32 */ + IFLA_CAN_PWM_PWMO_MIN, /* u32 */ + IFLA_CAN_PWM_PWMO_MAX, /* u32 */ + IFLA_CAN_PWM_PWMS, /* u32 */ + IFLA_CAN_PWM_PWML, /* u32 */ + IFLA_CAN_PWM_PWMO, /* u32 */ + + /* add new constants above here */ + __IFLA_CAN_PWM, + IFLA_CAN_PWM_MAX = __IFLA_CAN_PWM - 1 +}; + /* u16 termination range: 1..65535 Ohms */ #define CAN_TERMINATION_DISABLED 0 -- cgit v1.2.3 From f0fdaa4ad55b7c6e46a5ccb9102bc9a96cad360f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 27 Oct 2025 21:04:09 -0700 Subject: virt: acrn: split acrn_mmio_dev_res out of acrn_mmiodev Add struct acrn_mmio_dev_res before struct acrn_mmio_dev. The former is used in the latter and breaking them up provides better kernel-doc documentation for the struct members. Suggested-by: Fei Li Signed-off-by: Randy Dunlap Acked-by: Fei Li Link: https://patch.msgid.link/20251028040409.868254-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/acrn.h | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/acrn.h b/include/uapi/linux/acrn.h index 7b714c1902eb..79e7855a8c42 100644 --- a/include/uapi/linux/acrn.h +++ b/include/uapi/linux/acrn.h @@ -418,26 +418,32 @@ struct acrn_pcidev { }; /** - * struct acrn_mmiodev - Info for assigning or de-assigning a MMIO device - * @name: Name of the MMIO device. - * @res[].user_vm_pa: Physical address of User VM of the MMIO region - * for the MMIO device. - * @res[].service_vm_pa: Physical address of Service VM of the MMIO - * region for the MMIO device. - * @res[].size: Size of the MMIO region for the MMIO device. - * @res[].mem_type: Memory type of the MMIO region for the MMIO - * device. + * struct acrn_mmio_dev_res - MMIO device resource description + * @user_vm_pa: Physical address of User VM of the MMIO region + * for the MMIO device. + * @service_vm_pa: Physical address of Service VM of the MMIO + * region for the MMIO device. + * @size: Size of the MMIO region for the MMIO device. + * @mem_type: Memory type of the MMIO region for the MMIO + * device. + */ +struct acrn_mmio_dev_res { + __u64 user_vm_pa; + __u64 service_vm_pa; + __u64 size; + __u64 mem_type; +}; + +/** + * struct acrn_mmiodev - Info for assigning or de-assigning an MMIO device + * @name: Name of the MMIO device. + * @res: Array of MMIO device descriptions * * This structure will be passed to hypervisor directly. */ struct acrn_mmiodev { __u8 name[8]; - struct { - __u64 user_vm_pa; - __u64 service_vm_pa; - __u64 size; - __u64 mem_type; - } res[ACRN_MMIODEV_RES_NUM]; + struct acrn_mmio_dev_res res[ACRN_MMIODEV_RES_NUM]; }; /** -- cgit v1.2.3 From 81c45c62dc3eefd83af8eb8df10e45705e8e3a47 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 3 Nov 2025 09:27:55 -0800 Subject: iommu/arm-smmu-v3-iommufd: Allow attaching nested domain for GBPA cases A vDEVICE has been a hard requirement for attaching a nested domain to the device. This makes sense when installing a guest STE, since a vSID must be present and given to the kernel during the vDEVICE allocation. But, when CR0.SMMUEN is disabled, VM doesn't really need a vSID to program the vSMMU behavior as GBPA will take effect, in which case the vSTE in the nested domain could have carried the bypass or abort configuration in GBPA register. Thus, having such a hard requirement doesn't work well for GBPA. Skip vmaster allocation in arm_smmu_attach_prepare_vmaster() for an abort or bypass vSTE. Note that device on this attachment won't report vevents. Update the uAPI doc accordingly. Link: https://patch.msgid.link/r/20251103172755.2026145-1-nicolinc@nvidia.com Tested-by: Shameer Kolothum Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Tested-by: Shuai Xue Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index c218c89e0e2e..2c41920b641d 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -450,6 +450,16 @@ struct iommu_hwpt_vtd_s1 { * nested domain will translate the same as the nesting parent. The S1 will * install a Context Descriptor Table pointing at userspace memory translated * by the nesting parent. + * + * It's suggested to allocate a vDEVICE object carrying vSID and then re-attach + * the nested domain, as soon as the vSID is available in the VMM level: + * + * - when Cfg=translate, a vDEVICE must be allocated prior to attaching to the + * allocated nested domain, as CD/ATS invalidations and vevents need a vSID. + * - when Cfg=bypass/abort, a vDEVICE is not enforced during the nested domain + * attachment, to support a GBPA case where VM sets CR0.SMMUEN=0. However, if + * VM sets CR0.SMMUEN=1 while missing a vDEVICE object, kernel would fail to + * report events to the VM. E.g. F_TRANSLATION when guest STE.Cfg=abort. */ struct iommu_hwpt_arm_smmuv3 { __aligned_le64 ste[2]; -- cgit v1.2.3 From 5d24321e4c159088604512d7a5c5cf634d23e01a Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 25 Nov 2025 16:18:01 -0500 Subject: io_uring: Introduce getsockname io_uring cmd Introduce a socket-specific io_uring_cmd to support getsockname/getpeername via io_uring. I made this an io_uring_cmd instead of a new operation to avoid polluting the command namespace with what is exclusively a socket operation. In addition, since we don't need to conform to existing interfaces, this merges the getsockname/getpeername in a single operation, since the implementation is pretty much the same. This has been frequently requested, for instance at [1] and more recently in the project Discord channel. The main use-case is to support fixed socket file descriptors. [1] https://github.com/axboe/liburing/issues/1356 Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index deb772222b6d..b5b23c0d5283 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -1009,6 +1009,7 @@ enum io_uring_socket_op { SOCKET_URING_OP_GETSOCKOPT, SOCKET_URING_OP_SETSOCKOPT, SOCKET_URING_OP_TX_TIMESTAMP, + SOCKET_URING_OP_GETSOCKNAME, }; /* -- cgit v1.2.3 From e6c43c95009035a63091cd49736886f883127510 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 21 Nov 2025 08:39:55 -0800 Subject: net: phy: Add MDIO_PMA_CTRL1_SPEED for 2.5G and 5G to reflect PMA values The 2.5G and 5G values are not consistent between the PCS CTRL1 and PMA CTRL1 values. In order to avoid confusion between the two I am updating the values to include "PMA" in the name similar to values used in similar places. To avoid breaking UAPI I have retained the original macros and just defined them as the new PMA based defines. Signed-off-by: Alexander Duyck Link: https://patch.msgid.link/176374319569.959489.6610469879021800710.stgit@ahduyck-xeon-server.home.arpa Signed-off-by: Paolo Abeni --- include/uapi/linux/mdio.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 6975f182b22c..9ee6eeae64b8 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -116,10 +116,18 @@ #define MDIO_CTRL1_SPEED10G (MDIO_CTRL1_SPEEDSELEXT | 0x00) /* 10PASS-TS/2BASE-TL */ #define MDIO_CTRL1_SPEED10P2B (MDIO_CTRL1_SPEEDSELEXT | 0x04) +/* Note: the MDIO_CTRL1_SPEED_XXX values for everything past 10PASS-TS/2BASE-TL + * do not match between the PCS and PMA values. Any additions past this point + * should be PMA or PCS specific. The following 2 defines are workarounds for + * values added before this was caught. They should be considered deprecated. + */ +#define MDIO_CTRL1_SPEED2_5G MDIO_PMA_CTRL1_SPEED2_5G +#define MDIO_CTRL1_SPEED5G MDIO_PMA_CTRL1_SPEED5G /* 2.5 Gb/s */ -#define MDIO_CTRL1_SPEED2_5G (MDIO_CTRL1_SPEEDSELEXT | 0x18) +#define MDIO_PMA_CTRL1_SPEED2_5G (MDIO_CTRL1_SPEEDSELEXT | 0x18) /* 5 Gb/s */ -#define MDIO_CTRL1_SPEED5G (MDIO_CTRL1_SPEEDSELEXT | 0x1c) +#define MDIO_PMA_CTRL1_SPEED5G (MDIO_CTRL1_SPEEDSELEXT | 0x1c) + /* Status register 1. */ #define MDIO_STAT1_LPOWERABLE 0x0002 /* Low-power ability */ -- cgit v1.2.3 From 7622d55276932bfeb947b7b6cbf7ea0aa41feeb8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 21 Nov 2025 08:40:02 -0800 Subject: net: pcs: xpcs: Add support for 25G, 50G, and 100G interfaces With this change we are adding support for 25G, 50G, and 100G interface types to the XPCS driver. This had supposedly been enabled with the addition of XLGMII but I don't see any capability for configuration there so I suspect it may need to be refactored in the future. With this change we can enable the XPCS driver with the selected interface and it should be able to detect link, speed, and report the link status to the phylink interface. Signed-off-by: Alexander Duyck Link: https://patch.msgid.link/176374320248.959489.11649590675011158859.stgit@ahduyck-xeon-server.home.arpa Signed-off-by: Paolo Abeni --- include/uapi/linux/mdio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 9ee6eeae64b8..f23cab33e586 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -123,6 +123,12 @@ */ #define MDIO_CTRL1_SPEED2_5G MDIO_PMA_CTRL1_SPEED2_5G #define MDIO_CTRL1_SPEED5G MDIO_PMA_CTRL1_SPEED5G +/* 100 Gb/s */ +#define MDIO_PCS_CTRL1_SPEED100G (MDIO_CTRL1_SPEEDSELEXT | 0x10) +/* 25 Gb/s */ +#define MDIO_PCS_CTRL1_SPEED25G (MDIO_CTRL1_SPEEDSELEXT | 0x14) +/* 50 Gb/s */ +#define MDIO_PCS_CTRL1_SPEED50G (MDIO_CTRL1_SPEEDSELEXT | 0x18) /* 2.5 Gb/s */ #define MDIO_PMA_CTRL1_SPEED2_5G (MDIO_CTRL1_SPEEDSELEXT | 0x18) /* 5 Gb/s */ -- cgit v1.2.3 From 39e138173ae7641e952b456d2de7ad2ac03e8d88 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 21 Nov 2025 08:40:09 -0800 Subject: net: pcs: xpcs: Fix PMA identifier handling in XPCS The XPCS driver was mangling the PMA identifier as the original code appears to have been focused on just capturing the OUI. Rather than store a mangled ID it is better to work with the actual PMA ID and instead just mask out the values that don't apply rather than shifting them and reordering them as you still don't get the original OUI for the NIC without having to bitswap the values as per the definition of the layout in IEEE 802.3-2022 22.2.4.3.1. By laying it out as it was in the hardware it is also less likely for us to have an unintentional collision as the enum values will occupy the revision number area while the OUI occupies the upper 22 bits. Signed-off-by: Alexander Duyck Link: https://patch.msgid.link/176374320920.959489.17267159479370601070.stgit@ahduyck-xeon-server.home.arpa Signed-off-by: Paolo Abeni --- include/uapi/linux/mdio.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index f23cab33e586..8d769f100de6 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -147,6 +147,11 @@ #define MDIO_AN_STAT1_PAGE 0x0040 /* Page received */ #define MDIO_AN_STAT1_XNP 0x0080 /* Extended next page status */ +/* Device Identifier 2 */ +#define MDIO_DEVID2_OUI 0xfc00 /* OUI Portion of PHY ID */ +#define MDIO_DEVID2_MODEL_NUM 0x03f0 /* Manufacturer's Model Number */ +#define MDIO_DEVID2_REV_NUM 0x000f /* Revision Number */ + /* Speed register. */ #define MDIO_SPEED_10G 0x0001 /* 10G capable */ #define MDIO_PMA_SPEED_2B 0x0002 /* 2BASE-TL capable */ -- cgit v1.2.3 From 9e2fd062fa1713a33380cc97ef324d086dd45ba5 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Tue, 25 Nov 2025 11:58:31 -0500 Subject: liveupdate: luo_core: Live Update Orchestrator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Live Update Orchestrator", v8. This series introduces the Live Update Orchestrator, a kernel subsystem designed to facilitate live kernel updates using a kexec-based reboot. This capability is critical for cloud environments, allowing hypervisors to be updated with minimal downtime for running virtual machines. LUO achieves this by preserving the state of selected resources, such as memory, devices and their dependencies, across the kernel transition. As a key feature, this series includes support for preserving memfd file descriptors, which allows critical in-memory data, such as guest RAM or any other large memory region, to be maintained in RAM across the kexec reboot. The other series that use LUO, are VFIO [1], IOMMU [2], and PCI [3] preservations. Github repo of this series [4]. The core of LUO is a framework for managing the lifecycle of preserved resources through a userspace-driven interface. Key features include: - Session Management Userspace agent (i.e. luod [5]) creates named sessions, each represented by a file descriptor (via centralized agent that controls /dev/liveupdate). The lifecycle of all preserved resources within a session is tied to this FD, ensuring automatic kernel cleanup if the controlling userspace agent crashes or exits unexpectedly. - File Preservation A handler-based framework allows specific file types (demonstrated here with memfd) to be preserved. Handlers manage the serialization, restoration, and lifecycle of their specific file types. - File-Lifecycle-Bound State A new mechanism for managing shared global state whose lifecycle is tied to the preservation of one or more files. This is crucial for subsystems like IOMMU or HugeTLB, where multiple file descriptors may depend on a single, shared underlying resource that must be preserved only once. - KHO Integration LUO drives the Kexec Handover framework programmatically to pass its serialized metadata to the next kernel. The LUO state is finalized and added to the kexec image just before the reboot is triggered. In the future this step will also be removed once stateless KHO is merged [6]. - Userspace Interface Control is provided via ioctl commands on /dev/liveupdate for creating and retrieving sessions, as well as on session file descriptors for managing individual files. - Testing The series includes a set of selftests, including userspace API validation, kexec-based lifecycle tests for various session and file scenarios, and a new in-kernel test module to validate the FLB logic. Introduce LUO, a mechanism intended to facilitate kernel updates while keeping designated devices operational across the transition (e.g., via kexec). The primary use case is updating hypervisors with minimal disruption to running virtual machines. For userspace side of hypervisor update we have copyless migration. LUO is for updating the kernel. This initial patch lays the groundwork for the LUO subsystem. Further functionality, including the implementation of state transition logic, integration with KHO, and hooks for subsystems and file descriptors, will be added in subsequent patches. Create a character device at /dev/liveupdate. A new uAPI header, , will define the necessary structures. The magic number for IOCTL is registered in Documentation/userspace-api/ioctl/ioctl-number.rst. Link: https://lkml.kernel.org/r/20251125165850.3389713-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20251125165850.3389713-2-pasha.tatashin@soleen.com Link: https://lore.kernel.org/all/20251018000713.677779-1-vipinsh@google.com/ [1] Link: https://lore.kernel.org/linux-iommu/20250928190624.3735830-1-skhawaja@google.com [2] Link: https://lore.kernel.org/linux-pci/20250916-luo-pci-v2-0-c494053c3c08@kernel.org [3] Link: https://github.com/googleprodkernel/linux-liveupdate/tree/luo/v8 [4] Link: https://tinyurl.com/luoddesign [5] Link: https://lore.kernel.org/all/20251020100306.2709352-1-jasonmiu@google.com [6] Link: https://lore.kernel.org/all/20251115233409.768044-1-pasha.tatashin@soleen.com [7] Link: https://github.com/soleen/linux/blob/luo/v8b03/diff.v7.v8 [8] Signed-off-by: Pasha Tatashin Reviewed-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Tested-by: David Matlack Cc: Aleksander Lobakin Cc: Alexander Graf Cc: Alice Ryhl Cc: Andriy Shevchenko Cc: anish kumar Cc: Anna Schumaker Cc: Bartosz Golaszewski Cc: Bjorn Helgaas Cc: Borislav Betkov Cc: Chanwoo Choi Cc: Chen Ridong Cc: Chris Li Cc: Christian Brauner Cc: Daniel Wagner Cc: Danilo Krummrich Cc: Dan Williams Cc: David Hildenbrand Cc: David Jeffery Cc: David Rientjes Cc: Greg Kroah-Hartman Cc: Guixin Liu Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ilpo Järvinen Cc: Ingo Molnar Cc: Ira Weiny Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Joanthan Cameron Cc: Joel Granados Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Lennart Poettering Cc: Leon Romanovsky Cc: Leon Romanovsky Cc: Lukas Wunner Cc: Marc Rutland Cc: Masahiro Yamada Cc: Matthew Maurer Cc: Miguel Ojeda Cc: Myugnjoo Ham Cc: Parav Pandit Cc: Randy Dunlap Cc: Roman Gushchin Cc: Saeed Mahameed Cc: Samiullah Khawaja Cc: Song Liu Cc: Steven Rostedt Cc: Stuart Hayes Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Weißschuh Cc: Vincent Guittot Cc: William Tu Cc: Yoann Congal Cc: Zijun Hu Cc: Pratyush Yadav Cc: Zhu Yanjun Signed-off-by: Andrew Morton --- include/uapi/linux/liveupdate.h | 46 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 include/uapi/linux/liveupdate.h (limited to 'include/uapi') diff --git a/include/uapi/linux/liveupdate.h b/include/uapi/linux/liveupdate.h new file mode 100644 index 000000000000..df34c1642c4d --- /dev/null +++ b/include/uapi/linux/liveupdate.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +/* + * Userspace interface for /dev/liveupdate + * Live Update Orchestrator + * + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin + */ + +#ifndef _UAPI_LIVEUPDATE_H +#define _UAPI_LIVEUPDATE_H + +#include +#include + +/** + * DOC: General ioctl format + * + * The ioctl interface follows a general format to allow for extensibility. Each + * ioctl is passed in a structure pointer as the argument providing the size of + * the structure in the first u32. The kernel checks that any structure space + * beyond what it understands is 0. This allows userspace to use the backward + * compatible portion while consistently using the newer, larger, structures. + * + * ioctls use a standard meaning for common errnos: + * + * - ENOTTY: The IOCTL number itself is not supported at all + * - E2BIG: The IOCTL number is supported, but the provided structure has + * non-zero in a part the kernel does not understand. + * - EOPNOTSUPP: The IOCTL number is supported, and the structure is + * understood, however a known field has a value the kernel does not + * understand or support. + * - EINVAL: Everything about the IOCTL was understood, but a field is not + * correct. + * - ENOENT: A provided token does not exist. + * - ENOMEM: Out of memory. + * - EOVERFLOW: Mathematics overflowed. + * + * As well as additional errnos, within specific ioctls. + */ + +/* The ioctl type, documented in ioctl-number.rst */ +#define LIVEUPDATE_IOCTL_TYPE 0xBA + +#endif /* _UAPI_LIVEUPDATE_H */ -- cgit v1.2.3 From 0153094d03df5a2e834a19c59b255649a258ae46 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Tue, 25 Nov 2025 11:58:34 -0500 Subject: liveupdate: luo_session: add sessions support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce concept of "Live Update Sessions" within the LUO framework. LUO sessions provide a mechanism to group and manage `struct file *` instances (representing file descriptors) that need to be preserved across a kexec-based live update. Each session is identified by a unique name and acts as a container for file objects whose state is critical to a userspace workload, such as a virtual machine or a high-performance database, aiming to maintain their functionality across a kernel transition. This groundwork establishes the framework for preserving file-backed state across kernel updates, with the actual file data preservation mechanisms to be implemented in subsequent patches. [dan.carpenter@linaro.org: fix use after free in luo_session_deserialize()] Link: https://lkml.kernel.org/r/c5dd637d7eed3a3be48c5e9fedb881596a3b1f5a.1764163896.git.dan.carpenter@linaro.org Link: https://lkml.kernel.org/r/20251125165850.3389713-5-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Signed-off-by: Dan Carpenter Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Tested-by: David Matlack Cc: Aleksander Lobakin Cc: Alexander Graf Cc: Alice Ryhl Cc: Andriy Shevchenko Cc: anish kumar Cc: Anna Schumaker Cc: Bartosz Golaszewski Cc: Bjorn Helgaas Cc: Borislav Betkov Cc: Chanwoo Choi Cc: Chen Ridong Cc: Chris Li Cc: Christian Brauner Cc: Daniel Wagner Cc: Danilo Krummrich Cc: Dan Williams Cc: David Hildenbrand Cc: David Jeffery Cc: David Rientjes Cc: Greg Kroah-Hartman Cc: Guixin Liu Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ilpo Järvinen Cc: Ingo Molnar Cc: Ira Weiny Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Joanthan Cameron Cc: Joel Granados Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Lennart Poettering Cc: Leon Romanovsky Cc: Leon Romanovsky Cc: Lukas Wunner Cc: Marc Rutland Cc: Masahiro Yamada Cc: Matthew Maurer Cc: Miguel Ojeda Cc: Myugnjoo Ham Cc: Parav Pandit Cc: Pratyush Yadav Cc: Randy Dunlap Cc: Roman Gushchin Cc: Saeed Mahameed Cc: Samiullah Khawaja Cc: Song Liu Cc: Steven Rostedt Cc: Stuart Hayes Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Weißschuh Cc: Vincent Guittot Cc: William Tu Cc: Yoann Congal Cc: Zhu Yanjun Cc: Zijun Hu Signed-off-by: Andrew Morton --- include/uapi/linux/liveupdate.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/liveupdate.h b/include/uapi/linux/liveupdate.h index df34c1642c4d..40578ae19668 100644 --- a/include/uapi/linux/liveupdate.h +++ b/include/uapi/linux/liveupdate.h @@ -43,4 +43,7 @@ /* The ioctl type, documented in ioctl-number.rst */ #define LIVEUPDATE_IOCTL_TYPE 0xBA +/* The maximum length of session name including null termination */ +#define LIVEUPDATE_SESSION_NAME_LENGTH 64 + #endif /* _UAPI_LIVEUPDATE_H */ -- cgit v1.2.3 From 81cd25d263a182b3dcdc8af3b92e4b8e4db336de Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Tue, 25 Nov 2025 11:58:35 -0500 Subject: liveupdate: luo_core: add user interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the user-space interface for the Live Update Orchestrator via ioctl commands, enabling external control over the live update process and management of preserved resources. The idea is that there is going to be a single userspace agent driving the live update, therefore, only a single process can ever hold this device opened at a time. The following ioctl commands are introduced: LIVEUPDATE_IOCTL_CREATE_SESSION Provides a way for userspace to create a named session for grouping file descriptors that need to be preserved. It returns a new file descriptor representing the session. LIVEUPDATE_IOCTL_RETRIEVE_SESSION Allows the userspace agent in the new kernel to reclaim a preserved session by its name, receiving a new file descriptor to manage the restored resources. Link: https://lkml.kernel.org/r/20251125165850.3389713-6-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Tested-by: David Matlack Cc: Aleksander Lobakin Cc: Alexander Graf Cc: Alice Ryhl Cc: Andriy Shevchenko Cc: anish kumar Cc: Anna Schumaker Cc: Bartosz Golaszewski Cc: Bjorn Helgaas Cc: Borislav Betkov Cc: Chanwoo Choi Cc: Chen Ridong Cc: Chris Li Cc: Christian Brauner Cc: Daniel Wagner Cc: Danilo Krummrich Cc: Dan Williams Cc: David Hildenbrand Cc: David Jeffery Cc: David Rientjes Cc: Greg Kroah-Hartman Cc: Guixin Liu Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ilpo Järvinen Cc: Ingo Molnar Cc: Ira Weiny Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Joanthan Cameron Cc: Joel Granados Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Lennart Poettering Cc: Leon Romanovsky Cc: Leon Romanovsky Cc: Lukas Wunner Cc: Marc Rutland Cc: Masahiro Yamada Cc: Matthew Maurer Cc: Miguel Ojeda Cc: Myugnjoo Ham Cc: Parav Pandit Cc: Pratyush Yadav Cc: Randy Dunlap Cc: Roman Gushchin Cc: Saeed Mahameed Cc: Samiullah Khawaja Cc: Song Liu Cc: Steven Rostedt Cc: Stuart Hayes Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Weißschuh Cc: Vincent Guittot Cc: William Tu Cc: Yoann Congal Cc: Zhu Yanjun Cc: Zijun Hu Signed-off-by: Andrew Morton --- include/uapi/linux/liveupdate.h | 64 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/liveupdate.h b/include/uapi/linux/liveupdate.h index 40578ae19668..1183cf984b5f 100644 --- a/include/uapi/linux/liveupdate.h +++ b/include/uapi/linux/liveupdate.h @@ -46,4 +46,68 @@ /* The maximum length of session name including null termination */ #define LIVEUPDATE_SESSION_NAME_LENGTH 64 +/* The /dev/liveupdate ioctl commands */ +enum { + LIVEUPDATE_CMD_BASE = 0x00, + LIVEUPDATE_CMD_CREATE_SESSION = LIVEUPDATE_CMD_BASE, + LIVEUPDATE_CMD_RETRIEVE_SESSION = 0x01, +}; + +/** + * struct liveupdate_ioctl_create_session - ioctl(LIVEUPDATE_IOCTL_CREATE_SESSION) + * @size: Input; sizeof(struct liveupdate_ioctl_create_session) + * @fd: Output; The new file descriptor for the created session. + * @name: Input; A null-terminated string for the session name, max + * length %LIVEUPDATE_SESSION_NAME_LENGTH including termination + * character. + * + * Creates a new live update session for managing preserved resources. + * This ioctl can only be called on the main /dev/liveupdate device. + * + * Return: 0 on success, negative error code on failure. + */ +struct liveupdate_ioctl_create_session { + __u32 size; + __s32 fd; + __u8 name[LIVEUPDATE_SESSION_NAME_LENGTH]; +}; + +#define LIVEUPDATE_IOCTL_CREATE_SESSION \ + _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_CREATE_SESSION) + +/** + * struct liveupdate_ioctl_retrieve_session - ioctl(LIVEUPDATE_IOCTL_RETRIEVE_SESSION) + * @size: Input; sizeof(struct liveupdate_ioctl_retrieve_session) + * @fd: Output; The new file descriptor for the retrieved session. + * @name: Input; A null-terminated string identifying the session to retrieve. + * The name must exactly match the name used when the session was + * created in the previous kernel. + * + * Retrieves a handle (a new file descriptor) for a preserved session by its + * name. This is the primary mechanism for a userspace agent to regain control + * of its preserved resources after a live update. + * + * The userspace application provides the null-terminated `name` of a session + * it created before the live update. If a preserved session with a matching + * name is found, the kernel instantiates it and returns a new file descriptor + * in the `fd` field. This new session FD can then be used for all file-specific + * operations, such as restoring individual file descriptors with + * LIVEUPDATE_SESSION_RETRIEVE_FD. + * + * It is the responsibility of the userspace application to know the names of + * the sessions it needs to retrieve. If no session with the given name is + * found, the ioctl will fail with -ENOENT. + * + * This ioctl can only be called on the main /dev/liveupdate device when the + * system is in the LIVEUPDATE_STATE_UPDATED state. + */ +struct liveupdate_ioctl_retrieve_session { + __u32 size; + __s32 fd; + __u8 name[LIVEUPDATE_SESSION_NAME_LENGTH]; +}; + +#define LIVEUPDATE_IOCTL_RETRIEVE_SESSION \ + _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_RETRIEVE_SESSION) + #endif /* _UAPI_LIVEUPDATE_H */ -- cgit v1.2.3 From 16cec0d265219f14a7fcebcc43aeb69205adba56 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Tue, 25 Nov 2025 11:58:37 -0500 Subject: liveupdate: luo_session: add ioctls for file preservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introducing the userspace interface and internal logic required to manage the lifecycle of file descriptors within a session. Previously, a session was merely a container; this change makes it a functional management unit. The following capabilities are added: A new set of ioctl commands are added, which operate on the file descriptor returned by CREATE_SESSION. This allows userspace to: - LIVEUPDATE_SESSION_PRESERVE_FD: Add a file descriptor to a session to be preserved across the live update. - LIVEUPDATE_SESSION_RETRIEVE_FD: Retrieve a preserved file in the new kernel using its unique token. - LIVEUPDATE_SESSION_FINISH: finish session The session's .release handler is enhanced to be state-aware. When a session's file descriptor is closed, it correctly unpreserves the session based on its current state before freeing all associated file resources. Link: https://lkml.kernel.org/r/20251125165850.3389713-8-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Reviewed-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Tested-by: David Matlack Cc: Aleksander Lobakin Cc: Alexander Graf Cc: Alice Ryhl Cc: Andriy Shevchenko Cc: anish kumar Cc: Anna Schumaker Cc: Bartosz Golaszewski Cc: Bjorn Helgaas Cc: Borislav Betkov Cc: Chanwoo Choi Cc: Chen Ridong Cc: Chris Li Cc: Christian Brauner Cc: Daniel Wagner Cc: Danilo Krummrich Cc: Dan Williams Cc: David Hildenbrand Cc: David Jeffery Cc: David Rientjes Cc: Greg Kroah-Hartman Cc: Guixin Liu Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ilpo Järvinen Cc: Ingo Molnar Cc: Ira Weiny Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Joanthan Cameron Cc: Joel Granados Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Lennart Poettering Cc: Leon Romanovsky Cc: Leon Romanovsky Cc: Lukas Wunner Cc: Marc Rutland Cc: Masahiro Yamada Cc: Matthew Maurer Cc: Miguel Ojeda Cc: Myugnjoo Ham Cc: Parav Pandit Cc: Pratyush Yadav Cc: Randy Dunlap Cc: Roman Gushchin Cc: Saeed Mahameed Cc: Samiullah Khawaja Cc: Song Liu Cc: Steven Rostedt Cc: Stuart Hayes Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Weißschuh Cc: Vincent Guittot Cc: William Tu Cc: Yoann Congal Cc: Zhu Yanjun Cc: Zijun Hu Signed-off-by: Andrew Morton --- include/uapi/linux/liveupdate.h | 103 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/liveupdate.h b/include/uapi/linux/liveupdate.h index 1183cf984b5f..30bc66ee9436 100644 --- a/include/uapi/linux/liveupdate.h +++ b/include/uapi/linux/liveupdate.h @@ -53,6 +53,14 @@ enum { LIVEUPDATE_CMD_RETRIEVE_SESSION = 0x01, }; +/* ioctl commands for session file descriptors */ +enum { + LIVEUPDATE_CMD_SESSION_BASE = 0x40, + LIVEUPDATE_CMD_SESSION_PRESERVE_FD = LIVEUPDATE_CMD_SESSION_BASE, + LIVEUPDATE_CMD_SESSION_RETRIEVE_FD = 0x41, + LIVEUPDATE_CMD_SESSION_FINISH = 0x42, +}; + /** * struct liveupdate_ioctl_create_session - ioctl(LIVEUPDATE_IOCTL_CREATE_SESSION) * @size: Input; sizeof(struct liveupdate_ioctl_create_session) @@ -110,4 +118,99 @@ struct liveupdate_ioctl_retrieve_session { #define LIVEUPDATE_IOCTL_RETRIEVE_SESSION \ _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_RETRIEVE_SESSION) +/* Session specific IOCTLs */ + +/** + * struct liveupdate_session_preserve_fd - ioctl(LIVEUPDATE_SESSION_PRESERVE_FD) + * @size: Input; sizeof(struct liveupdate_session_preserve_fd) + * @fd: Input; The user-space file descriptor to be preserved. + * @token: Input; An opaque, unique token for preserved resource. + * + * Holds parameters for preserving a file descriptor. + * + * User sets the @fd field identifying the file descriptor to preserve + * (e.g., memfd, kvm, iommufd, VFIO). The kernel validates if this FD type + * and its dependencies are supported for preservation. If validation passes, + * the kernel marks the FD internally and *initiates the process* of preparing + * its state for saving. The actual snapshotting of the state typically occurs + * during the subsequent %LIVEUPDATE_IOCTL_PREPARE execution phase, though + * some finalization might occur during freeze. + * On successful validation and initiation, the kernel uses the @token + * field with an opaque identifier representing the resource being preserved. + * This token confirms the FD is targeted for preservation and is required for + * the subsequent %LIVEUPDATE_SESSION_RETRIEVE_FD call after the live update. + * + * Return: 0 on success (validation passed, preservation initiated), negative + * error code on failure (e.g., unsupported FD type, dependency issue, + * validation failed). + */ +struct liveupdate_session_preserve_fd { + __u32 size; + __s32 fd; + __aligned_u64 token; +}; + +#define LIVEUPDATE_SESSION_PRESERVE_FD \ + _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_PRESERVE_FD) + +/** + * struct liveupdate_session_retrieve_fd - ioctl(LIVEUPDATE_SESSION_RETRIEVE_FD) + * @size: Input; sizeof(struct liveupdate_session_retrieve_fd) + * @fd: Output; The new file descriptor representing the fully restored + * kernel resource. + * @token: Input; An opaque, token that was used to preserve the resource. + * + * Retrieve a previously preserved file descriptor. + * + * User sets the @token field to the value obtained from a successful + * %LIVEUPDATE_IOCTL_FD_PRESERVE call before the live update. On success, + * the kernel restores the state (saved during the PREPARE/FREEZE phases) + * associated with the token and populates the @fd field with a new file + * descriptor referencing the restored resource in the current (new) kernel. + * This operation must be performed *before* signaling completion via + * %LIVEUPDATE_IOCTL_FINISH. + * + * Return: 0 on success, negative error code on failure (e.g., invalid token). + */ +struct liveupdate_session_retrieve_fd { + __u32 size; + __s32 fd; + __aligned_u64 token; +}; + +#define LIVEUPDATE_SESSION_RETRIEVE_FD \ + _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_RETRIEVE_FD) + +/** + * struct liveupdate_session_finish - ioctl(LIVEUPDATE_SESSION_FINISH) + * @size: Input; sizeof(struct liveupdate_session_finish) + * @reserved: Input; Must be zero. Reserved for future use. + * + * Signals the completion of the restoration process for a retrieved session. + * This is the final operation that should be performed on a session file + * descriptor after a live update. + * + * This ioctl must be called once all required file descriptors for the session + * have been successfully retrieved (using %LIVEUPDATE_SESSION_RETRIEVE_FD) and + * are fully restored from the userspace and kernel perspective. + * + * Upon success, the kernel releases its ownership of the preserved resources + * associated with this session. This allows internal resources to be freed, + * typically by decrementing reference counts on the underlying preserved + * objects. + * + * If this operation fails, the resources remain preserved in memory. Userspace + * may attempt to call finish again. The resources will otherwise be reset + * during the next live update cycle. + * + * Return: 0 on success, negative error code on failure. + */ +struct liveupdate_session_finish { + __u32 size; + __u32 reserved; +}; + +#define LIVEUPDATE_SESSION_FINISH \ + _IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_FINISH) + #endif /* _UAPI_LIVEUPDATE_H */ -- cgit v1.2.3 From 3fa805c37dd4d3e72ae5c58800f3f46ab3ca1f70 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 10 Oct 2025 03:36:50 -0700 Subject: vmcoreinfo: track and log recoverable hardware errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a generic infrastructure for tracking recoverable hardware errors (HW errors that are visible to the OS but does not cause a panic) and record them for vmcore consumption. This aids post-mortem crash analysis tools by preserving a count and timestamp for the last occurrence of such errors. On the other side, correctable errors, which the OS typically remains unaware of because the underlying hardware handles them transparently, are less relevant for crash dump and therefore are NOT tracked in this infrastructure. Add centralized logging for sources of recoverable hardware errors based on the subsystem it has been notified. hwerror_data is write-only at kernel runtime, and it is meant to be read from vmcore using tools like crash/drgn. For example, this is how it looks like when opening the crashdump from drgn. >>> prog['hwerror_data'] (struct hwerror_info[1]){ { .count = (int)844, .timestamp = (time64_t)1752852018, }, ... This helps fleet operators quickly triage whether a crash may be influenced by hardware recoverable errors (which executes a uncommon code path in the kernel), especially when recoverable errors occurred shortly before a panic, such as the bug fixed by commit ee62ce7a1d90 ("page_pool: Track DMA-mapped pages and unmap them when destroying the pool") This is not intended to replace full hardware diagnostics but provides a fast way to correlate hardware events with kernel panics quickly. Rare machine check exceptions—like those indicated by mce_flags.p5 or mce_flags.winchip—are not accounted for in this method, as they fall outside the intended usage scope for this feature's user base. [leitao@debian.org: add hw-recoverable-errors to toctree] Link: https://lkml.kernel.org/r/20251127-vmcoreinfo_fix-v1-1-26f5b1c43da9@debian.org Link: https://lkml.kernel.org/r/20251010-vmcore_hw_error-v5-1-636ede3efe44@debian.org Signed-off-by: Breno Leitao Suggested-by: Tony Luck Suggested-by: Shuai Xue Reviewed-by: Shuai Xue Reviewed-by: Hanjun Guo [APEI] Cc: Bjorn Helgaas Cc: Bob Moore Cc: Borislav Betkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Morse Cc: Konrad Rzessutek Wilk Cc: Len Brown Cc: Mahesh Salgaonkar Cc: Mauro Carvalho Chehab Cc: "Oliver O'Halloran" Cc: Omar Sandoval Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- include/uapi/linux/vmcore.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/vmcore.h b/include/uapi/linux/vmcore.h index 3e9da91866ff..2ba89fafa518 100644 --- a/include/uapi/linux/vmcore.h +++ b/include/uapi/linux/vmcore.h @@ -15,4 +15,13 @@ struct vmcoredd_header { __u8 dump_name[VMCOREDD_MAX_NAME_BYTES]; /* Device dump's name */ }; +enum hwerr_error_type { + HWERR_RECOV_CPU, + HWERR_RECOV_MEMORY, + HWERR_RECOV_PCI, + HWERR_RECOV_CXL, + HWERR_RECOV_OTHERS, + HWERR_RECOV_MAX, +}; + #endif /* _UAPI_VMCORE_H */ -- cgit v1.2.3 From c4f0ab06e1e0c1331e6febd03538a7f621f15134 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 1 Nov 2025 12:20:50 -0700 Subject: netfilter: ip6t_srh: fix UAPI kernel-doc comments format Fix the kernel-doc format for struct members to be "@member" instead of "@ member" to avoid kernel-doc warnings. Warning: ip6t_srh.h:60 struct member 'next_hdr' not described in 'ip6t_srh' Warning: ip6t_srh.h:60 struct member 'hdr_len' not described in 'ip6t_srh' Warning: ip6t_srh.h:60 struct member 'segs_left' not described in 'ip6t_srh' Warning: ip6t_srh.h:60 struct member 'last_entry' not described in 'ip6t_srh' Warning: ip6t_srh.h:60 struct member 'tag' not described in 'ip6t_srh' Warning: ip6t_srh.h:60 struct member 'mt_flags' not described in 'ip6t_srh' Warning: ip6t_srh.h:60 struct member 'mt_invflags' not described in 'ip6t_srh' Warning: ip6t_srh.h:93 struct member 'next_hdr' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'hdr_len' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'segs_left' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'last_entry' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'tag' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'psid_addr' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'nsid_addr' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'lsid_addr' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'psid_msk' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'nsid_msk' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'lsid_msk' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'mt_flags' not described in 'ip6t_srh1' Warning: ip6t_srh.h:93 struct member 'mt_invflags' not described in 'ip6t_srh1' Signed-off-by: Randy Dunlap Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_ipv6/ip6t_srh.h | 40 ++++++++++++++-------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_srh.h b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h index 54ed83360dac..80c66c8ece82 100644 --- a/include/uapi/linux/netfilter_ipv6/ip6t_srh.h +++ b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h @@ -41,13 +41,13 @@ /** * struct ip6t_srh - SRH match options - * @ next_hdr: Next header field of SRH - * @ hdr_len: Extension header length field of SRH - * @ segs_left: Segments left field of SRH - * @ last_entry: Last entry field of SRH - * @ tag: Tag field of SRH - * @ mt_flags: match options - * @ mt_invflags: Invert the sense of match options + * @next_hdr: Next header field of SRH + * @hdr_len: Extension header length field of SRH + * @segs_left: Segments left field of SRH + * @last_entry: Last entry field of SRH + * @tag: Tag field of SRH + * @mt_flags: match options + * @mt_invflags: Invert the sense of match options */ struct ip6t_srh { @@ -62,19 +62,19 @@ struct ip6t_srh { /** * struct ip6t_srh1 - SRH match options (revision 1) - * @ next_hdr: Next header field of SRH - * @ hdr_len: Extension header length field of SRH - * @ segs_left: Segments left field of SRH - * @ last_entry: Last entry field of SRH - * @ tag: Tag field of SRH - * @ psid_addr: Address of previous SID in SRH SID list - * @ nsid_addr: Address of NEXT SID in SRH SID list - * @ lsid_addr: Address of LAST SID in SRH SID list - * @ psid_msk: Mask of previous SID in SRH SID list - * @ nsid_msk: Mask of next SID in SRH SID list - * @ lsid_msk: MAsk of last SID in SRH SID list - * @ mt_flags: match options - * @ mt_invflags: Invert the sense of match options + * @next_hdr: Next header field of SRH + * @hdr_len: Extension header length field of SRH + * @segs_left: Segments left field of SRH + * @last_entry: Last entry field of SRH + * @tag: Tag field of SRH + * @psid_addr: Address of previous SID in SRH SID list + * @nsid_addr: Address of NEXT SID in SRH SID list + * @lsid_addr: Address of LAST SID in SRH SID list + * @psid_msk: Mask of previous SID in SRH SID list + * @nsid_msk: Mask of next SID in SRH SID list + * @lsid_msk: MAsk of last SID in SRH SID list + * @mt_flags: match options + * @mt_invflags: Invert the sense of match options */ struct ip6t_srh1 { -- cgit v1.2.3 From d3a439e55c193b930e0007967cf8d7a29890449b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 1 Nov 2025 12:20:38 -0700 Subject: netfilter: nf_tables: improve UAPI kernel-doc comments In include/uapi/linux/netfilter/nf_tables.h, correct the kernel-doc comments for mistyped enum names and enum values to avoid these kernel-doc warnings and improve the documentation: nf_tables.h:896: warning: Enum value 'NFT_EXTHDR_OP_TCPOPT' not described in enum 'nft_exthdr_op' nf_tables.h:896: warning: Excess enum value 'NFT_EXTHDR_OP_TCP' description in 'nft_exthdr_op' nf_tables.h:1210: warning: expecting prototype for enum nft_flow_attributes. Prototype was for enum nft_offload_attributes instead nf_tables.h:1428: warning: expecting prototype for enum nft_reject_code. Prototype was for enum nft_reject_inet_code instead (add beginning '@' to each enum value description:) nf_tables.h:1493: warning: Enum value 'NFTA_TPROXY_FAMILY' not described in enum 'nft_tproxy_attributes' nf_tables.h:1493: warning: Enum value 'NFTA_TPROXY_REG_ADDR' not described in enum 'nft_tproxy_attributes' nf_tables.h:1493: warning: Enum value 'NFTA_TPROXY_REG_PORT' not described in enum 'nft_tproxy_attributes' nf_tables.h:1796: warning: expecting prototype for enum nft_device_attributes. Prototype was for enum nft_devices_attributes instead Signed-off-by: Randy Dunlap Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 7c0c915f0306..45c71f7d21c2 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -881,7 +881,7 @@ enum nft_exthdr_flags { * enum nft_exthdr_op - nf_tables match options * * @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers - * @NFT_EXTHDR_OP_TCP: match against tcp options + * @NFT_EXTHDR_OP_TCPOPT: match against tcp options * @NFT_EXTHDR_OP_IPV4: match against ipv4 options * @NFT_EXTHDR_OP_SCTP: match against sctp chunks * @NFT_EXTHDR_OP_DCCP: match against dccp otions @@ -1200,7 +1200,7 @@ enum nft_ct_attributes { #define NFTA_CT_MAX (__NFTA_CT_MAX - 1) /** - * enum nft_flow_attributes - ct offload expression attributes + * enum nft_offload_attributes - ct offload expression attributes * @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING) */ enum nft_offload_attributes { @@ -1410,7 +1410,7 @@ enum nft_reject_types { }; /** - * enum nft_reject_code - Generic reject codes for IPv4/IPv6 + * enum nft_reject_inet_code - Generic reject codes for IPv4/IPv6 * * @NFT_REJECT_ICMPX_NO_ROUTE: no route to host / network unreachable * @NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable @@ -1480,9 +1480,9 @@ enum nft_nat_attributes { /** * enum nft_tproxy_attributes - nf_tables tproxy expression netlink attributes * - * NFTA_TPROXY_FAMILY: Target address family (NLA_U32: nft_registers) - * NFTA_TPROXY_REG_ADDR: Target address register (NLA_U32: nft_registers) - * NFTA_TPROXY_REG_PORT: Target port register (NLA_U32: nft_registers) + * @NFTA_TPROXY_FAMILY: Target address family (NLA_U32: nft_registers) + * @NFTA_TPROXY_REG_ADDR: Target address register (NLA_U32: nft_registers) + * @NFTA_TPROXY_REG_PORT: Target port register (NLA_U32: nft_registers) */ enum nft_tproxy_attributes { NFTA_TPROXY_UNSPEC, @@ -1783,7 +1783,7 @@ enum nft_synproxy_attributes { #define NFTA_SYNPROXY_MAX (__NFTA_SYNPROXY_MAX - 1) /** - * enum nft_device_attributes - nf_tables device netlink attributes + * enum nft_devices_attributes - nf_tables device netlink attributes * * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) * @NFTA_DEVICE_PREFIX: device name prefix, a simple wildcard (NLA_STRING) -- cgit v1.2.3 From 6557cae0a2a1952645e5df50e1d6eb7267ea2131 Mon Sep 17 00:00:00 2001 From: Peter Enderborg Date: Wed, 26 Nov 2025 14:54:06 +0100 Subject: if_ether.h: Clarify ethertype validity for gsw1xx dsa This 0x88C3 is registered to Infineon Technologies Corporate Research ST and are used by MaxLinear. Infineon made a spin off called Lantiq. Lantiq was acquired by Intel MaxLinear acquired Intels Connected Home division. The product FAQ from MaxLinear describes it's history from the F24S. The driver for the gsw1xx is based on Lantiq showing it's similarities. Ref https://standards-oui.ieee.org/ethertype/eth.txt Signed-off-by: Peter Enderborg Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_ether.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 2c93b7b731c8..df9d44a11540 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -92,7 +92,9 @@ #define ETH_P_ETHERCAT 0x88A4 /* EtherCAT */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ -#define ETH_P_MXLGSW 0x88C3 /* MaxLinear GSW DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_MXLGSW 0x88C3 /* Infineon Technologies Corporate Research ST + * Used by MaxLinear GSW DSA + */ #define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */ #define ETH_P_TIPC 0x88CA /* TIPC */ #define ETH_P_LLDP 0x88CC /* Link Layer Discovery Protocol */ -- cgit v1.2.3 From 4be9e04ebf75a5c4478c1c6295e2122e5dc98f5f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 28 Nov 2025 10:55:09 +0100 Subject: vfs: add needed headers for new struct delegation definition The definition of struct delegation uses stdint.h integer types. Add the necessary headers to ensure that always works. Fixes: 1602bad16d7d ("vfs: expose delegation support to userland") Signed-off-by: Jeff Layton Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 008fac15e573..5e277fd955aa 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -4,6 +4,11 @@ #include #include +#ifdef __KERNEL__ +#include +#else +#include +#endif #define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0) #define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1) -- cgit v1.2.3 From 414690746d2da0dc9a931f8c02d83e5834141251 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 24 Nov 2025 18:28:08 -0800 Subject: i2c: i2c.h: fix a bad kernel-doc line Change an empty line into a blank kernel-doc line to prevent a kernel-doc warning: Warning: ../include/uapi/linux/i2c.h:38 bad line: Fixes: bfb3939c51d5 ("i2c: refactor documentation of struct i2c_msg") Signed-off-by: Randy Dunlap Signed-off-by: Wolfram Sang --- include/uapi/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h index a2db2a56c8b0..2a226657d9f8 100644 --- a/include/uapi/linux/i2c.h +++ b/include/uapi/linux/i2c.h @@ -36,7 +36,7 @@ * * Only if I2C_FUNC_NOSTART is set: * %I2C_M_NOSTART: skip repeated start sequence - + * * Only if I2C_FUNC_PROTOCOL_MANGLING is set: * %I2C_M_NO_RD_ACK: in a read message, master ACK/NACK bit is skipped * %I2C_M_IGNORE_NAK: treat NACK from client as ACK -- cgit v1.2.3 From 205dd7a5d6ad6f4c8e8fcd3c3b95a7c0e7067fee Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 18 Nov 2025 18:06:31 -0500 Subject: virtio_pci: drop kernel.h virtio UAPI headers really have no business pulling in kernel.h Replace it with const.h which seems to be what's needed for __KERNEL_DIV_ROUND_UP. Fixes: 7c1ae151e812 ("virtio_pci: Introduce device parts access commands") Cc: Yishai Hadas Cc: Alex Williamson Message-ID: <7a73b6c6af67e13b86633cd7bf11ad56b5d9809b.1763535341.git.mst@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index c691ac210ce2..e732e3456e27 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -40,7 +40,7 @@ #define _LINUX_VIRTIO_PCI_H #include -#include +#include #ifndef VIRTIO_PCI_NO_LEGACY -- cgit v1.2.3 From 6b0f4ca079dbe6ae4aa57e529d67c7dc00d63577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Wed, 26 Nov 2025 17:35:37 +0000 Subject: wireguard: netlink: add YNL specification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a near[1] complete YNL specification for WireGuard, documenting the protocol in a machine-readable format, rather than comments in wireguard.h, and eases usage from C and non-C programming languages alike. The generated C library will be featured in a later patch, so in this patch I will use the in-kernel python client for examples. This makes the documentation in the UAPI header redundant, it is therefore removed. The in-line documentation in the spec is based on the existing comment in wireguard.h, and once released it will be available in the kernel documentation at: https://docs.kernel.org/netlink/specs/wireguard.html (until then run: make htmldocs) Generate wireguard.rst from this spec: $ make -C tools/net/ynl/generated/ wireguard.rst Query wireguard interface through pyynl: $ sudo ./tools/net/ynl/pyynl/cli.py --family wireguard \ --dump get-device \ --json '{"ifindex":3}' [{'fwmark': 0, 'ifindex': 3, 'ifname': 'wg-test', 'listen-port': 54318, 'peers': [{0: {'allowedips': [{0: {'cidr-mask': 0, 'family': 2, 'ipaddr': '0.0.0.0'}}, {0: {'cidr-mask': 0, 'family': 10, 'ipaddr': '::'}}], 'endpoint': b'[...]', 'last-handshake-time': {'nsec': 42, 'sec': 42}, 'persistent-keepalive-interval': 42, 'preshared-key': '[...]', 'protocol-version': 1, 'public-key': '[...]', 'rx-bytes': 42, 'tx-bytes': 42}}], 'private-key': '[...]', 'public-key': '[...]'}] Add another allowed IP prefix: $ sudo ./tools/net/ynl/pyynl/cli.py --family wireguard \ --do set-device --json '{"ifindex":3,"peers":[ {"public-key":"6a df b1 83 a4 ..","allowedips":[ {"cidr-mask":0,"family":10,"ipaddr":"::"}]}]}' [1] As can be seen above, the "endpoint" is only dumped as binary data, as it can't be fully described in YNL. It's either a struct sockaddr_in or struct sockaddr_in6 depending on the attribute length. Signed-off-by: Asbjørn Sloth Tønnesen Signed-off-by: Jason A. Donenfeld --- include/uapi/linux/wireguard.h | 129 ----------------------------------------- 1 file changed, 129 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h index 8c26391196d5..dee4401e0b5d 100644 --- a/include/uapi/linux/wireguard.h +++ b/include/uapi/linux/wireguard.h @@ -1,135 +1,6 @@ /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ /* * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - * - * Documentation - * ============= - * - * The below enums and macros are for interfacing with WireGuard, using generic - * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two - * methods: get and set. Note that while they share many common attributes, - * these two functions actually accept a slightly different set of inputs and - * outputs. - * - * WG_CMD_GET_DEVICE - * ----------------- - * - * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain - * one but not both of: - * - * WGDEVICE_A_IFINDEX: NLA_U32 - * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 - * - * The kernel will then return several messages (NLM_F_MULTI) containing the - * following tree of nested items: - * - * WGDEVICE_A_IFINDEX: NLA_U32 - * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 - * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN - * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN - * WGDEVICE_A_LISTEN_PORT: NLA_U16 - * WGDEVICE_A_FWMARK: NLA_U32 - * WGDEVICE_A_PEERS: NLA_NESTED - * 0: NLA_NESTED - * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN - * WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN - * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6 - * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16 - * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec - * WGPEER_A_RX_BYTES: NLA_U64 - * WGPEER_A_TX_BYTES: NLA_U64 - * WGPEER_A_ALLOWEDIPS: NLA_NESTED - * 0: NLA_NESTED - * WGALLOWEDIP_A_FAMILY: NLA_U16 - * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr - * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 - * 0: NLA_NESTED - * ... - * 0: NLA_NESTED - * ... - * ... - * WGPEER_A_PROTOCOL_VERSION: NLA_U32 - * 0: NLA_NESTED - * ... - * ... - * - * It is possible that all of the allowed IPs of a single peer will not - * fit within a single netlink message. In that case, the same peer will - * be written in the following message, except it will only contain - * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several - * times in a row for the same peer. It is then up to the receiver to - * coalesce adjacent peers. Likewise, it is possible that all peers will - * not fit within a single message. So, subsequent peers will be sent - * in following messages, except those will only contain WGDEVICE_A_IFNAME - * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these - * messages to form the complete list of peers. - * - * Since this is an NLA_F_DUMP command, the final message will always be - * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message - * contains an integer error code. It is either zero or a negative error - * code corresponding to the errno. - * - * WG_CMD_SET_DEVICE - * ----------------- - * - * May only be called via NLM_F_REQUEST. The command should contain the - * following tree of nested items, containing one but not both of - * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME: - * - * WGDEVICE_A_IFINDEX: NLA_U32 - * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 - * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current - * peers should be removed prior to adding the list below. - * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove - * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly - * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable - * WGDEVICE_A_PEERS: NLA_NESTED - * 0: NLA_NESTED - * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN - * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the - * specified peer should not exist at the end of the - * operation, rather than added/updated and/or - * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed - * IPs of this peer should be removed prior to adding - * the list below and/or WGPEER_F_UPDATE_ONLY if the - * peer should only be set if it already exists. - * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove - * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6 - * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable - * WGPEER_A_ALLOWEDIPS: NLA_NESTED - * 0: NLA_NESTED - * WGALLOWEDIP_A_FAMILY: NLA_U16 - * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr - * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 - * WGALLOWEDIP_A_FLAGS: NLA_U32, WGALLOWEDIP_F_REMOVE_ME if - * the specified IP should be removed; - * otherwise, this IP will be added if - * it is not already present. - * 0: NLA_NESTED - * ... - * 0: NLA_NESTED - * ... - * ... - * WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at - * all by most users of this API, as the - * most recent protocol will be used when - * this is unset. Otherwise, must be set - * to 1. - * 0: NLA_NESTED - * ... - * ... - * - * It is possible that the amount of configuration data exceeds that of - * the maximum message length accepted by the kernel. In that case, several - * messages should be sent one after another, with each successive one - * filling in information not contained in the prior. Note that if - * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably - * should not be specified in fragments that come after, so that the list - * of peers is only cleared the first time but appended after. Likewise for - * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message - * of a peer, it likely should not be specified in subsequent fragments. - * - * If an error occurs, NLMSG_ERROR will reply containing an errno. */ #ifndef _WG_UAPI_WIREGUARD_H -- cgit v1.2.3 From b5c5a82bf5cb96e14a6627ef21be962052a0c6d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Wed, 26 Nov 2025 17:35:38 +0000 Subject: wireguard: uapi: move enum wg_cmd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch moves enum wg_cmd to the end of the file, where ynl-gen would generate it. This is an incremental step towards adopting an UAPI header generated by ynl-gen. This is split out to keep the patches readable. This is a trivial patch with no behavioural changes intended. Signed-off-by: Asbjørn Sloth Tønnesen Signed-off-by: Jason A. Donenfeld --- include/uapi/linux/wireguard.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h index dee4401e0b5d..3ebfffd61269 100644 --- a/include/uapi/linux/wireguard.h +++ b/include/uapi/linux/wireguard.h @@ -11,13 +11,6 @@ #define WG_KEY_LEN 32 -enum wg_cmd { - WG_CMD_GET_DEVICE, - WG_CMD_SET_DEVICE, - __WG_CMD_MAX -}; -#define WG_CMD_MAX (__WG_CMD_MAX - 1) - enum wgdevice_flag { WGDEVICE_F_REPLACE_PEERS = 1U << 0, __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS @@ -73,4 +66,12 @@ enum wgallowedip_attribute { }; #define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1) +enum wg_cmd { + WG_CMD_GET_DEVICE, + WG_CMD_SET_DEVICE, + + __WG_CMD_MAX +}; +#define WG_CMD_MAX (__WG_CMD_MAX - 1) + #endif /* _WG_UAPI_WIREGUARD_H */ -- cgit v1.2.3 From 8d974872ab29eeb93a5b0b698007257d8be07968 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Wed, 26 Nov 2025 17:35:39 +0000 Subject: wireguard: uapi: move flag enums MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the wg*_flag enums, so they are defined above the attribute set enums, where ynl-gen would place them. This is an incremental step towards adopting an UAPI header generated by ynl-gen. This is split out to keep the patches readable. This is a trivial patch with no behavioural changes intended. Signed-off-by: Asbjørn Sloth Tønnesen Signed-off-by: Jason A. Donenfeld --- include/uapi/linux/wireguard.h | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h index 3ebfffd61269..a2815f4f2910 100644 --- a/include/uapi/linux/wireguard.h +++ b/include/uapi/linux/wireguard.h @@ -15,6 +15,20 @@ enum wgdevice_flag { WGDEVICE_F_REPLACE_PEERS = 1U << 0, __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS }; + +enum wgpeer_flag { + WGPEER_F_REMOVE_ME = 1U << 0, + WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1, + WGPEER_F_UPDATE_ONLY = 1U << 2, + __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS | + WGPEER_F_UPDATE_ONLY +}; + +enum wgallowedip_flag { + WGALLOWEDIP_F_REMOVE_ME = 1U << 0, + __WGALLOWEDIP_F_ALL = WGALLOWEDIP_F_REMOVE_ME +}; + enum wgdevice_attribute { WGDEVICE_A_UNSPEC, WGDEVICE_A_IFINDEX, @@ -29,13 +43,6 @@ enum wgdevice_attribute { }; #define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1) -enum wgpeer_flag { - WGPEER_F_REMOVE_ME = 1U << 0, - WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1, - WGPEER_F_UPDATE_ONLY = 1U << 2, - __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS | - WGPEER_F_UPDATE_ONLY -}; enum wgpeer_attribute { WGPEER_A_UNSPEC, WGPEER_A_PUBLIC_KEY, @@ -52,10 +59,6 @@ enum wgpeer_attribute { }; #define WGPEER_A_MAX (__WGPEER_A_LAST - 1) -enum wgallowedip_flag { - WGALLOWEDIP_F_REMOVE_ME = 1U << 0, - __WGALLOWEDIP_F_ALL = WGALLOWEDIP_F_REMOVE_ME -}; enum wgallowedip_attribute { WGALLOWEDIP_A_UNSPEC, WGALLOWEDIP_A_FAMILY, -- cgit v1.2.3 From 88cedad45ba14097e06d2c9f6578688097a94691 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Wed, 26 Nov 2025 17:35:40 +0000 Subject: wireguard: uapi: generate header with ynl-gen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use ynl-gen to generate the UAPI header for WireGuard. The cosmetic changes in this patch confirms that the spec is aligned with the implementation. By using the generated version, it ensures that they stay in sync. Changes in the generated header: * Trivial header guard rename. * Trivial white space changes. * Trivial comment changes. * Precompute bitflags in ynl-gen (see [1]). * Drop __*_F_ALL constants (see [1]). [1] https://lore.kernel.org/r/20251014123201.6ecfd146@kernel.org/ No behavioural changes intended. Signed-off-by: Asbjørn Sloth Tønnesen Signed-off-by: Jason A. Donenfeld --- include/uapi/linux/wireguard.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h index a2815f4f2910..a100b9715b08 100644 --- a/include/uapi/linux/wireguard.h +++ b/include/uapi/linux/wireguard.h @@ -1,32 +1,29 @@ -/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/wireguard.yaml */ +/* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ -#ifndef _WG_UAPI_WIREGUARD_H -#define _WG_UAPI_WIREGUARD_H +#ifndef _UAPI_LINUX_WIREGUARD_H +#define _UAPI_LINUX_WIREGUARD_H -#define WG_GENL_NAME "wireguard" -#define WG_GENL_VERSION 1 +#define WG_GENL_NAME "wireguard" +#define WG_GENL_VERSION 1 -#define WG_KEY_LEN 32 +#define WG_KEY_LEN 32 enum wgdevice_flag { - WGDEVICE_F_REPLACE_PEERS = 1U << 0, - __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS + WGDEVICE_F_REPLACE_PEERS = 1, }; enum wgpeer_flag { - WGPEER_F_REMOVE_ME = 1U << 0, - WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1, - WGPEER_F_UPDATE_ONLY = 1U << 2, - __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS | - WGPEER_F_UPDATE_ONLY + WGPEER_F_REMOVE_ME = 1, + WGPEER_F_REPLACE_ALLOWEDIPS = 2, + WGPEER_F_UPDATE_ONLY = 4, }; enum wgallowedip_flag { - WGALLOWEDIP_F_REMOVE_ME = 1U << 0, - __WGALLOWEDIP_F_ALL = WGALLOWEDIP_F_REMOVE_ME + WGALLOWEDIP_F_REMOVE_ME = 1, }; enum wgdevice_attribute { @@ -39,6 +36,7 @@ enum wgdevice_attribute { WGDEVICE_A_LISTEN_PORT, WGDEVICE_A_FWMARK, WGDEVICE_A_PEERS, + __WGDEVICE_A_LAST }; #define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1) @@ -55,6 +53,7 @@ enum wgpeer_attribute { WGPEER_A_TX_BYTES, WGPEER_A_ALLOWEDIPS, WGPEER_A_PROTOCOL_VERSION, + __WGPEER_A_LAST }; #define WGPEER_A_MAX (__WGPEER_A_LAST - 1) @@ -65,6 +64,7 @@ enum wgallowedip_attribute { WGALLOWEDIP_A_IPADDR, WGALLOWEDIP_A_CIDR_MASK, WGALLOWEDIP_A_FLAGS, + __WGALLOWEDIP_A_LAST }; #define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1) @@ -77,4 +77,4 @@ enum wg_cmd { }; #define WG_CMD_MAX (__WG_CMD_MAX - 1) -#endif /* _WG_UAPI_WIREGUARD_H */ +#endif /* _UAPI_LINUX_WIREGUARD_H */ -- cgit v1.2.3 From c3859de858aa7ae0d0a5ca21e8ee9792f2f256b6 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 2 Dec 2025 13:44:47 +1100 Subject: psp-sev: Assign numbers to all status codes and add new Make the definitions explicit. Add some more new codes. The following patches will be using SPDM_REQUEST and EXPAND_BUFFER_LENGTH_REQUEST, others are useful for the PSP FW diagnostics. Signed-off-by: Alexey Kardashevskiy Link: https://patch.msgid.link/20251202024449.542361-3-aik@amd.com Acked-by: Tom Lendacky Signed-off-by: Dan Williams --- include/uapi/linux/psp-sev.h | 66 +++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 25 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index c2fd324623c4..2b5b042eb73b 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -47,32 +47,32 @@ typedef enum { * with possible values from the specification. */ SEV_RET_NO_FW_CALL = -1, - SEV_RET_SUCCESS = 0, - SEV_RET_INVALID_PLATFORM_STATE, - SEV_RET_INVALID_GUEST_STATE, - SEV_RET_INAVLID_CONFIG, + SEV_RET_SUCCESS = 0, + SEV_RET_INVALID_PLATFORM_STATE = 0x0001, + SEV_RET_INVALID_GUEST_STATE = 0x0002, + SEV_RET_INAVLID_CONFIG = 0x0003, SEV_RET_INVALID_CONFIG = SEV_RET_INAVLID_CONFIG, - SEV_RET_INVALID_LEN, - SEV_RET_ALREADY_OWNED, - SEV_RET_INVALID_CERTIFICATE, - SEV_RET_POLICY_FAILURE, - SEV_RET_INACTIVE, - SEV_RET_INVALID_ADDRESS, - SEV_RET_BAD_SIGNATURE, - SEV_RET_BAD_MEASUREMENT, - SEV_RET_ASID_OWNED, - SEV_RET_INVALID_ASID, - SEV_RET_WBINVD_REQUIRED, - SEV_RET_DFFLUSH_REQUIRED, - SEV_RET_INVALID_GUEST, - SEV_RET_INVALID_COMMAND, - SEV_RET_ACTIVE, - SEV_RET_HWSEV_RET_PLATFORM, - SEV_RET_HWSEV_RET_UNSAFE, - SEV_RET_UNSUPPORTED, - SEV_RET_INVALID_PARAM, - SEV_RET_RESOURCE_LIMIT, - SEV_RET_SECURE_DATA_INVALID, + SEV_RET_INVALID_LEN = 0x0004, + SEV_RET_ALREADY_OWNED = 0x0005, + SEV_RET_INVALID_CERTIFICATE = 0x0006, + SEV_RET_POLICY_FAILURE = 0x0007, + SEV_RET_INACTIVE = 0x0008, + SEV_RET_INVALID_ADDRESS = 0x0009, + SEV_RET_BAD_SIGNATURE = 0x000A, + SEV_RET_BAD_MEASUREMENT = 0x000B, + SEV_RET_ASID_OWNED = 0x000C, + SEV_RET_INVALID_ASID = 0x000D, + SEV_RET_WBINVD_REQUIRED = 0x000E, + SEV_RET_DFFLUSH_REQUIRED = 0x000F, + SEV_RET_INVALID_GUEST = 0x0010, + SEV_RET_INVALID_COMMAND = 0x0011, + SEV_RET_ACTIVE = 0x0012, + SEV_RET_HWSEV_RET_PLATFORM = 0x0013, + SEV_RET_HWSEV_RET_UNSAFE = 0x0014, + SEV_RET_UNSUPPORTED = 0x0015, + SEV_RET_INVALID_PARAM = 0x0016, + SEV_RET_RESOURCE_LIMIT = 0x0017, + SEV_RET_SECURE_DATA_INVALID = 0x0018, SEV_RET_INVALID_PAGE_SIZE = 0x0019, SEV_RET_INVALID_PAGE_STATE = 0x001A, SEV_RET_INVALID_MDATA_ENTRY = 0x001B, @@ -87,6 +87,22 @@ typedef enum { SEV_RET_RESTORE_REQUIRED = 0x0025, SEV_RET_RMP_INITIALIZATION_FAILED = 0x0026, SEV_RET_INVALID_KEY = 0x0027, + SEV_RET_SHUTDOWN_INCOMPLETE = 0x0028, + SEV_RET_INCORRECT_BUFFER_LENGTH = 0x0030, + SEV_RET_EXPAND_BUFFER_LENGTH_REQUEST = 0x0031, + SEV_RET_SPDM_REQUEST = 0x0032, + SEV_RET_SPDM_ERROR = 0x0033, + SEV_RET_SEV_STATUS_ERR_IN_DEV_CONN = 0x0035, + SEV_RET_SEV_STATUS_INVALID_DEV_CTX = 0x0036, + SEV_RET_SEV_STATUS_INVALID_TDI_CTX = 0x0037, + SEV_RET_SEV_STATUS_INVALID_TDI = 0x0038, + SEV_RET_SEV_STATUS_RECLAIM_REQUIRED = 0x0039, + SEV_RET_IN_USE = 0x003A, + SEV_RET_SEV_STATUS_INVALID_DEV_STATE = 0x003B, + SEV_RET_SEV_STATUS_INVALID_TDI_STATE = 0x003C, + SEV_RET_SEV_STATUS_DEV_CERT_CHANGED = 0x003D, + SEV_RET_SEV_STATUS_RESYNC_REQ = 0x003E, + SEV_RET_SEV_STATUS_RESPONSE_TOO_LARGE = 0x003F, SEV_RET_MAX, } sev_ret_code; -- cgit v1.2.3 From f7231cff1f3ff8259bef02dc4999bc132abf29cf Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Wed, 3 Dec 2025 08:55:34 +0000 Subject: media: uapi: c3-isp: Fix documentation warning Building htmldocs generates a warning: WARNING: include/uapi/linux/media/amlogic/c3-isp-config.h:199 error: Cannot parse struct or union! Which correctly highlights that the c3_isp_params_block_header symbol is wrongly documented as a struct while it's a plain #define instead. Fix this by removing the 'struct' identifier from the documentation of the c3_isp_params_block_header symbol. [ribalda: Add Closes:] Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20251127131425.4b5b6644@canb.auug.org.au/ Fixes: 45662082855c ("media: uapi: Convert Amlogic C3 to V4L2 extensible params") Cc: stable@vger.kernel.org Signed-off-by: Jacopo Mondi Signed-off-by: Ricardo Ribalda Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/media/amlogic/c3-isp-config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/media/amlogic/c3-isp-config.h b/include/uapi/linux/media/amlogic/c3-isp-config.h index 0a3c1cc55ccb..92db5dcdda18 100644 --- a/include/uapi/linux/media/amlogic/c3-isp-config.h +++ b/include/uapi/linux/media/amlogic/c3-isp-config.h @@ -186,7 +186,7 @@ enum c3_isp_params_block_type { #define C3_ISP_PARAMS_BLOCK_FL_ENABLE V4L2_ISP_PARAMS_FL_BLOCK_ENABLE /** - * struct c3_isp_params_block_header - C3 ISP parameter block header + * c3_isp_params_block_header - C3 ISP parameter block header * * This structure represents the common part of all the ISP configuration * blocks and is identical to :c:type:`v4l2_isp_params_block_header`. -- cgit v1.2.3 From 22a1ffea5f805dfa21b64d1c7b5fe39c0c78c997 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 1 Dec 2025 16:43:28 -0500 Subject: block: add IOC_PR_READ_KEYS ioctl Add a Persistent Reservations ioctl to read the list of currently registered reservation keys. This calls the pr_ops->read_keys() function that was previously added in commit c787f1baa503 ("block: Add PR callouts for read keys and reservation") but was only used by the in-kernel SCSI target so far. The IOC_PR_READ_KEYS ioctl is necessary so that userspace applications that rely on Persistent Reservations ioctls have a way of inspecting the current state. Cluster managers and validation tests need this functionality. Signed-off-by: Stefan Hajnoczi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/uapi/linux/pr.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h index d8126415966f..fcb74eab92c8 100644 --- a/include/uapi/linux/pr.h +++ b/include/uapi/linux/pr.h @@ -56,6 +56,12 @@ struct pr_clear { __u32 __pad; }; +struct pr_read_keys { + __u32 generation; + __u32 num_keys; + __u64 keys_ptr; +}; + #define PR_FL_IGNORE_KEY (1 << 0) /* ignore existing key */ #define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration) @@ -64,5 +70,6 @@ struct pr_clear { #define IOC_PR_PREEMPT _IOW('p', 203, struct pr_preempt) #define IOC_PR_PREEMPT_ABORT _IOW('p', 204, struct pr_preempt) #define IOC_PR_CLEAR _IOW('p', 205, struct pr_clear) +#define IOC_PR_READ_KEYS _IOWR('p', 206, struct pr_read_keys) #endif /* _UAPI_PR_H */ -- cgit v1.2.3 From 3e2cb9ee76c27f57bfdb7b4753b909594d4fa31a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 1 Dec 2025 16:43:29 -0500 Subject: block: add IOC_PR_READ_RESERVATION ioctl Add a Persistent Reservations ioctl to read the current reservation. This calls the pr_ops->read_reservation() function that was previously added in commit c787f1baa503 ("block: Add PR callouts for read keys and reservation") but was only used by the in-kernel SCSI target so far. The IOC_PR_READ_RESERVATION ioctl is necessary so that userspace applications that rely on Persistent Reservations ioctls have a way of inspecting the current state. Cluster managers and validation tests need this functionality. Signed-off-by: Stefan Hajnoczi Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/uapi/linux/pr.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h index fcb74eab92c8..847f3051057a 100644 --- a/include/uapi/linux/pr.h +++ b/include/uapi/linux/pr.h @@ -62,6 +62,12 @@ struct pr_read_keys { __u64 keys_ptr; }; +struct pr_read_reservation { + __u64 key; + __u32 generation; + __u32 type; +}; + #define PR_FL_IGNORE_KEY (1 << 0) /* ignore existing key */ #define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration) @@ -71,5 +77,6 @@ struct pr_read_keys { #define IOC_PR_PREEMPT_ABORT _IOW('p', 204, struct pr_preempt) #define IOC_PR_CLEAR _IOW('p', 205, struct pr_clear) #define IOC_PR_READ_KEYS _IOWR('p', 206, struct pr_read_keys) +#define IOC_PR_READ_RESERVATION _IOR('p', 207, struct pr_read_reservation) #endif /* _UAPI_PR_H */ -- cgit v1.2.3 From fe93446b5ebdaa89a8f97b15668c077921a65140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 3 Dec 2025 14:57:57 +0100 Subject: vfs: use UAPI types for new struct delegation definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using libc types and headers from the UAPI headers is problematic as it introduces a dependency on a full C toolchain. Use the fixed-width integer types provided by the UAPI headers instead. Fixes: 1602bad16d7d ("vfs: expose delegation support to userland") Fixes: 4be9e04ebf75 ("vfs: add needed headers for new struct delegation definition") Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251203-uapi-fcntl-v1-1-490c67bf3425@linutronix.de Acked-by: Arnd Bergmann Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 5e277fd955aa..aadfbf6e0cb3 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -4,11 +4,7 @@ #include #include -#ifdef __KERNEL__ #include -#else -#include -#endif #define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0) #define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1) @@ -90,9 +86,9 @@ /* Argument structure for F_GETDELEG and F_SETDELEG */ struct delegation { - uint32_t d_flags; /* Must be 0 */ - uint16_t d_type; /* F_RDLCK, F_WRLCK, F_UNLCK */ - uint16_t __pad; /* Must be 0 */ + __u32 d_flags; /* Must be 0 */ + __u16 d_type; /* F_RDLCK, F_WRLCK, F_UNLCK */ + __u16 __pad; /* Must be 0 */ }; /* -- cgit v1.2.3 From 7bfe3b8ea6e30437e01fcb8e4f56ef6e4d986d0f Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Thu, 13 Nov 2025 04:41:49 +0000 Subject: Drivers: hv: Introduce mshv_vtl driver Provide an interface for Virtual Machine Monitor like OpenVMM and its use as OpenHCL paravisor to control VTL0 (Virtual trust Level). Expose devices and support IOCTLs for features like VTL creation, VTL0 memory management, context switch, making hypercalls, mapping VTL0 address space to VTL2 userspace, getting new VMBus messages and channel events in VTL2 etc. Co-developed-by: Roman Kisel Signed-off-by: Roman Kisel Co-developed-by: Saurabh Sengar Signed-off-by: Saurabh Sengar Reviewed-by: Michael Kelley Signed-off-by: Naman Jain Signed-off-by: Wei Liu --- include/uapi/linux/mshv.h | 80 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index b645d17cc531..dee3ece28ce5 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -322,4 +322,84 @@ struct mshv_get_set_vp_state { * #define MSHV_ROOT_HVCALL _IOWR(MSHV_IOCTL, 0x07, struct mshv_root_hvcall) */ +/* Structure definitions, macros and IOCTLs for mshv_vtl */ + +#define MSHV_CAP_CORE_API_STABLE 0x0 +#define MSHV_CAP_REGISTER_PAGE 0x1 +#define MSHV_CAP_VTL_RETURN_ACTION 0x2 +#define MSHV_CAP_DR6_SHARED 0x3 +#define MSHV_MAX_RUN_MSG_SIZE 256 + +struct mshv_vp_registers { + __u32 count; /* supports only 1 register at a time */ + __u32 reserved; /* Reserved for alignment or future use */ + __u64 regs_ptr; /* pointer to struct hv_register_assoc */ +}; + +struct mshv_vtl_set_eventfd { + __s32 fd; + __u32 flag; +}; + +struct mshv_vtl_signal_event { + __u32 connection_id; + __u32 flag; +}; + +struct mshv_vtl_sint_post_msg { + __u64 message_type; + __u32 connection_id; + __u32 payload_size; /* Must not exceed HV_MESSAGE_PAYLOAD_BYTE_COUNT */ + __u64 payload_ptr; /* pointer to message payload (bytes) */ +}; + +struct mshv_vtl_ram_disposition { + __u64 start_pfn; + __u64 last_pfn; +}; + +struct mshv_vtl_set_poll_file { + __u32 cpu; + __u32 fd; +}; + +struct mshv_vtl_hvcall_setup { + __u64 bitmap_array_size; /* stores number of bytes */ + __u64 allow_bitmap_ptr; +}; + +struct mshv_vtl_hvcall { + __u64 control; /* Hypercall control code */ + __u64 input_size; /* Size of the input data */ + __u64 input_ptr; /* Pointer to the input struct */ + __u64 status; /* Status of the hypercall (output) */ + __u64 output_size; /* Size of the output data */ + __u64 output_ptr; /* Pointer to the output struct */ +}; + +struct mshv_sint_mask { + __u8 mask; + __u8 reserved[7]; +}; + +/* /dev/mshv device IOCTL */ +#define MSHV_CHECK_EXTENSION _IOW(MSHV_IOCTL, 0x00, __u32) + +/* vtl device */ +#define MSHV_CREATE_VTL _IOR(MSHV_IOCTL, 0x1D, char) +#define MSHV_ADD_VTL0_MEMORY _IOW(MSHV_IOCTL, 0x21, struct mshv_vtl_ram_disposition) +#define MSHV_SET_POLL_FILE _IOW(MSHV_IOCTL, 0x25, struct mshv_vtl_set_poll_file) +#define MSHV_RETURN_TO_LOWER_VTL _IO(MSHV_IOCTL, 0x27) +#define MSHV_GET_VP_REGISTERS _IOWR(MSHV_IOCTL, 0x05, struct mshv_vp_registers) +#define MSHV_SET_VP_REGISTERS _IOW(MSHV_IOCTL, 0x06, struct mshv_vp_registers) + +/* VMBus device IOCTLs */ +#define MSHV_SINT_SIGNAL_EVENT _IOW(MSHV_IOCTL, 0x22, struct mshv_vtl_signal_event) +#define MSHV_SINT_POST_MESSAGE _IOW(MSHV_IOCTL, 0x23, struct mshv_vtl_sint_post_msg) +#define MSHV_SINT_SET_EVENTFD _IOW(MSHV_IOCTL, 0x24, struct mshv_vtl_set_eventfd) +#define MSHV_SINT_PAUSE_MESSAGE_STREAM _IOW(MSHV_IOCTL, 0x25, struct mshv_sint_mask) + +/* hv_hvcall device */ +#define MSHV_HVCALL_SETUP _IOW(MSHV_IOCTL, 0x1E, struct mshv_vtl_hvcall_setup) +#define MSHV_HVCALL _IOWR(MSHV_IOCTL, 0x1F, struct mshv_vtl_hvcall) #endif -- cgit v1.2.3 From 9a97857db0c5655b8932f86b5d18bb959079b0ee Mon Sep 17 00:00:00 2001 From: Andres J Rosa Date: Wed, 3 Dec 2025 10:25:01 -0600 Subject: ALSA: uapi: Fix typo in asound.h comment Fix 'level-shit' to 'level-shift' in struct snd_cea_861_aud_if comment. Fixes: 7ba1c40b536e ("ALSA: Add definitions for CEA-861 Audio InfoFrames") Signed-off-by: Andres J Rosa Link: https://patch.msgid.link/20251203162509.1822-1-andyrosa@gmail.com Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 5a049eeaecce..d3ce75ba938a 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -60,7 +60,7 @@ struct snd_cea_861_aud_if { unsigned char db2_sf_ss; /* sample frequency and size */ unsigned char db3; /* not used, all zeros */ unsigned char db4_ca; /* channel allocation code */ - unsigned char db5_dminh_lsv; /* downmix inhibit & level-shit values */ + unsigned char db5_dminh_lsv; /* downmix inhibit & level-shift values */ }; /**************************************************************************** -- cgit v1.2.3