From 49b393af3130c7712c7e8f215f4126c9a8060fa6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 4 Jun 2025 10:21:38 +0200 Subject: perf: Add comment to enum perf_event_state Better describe the event states. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Leo Yan Link: https://lkml.kernel.org/r/20250604135801.GK38114@noisy.programming.kicks-ass.net --- include/linux/perf_event.h | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 52dc7cfab0e0..ec9d96025683 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -635,8 +635,46 @@ struct perf_addr_filter_range { unsigned long size; }; -/** - * enum perf_event_state - the states of an event: +/* + * The normal states are: + * + * ACTIVE --. + * ^ | + * | | + * sched_{in,out}() | + * | | + * v | + * ,---> INACTIVE --+ <-. + * | | | + * | {dis,en}able() + * sched_in() | | + * | OFF <--' --+ + * | | + * `---> ERROR ------' + * + * That is: + * + * sched_in: INACTIVE -> {ACTIVE,ERROR} + * sched_out: ACTIVE -> INACTIVE + * disable: {ACTIVE,INACTIVE} -> OFF + * enable: {OFF,ERROR} -> INACTIVE + * + * Where {OFF,ERROR} are disabled states. + * + * Then we have the {EXIT,REVOKED,DEAD} states which are various shades of + * defunct events: + * + * - EXIT means task that the even was assigned to died, but child events + * still live, and further children can still be created. But the event + * itself will never be active again. It can only transition to + * {REVOKED,DEAD}; + * + * - REVOKED means the PMU the event was associated with is gone; all + * functionality is stopped but the event is still alive. Can only + * transition to DEAD; + * + * - DEAD event really is DYING tearing down state and freeing bits. + * */ enum perf_event_state { PERF_EVENT_STATE_DEAD = -5, -- cgit v1.2.3 From e6ed54e86aae9e4f7286ce8d5c73780f91b48d1c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 3 Jun 2025 16:12:39 -0400 Subject: Bluetooth: MGMT: Fix UAF on mgmt_remove_adv_monitor_complete This reworks MGMT_OP_REMOVE_ADV_MONITOR to not use mgmt_pending_add to avoid crashes like bellow: ================================================================== BUG: KASAN: slab-use-after-free in mgmt_remove_adv_monitor_complete+0xe5/0x540 net/bluetooth/mgmt.c:5406 Read of size 8 at addr ffff88801c53f318 by task kworker/u5:5/5341 CPU: 0 UID: 0 PID: 5341 Comm: kworker/u5:5 Not tainted 6.15.0-syzkaller-10402-g4cb6c8af8591 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Workqueue: hci0 hci_cmd_sync_work Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xd2/0x2b0 mm/kasan/report.c:521 kasan_report+0x118/0x150 mm/kasan/report.c:634 mgmt_remove_adv_monitor_complete+0xe5/0x540 net/bluetooth/mgmt.c:5406 hci_cmd_sync_work+0x261/0x3a0 net/bluetooth/hci_sync.c:334 process_one_work kernel/workqueue.c:3238 [inline] process_scheduled_works+0xade/0x17b0 kernel/workqueue.c:3321 worker_thread+0x8a0/0xda0 kernel/workqueue.c:3402 kthread+0x711/0x8a0 kernel/kthread.c:464 ret_from_fork+0x3fc/0x770 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Allocated by task 5987: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __kmalloc_cache_noprof+0x230/0x3d0 mm/slub.c:4358 kmalloc_noprof include/linux/slab.h:905 [inline] kzalloc_noprof include/linux/slab.h:1039 [inline] mgmt_pending_new+0x65/0x240 net/bluetooth/mgmt_util.c:252 mgmt_pending_add+0x34/0x120 net/bluetooth/mgmt_util.c:279 remove_adv_monitor+0x103/0x1b0 net/bluetooth/mgmt.c:5454 hci_mgmt_cmd+0x9c9/0xef0 net/bluetooth/hci_sock.c:1719 hci_sock_sendmsg+0x6ca/0xef0 net/bluetooth/hci_sock.c:1839 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x219/0x270 net/socket.c:727 sock_write_iter+0x258/0x330 net/socket.c:1131 new_sync_write fs/read_write.c:593 [inline] vfs_write+0x548/0xa90 fs/read_write.c:686 ksys_write+0x145/0x250 fs/read_write.c:738 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 5989: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x62/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2380 [inline] slab_free mm/slub.c:4642 [inline] kfree+0x18e/0x440 mm/slub.c:4841 mgmt_pending_foreach+0xc9/0x120 net/bluetooth/mgmt_util.c:242 mgmt_index_removed+0x10d/0x2f0 net/bluetooth/mgmt.c:9366 hci_sock_bind+0xbe9/0x1000 net/bluetooth/hci_sock.c:1314 __sys_bind_socket net/socket.c:1810 [inline] __sys_bind+0x2c3/0x3e0 net/socket.c:1841 __do_sys_bind net/socket.c:1846 [inline] __se_sys_bind net/socket.c:1844 [inline] __x64_sys_bind+0x7a/0x90 net/socket.c:1844 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 66bd095ab5d4 ("Bluetooth: advmon offload MSFT remove monitor") Closes: https://syzkaller.appspot.com/bug?extid=feb0dc579bbe30a13190 Reported-by: syzbot+feb0dc579bbe30a13190@syzkaller.appspotmail.com Tested-by: syzbot+feb0dc579bbe30a13190@syzkaller.appspotmail.com Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2b261e74e2c4..93fcb659f0d4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2400,7 +2400,6 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); -void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); -- cgit v1.2.3 From 6fe26f694c824b8a4dbf50c635bee1302e3f099c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 20 May 2025 15:42:21 -0400 Subject: Bluetooth: MGMT: Protect mgmt_pending list with its own lock This uses a mutex to protect from concurrent access of mgmt_pending list which can cause crashes like: ================================================================== BUG: KASAN: slab-use-after-free in hci_sock_get_channel+0x60/0x68 net/bluetooth/hci_sock.c:91 Read of size 2 at addr ffff0000c48885b2 by task syz.4.334/7318 CPU: 0 UID: 0 PID: 7318 Comm: syz.4.334 Not tainted 6.15.0-rc7-syzkaller-g187899f4124a #0 PREEMPT Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call trace: show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:466 (C) __dump_stack+0x30/0x40 lib/dump_stack.c:94 dump_stack_lvl+0xd8/0x12c lib/dump_stack.c:120 print_address_description+0xa8/0x254 mm/kasan/report.c:408 print_report+0x68/0x84 mm/kasan/report.c:521 kasan_report+0xb0/0x110 mm/kasan/report.c:634 __asan_report_load2_noabort+0x20/0x2c mm/kasan/report_generic.c:379 hci_sock_get_channel+0x60/0x68 net/bluetooth/hci_sock.c:91 mgmt_pending_find+0x7c/0x140 net/bluetooth/mgmt_util.c:223 pending_find net/bluetooth/mgmt.c:947 [inline] remove_adv_monitor+0x44/0x1a4 net/bluetooth/mgmt.c:5445 hci_mgmt_cmd+0x780/0xc00 net/bluetooth/hci_sock.c:1712 hci_sock_sendmsg+0x544/0xbb0 net/bluetooth/hci_sock.c:1832 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg net/socket.c:727 [inline] sock_write_iter+0x25c/0x378 net/socket.c:1131 new_sync_write fs/read_write.c:591 [inline] vfs_write+0x62c/0x97c fs/read_write.c:684 ksys_write+0x120/0x210 fs/read_write.c:736 __do_sys_write fs/read_write.c:747 [inline] __se_sys_write fs/read_write.c:744 [inline] __arm64_sys_write+0x7c/0x90 fs/read_write.c:744 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x58/0x17c arch/arm64/kernel/entry-common.c:767 el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:786 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 Allocated by task 7037: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_alloc_info+0x44/0x54 mm/kasan/generic.c:562 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x9c/0xb4 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __do_kmalloc_node mm/slub.c:4327 [inline] __kmalloc_noprof+0x2fc/0x4c8 mm/slub.c:4339 kmalloc_noprof include/linux/slab.h:909 [inline] sk_prot_alloc+0xc4/0x1f0 net/core/sock.c:2198 sk_alloc+0x44/0x3ac net/core/sock.c:2254 bt_sock_alloc+0x4c/0x300 net/bluetooth/af_bluetooth.c:148 hci_sock_create+0xa8/0x194 net/bluetooth/hci_sock.c:2202 bt_sock_create+0x14c/0x24c net/bluetooth/af_bluetooth.c:132 __sock_create+0x43c/0x91c net/socket.c:1541 sock_create net/socket.c:1599 [inline] __sys_socket_create net/socket.c:1636 [inline] __sys_socket+0xd4/0x1c0 net/socket.c:1683 __do_sys_socket net/socket.c:1697 [inline] __se_sys_socket net/socket.c:1695 [inline] __arm64_sys_socket+0x7c/0x94 net/socket.c:1695 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x58/0x17c arch/arm64/kernel/entry-common.c:767 el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:786 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 Freed by task 6607: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_free_info+0x58/0x70 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x68/0x88 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2380 [inline] slab_free mm/slub.c:4642 [inline] kfree+0x17c/0x474 mm/slub.c:4841 sk_prot_free net/core/sock.c:2237 [inline] __sk_destruct+0x4f4/0x760 net/core/sock.c:2332 sk_destruct net/core/sock.c:2360 [inline] __sk_free+0x320/0x430 net/core/sock.c:2371 sk_free+0x60/0xc8 net/core/sock.c:2382 sock_put include/net/sock.h:1944 [inline] mgmt_pending_free+0x88/0x118 net/bluetooth/mgmt_util.c:290 mgmt_pending_remove+0xec/0x104 net/bluetooth/mgmt_util.c:298 mgmt_set_powered_complete+0x418/0x5cc net/bluetooth/mgmt.c:1355 hci_cmd_sync_work+0x204/0x33c net/bluetooth/hci_sync.c:334 process_one_work+0x7e8/0x156c kernel/workqueue.c:3238 process_scheduled_works kernel/workqueue.c:3319 [inline] worker_thread+0x958/0xed8 kernel/workqueue.c:3400 kthread+0x5fc/0x75c kernel/kthread.c:464 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:847 Fixes: a380b6cff1a2 ("Bluetooth: Add generic mgmt helper API") Closes: https://syzkaller.appspot.com/bug?extid=0a7039d5d9986ff4ecec Closes: https://syzkaller.appspot.com/bug?extid=cc0cc52e7f43dc9e6df1 Reported-by: syzbot+0a7039d5d9986ff4ecec@syzkaller.appspotmail.com Tested-by: syzbot+0a7039d5d9986ff4ecec@syzkaller.appspotmail.com Tested-by: syzbot+cc0cc52e7f43dc9e6df1@syzkaller.appspotmail.com Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 93fcb659f0d4..f7b1a9eb9543 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -546,6 +546,7 @@ struct hci_dev { struct hci_conn_hash conn_hash; struct list_head mesh_pending; + struct mutex mgmt_pending_lock; struct list_head mgmt_pending; struct list_head reject_list; struct list_head accept_list; -- cgit v1.2.3 From 11fcf368506d347088e613edf6cd2604d70c454f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 6 Jun 2025 10:23:57 +0200 Subject: uapi: bitops: use UAPI-safe variant of BITS_PER_LONG again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1e7933a575ed ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") did not take in account that the usage of BITS_PER_LONG in __GENMASK() was changed to __BITS_PER_LONG for UAPI-safety in commit 3c7a8e190bc5 ("uapi: introduce uapi-friendly macros for GENMASK"). BITS_PER_LONG can not be used in UAPI headers as it derives from the kernel configuration and not from the current compiler invocation. When building compat userspace code or a compat vDSO its value will be incorrect. Switch back to __BITS_PER_LONG. Fixes: 1e7933a575ed ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Signed-off-by: Yury Norov [NVIDIA] --- include/uapi/linux/bits.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) -- cgit v1.2.3 From 4c529a4a7260776bb4abe264498857b4537aa70d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 7 Jun 2025 14:22:56 +0200 Subject: x86/smp: PM/hibernate: Split arch_resume_nosmt() Move the inner part of the arch_resume_nosmt() code into a separate function called arch_cpu_rescan_dead_smt_siblings(), so it can be used in other places where "dead" SMT siblings may need to be taken online and offline again in order to get into deep idle states. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Dave Hansen Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/3361688.44csPzL39Z@rjwysocki.net [ rjw: Prevent build issues with CONFIG_SMP unset ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e6089abc28e2..96a3a0d6a60e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -120,6 +120,7 @@ extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); void bringup_nonboot_cpus(unsigned int max_cpus); +int arch_cpu_rescan_dead_smt_siblings(void); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 @@ -134,6 +135,8 @@ static inline void cpu_maps_update_done(void) static inline int add_cpu(unsigned int cpu) { return 0;} +static inline int arch_cpu_rescan_dead_smt_siblings(void) { return 0; } + #endif /* CONFIG_SMP */ extern const struct bus_type cpu_subsys; -- cgit v1.2.3 From 33877220b8641b4cde474a4229ea92c0e3637883 Mon Sep 17 00:00:00 2001 From: Tasos Sahanidis Date: Mon, 19 May 2025 11:56:55 +0300 Subject: ata: libata-acpi: Do not assume 40 wire cable if no devices are enabled On at least an ASRock 990FX Extreme 4 with a VIA VT6330, the devices have not yet been enabled by the first time ata_acpi_cbl_80wire() is called. This means that the ata_for_each_dev loop is never entered, and a 40 wire cable is assumed. The VIA controller on this board does not report the cable in the PCI config space, thus having to fall back to ACPI even though no SATA bridge is present. The _GTM values are correctly reported by the firmware through ACPI, which has already set up faster transfer modes, but due to the above the controller is forced down to a maximum of UDMA/33. Resolve this by modifying ata_acpi_cbl_80wire() to directly return the cable type. First, an unknown cable is assumed which preserves the mode set by the firmware, and then on subsequent calls when the devices have been enabled, an 80 wire cable is correctly detected. Since the function now directly returns the cable type, it is renamed to ata_acpi_cbl_pata_type(). Signed-off-by: Tasos Sahanidis Link: https://lore.kernel.org/r/20250519085945.1399466-1-tasos@tasossah.com Signed-off-by: Niklas Cassel --- include/linux/libata.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 31be45fd47a6..1e5aec839041 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1352,7 +1352,7 @@ int ata_acpi_stm(struct ata_port *ap, const struct ata_acpi_gtm *stm); int ata_acpi_gtm(struct ata_port *ap, struct ata_acpi_gtm *stm); unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, const struct ata_acpi_gtm *gtm); -int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm); +int ata_acpi_cbl_pata_type(struct ata_port *ap); #else static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) { @@ -1377,10 +1377,9 @@ static inline unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, return 0; } -static inline int ata_acpi_cbl_80wire(struct ata_port *ap, - const struct ata_acpi_gtm *gtm) +static inline int ata_acpi_cbl_pata_type(struct ata_port *ap) { - return 0; + return ATA_CBL_PATA40; } #endif -- cgit v1.2.3 From 2660a544fdc0940bba15f70508a46cf9a6491230 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 9 Jun 2025 19:08:03 +0200 Subject: net: Fix TOCTOU issue in sk_is_readable() sk->sk_prot->sock_is_readable is a valid function pointer when sk resides in a sockmap. After the last sk_psock_put() (which usually happens when socket is removed from sockmap), sk->sk_prot gets restored and sk->sk_prot->sock_is_readable becomes NULL. This makes sk_is_readable() racy, if the value of sk->sk_prot is reloaded after the initial check. Which in turn may lead to a null pointer dereference. Ensure the function pointer does not turn NULL after the check. Fixes: 8934ce2fd081 ("bpf: sockmap redirect ingress support") Suggested-by: Jakub Sitnicki Signed-off-by: Michal Luczaj Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250609-skisreadable-toctou-v1-1-d0dfb2d62c37@rbox.co Signed-off-by: Jakub Kicinski --- include/net/sock.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 92e7c1aae3cc..4c37015b7cf7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -3010,8 +3010,11 @@ int sock_ioctl_inout(struct sock *sk, unsigned int cmd, int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); static inline bool sk_is_readable(struct sock *sk) { - if (sk->sk_prot->sock_is_readable) - return sk->sk_prot->sock_is_readable(sk); + const struct proto *prot = READ_ONCE(sk->sk_prot); + + if (prot->sock_is_readable) + return prot->sock_is_readable(sk); + return false; } #endif /* _SOCK_H */ -- cgit v1.2.3 From 0b0cae7119a0ec9449d7261b5e672a5fed765068 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 3 Jun 2025 14:14:43 +0300 Subject: x86/its: move its_pages array to struct mod_arch_specific The of pages with ITS thunks allocated for modules are tracked by an array in 'struct module'. Since this is very architecture specific data structure, move it to 'struct mod_arch_specific'. No functional changes. Fixes: 872df34d7c51 ("x86/its: Use dynamic thunks for indirect branches") Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250603111446.2609381-4-rppt@kernel.org --- include/linux/module.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 92e1420fccdf..5faa1fb1f4b4 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -586,11 +586,6 @@ struct module { atomic_t refcnt; #endif -#ifdef CONFIG_MITIGATION_ITS - int its_num_pages; - void **its_page_array; -#endif - #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; -- cgit v1.2.3 From 7cd9a11dd0c3d1dd225795ed1b5b53132888e7b5 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 3 Jun 2025 14:14:45 +0300 Subject: Revert "mm/execmem: Unify early execmem_cache behaviour" The commit d6d1e3e6580c ("mm/execmem: Unify early execmem_cache behaviour") changed early behaviour of execemem ROX cache to allow its usage in early x86 code that allocates text pages when CONFIG_MITGATION_ITS is enabled. The permission management of the pages allocated from execmem for ITS mitigation is now completely contained in arch/x86/kernel/alternatives.c and therefore there is no need to special case early allocations in execmem. This reverts commit d6d1e3e6580ca35071ad474381f053cbf1fb6414. Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250603111446.2609381-6-rppt@kernel.org --- include/linux/execmem.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index ca42d5e46ccc..3be35680a54f 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -54,7 +54,7 @@ enum execmem_range_flags { EXECMEM_ROX_CACHE = (1 << 1), }; -#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM) +#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX /** * execmem_fill_trapping_insns - set memory to contain instructions that * will trap @@ -94,15 +94,9 @@ int execmem_make_temp_rw(void *ptr, size_t size); * Return: 0 on success or negative error code on failure. */ int execmem_restore_rox(void *ptr, size_t size); - -/* - * Called from mark_readonly(), where the system transitions to ROX. - */ -void execmem_cache_make_ro(void); #else static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } -static inline void execmem_cache_make_ro(void) { } #endif /** -- cgit v1.2.3 From 6bdd3a01fe4627ad7a562ba38eb759eba715b671 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2025 16:00:20 +0200 Subject: fs: add missing values to TRACE_IOCB_STRINGS Make sure all values are covered. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250610140020.2227932-1-hch@lst.de Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..d27c402f1162 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -399,7 +399,9 @@ struct readahead_control; { IOCB_WAITQ, "WAITQ" }, \ { IOCB_NOIO, "NOIO" }, \ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \ - { IOCB_DIO_CALLER_COMP, "CALLER_COMP" } + { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \ + { IOCB_AIO_RW, "AIO_RW" }, \ + { IOCB_HAS_METADATA, "AIO_HAS_METADATA" } struct kiocb { struct file *ki_filp; -- cgit v1.2.3 From 488ef3560196ee10fc1c5547e1574a87068c3494 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 28 May 2025 13:18:17 +0100 Subject: KEYS: Invert FINAL_PUT bit Invert the FINAL_PUT bit so that test_bit_acquire and clear_bit_unlock can be used instead of smp_mb. Signed-off-by: Herbert Xu Signed-off-by: David Howells Reviewed-by: Jarkko Sakkinen Signed-off-by: Linus Torvalds --- include/linux/key.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/key.h b/include/linux/key.h index ba05de8579ec..81b8f05c6898 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -236,7 +236,7 @@ struct key { #define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ #define KEY_FLAG_KEEP 8 /* set if key should not be removed */ #define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ -#define KEY_FLAG_FINAL_PUT 10 /* set if final put has happened on key */ +#define KEY_FLAG_USER_ALIVE 10 /* set if final put has not happened on key yet */ /* the key type and key description string * - the desc is used to match a key against search criteria -- cgit v1.2.3 From 5842c01a9ed1d515c8ba2d6d3733eac78ace89c1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 6 Jun 2025 14:32:49 -0400 Subject: Bluetooth: ISO: Fix not using bc_sid as advertisement SID Currently bc_sid is being ignore when acting as Broadcast Source role, so this fix it by passing the bc_sid and then use it when programming the PA: < HCI Command: LE Set Exte.. (0x08|0x0036) plen 25 Handle: 0x01 Properties: 0x0000 Min advertising interval: 140.000 msec (0x00e0) Max advertising interval: 140.000 msec (0x00e0) Channel map: 37, 38, 39 (0x07) Own address type: Random (0x01) Peer address type: Public (0x00) Peer address: 00:00:00:00:00:00 (OUI 00-00-00) Filter policy: Allow Scan Request from Any, Allow Connect Request from Any (0x00) TX power: Host has no preference (0x7f) Primary PHY: LE 1M (0x01) Secondary max skip: 0x00 Secondary PHY: LE 2M (0x02) SID: 0x01 Scan request notifications: Disabled (0x00) Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 9 ++++++--- include/net/bluetooth/hci_sync.h | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f7b1a9eb9543..a760f05fa3fb 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -242,6 +242,7 @@ struct adv_info { __u8 mesh; __u8 instance; __u8 handle; + __u8 sid; __u32 flags; __u16 timeout; __u16 remaining_time; @@ -1551,13 +1552,14 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, u16 timeout); struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos); -struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, +struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, struct bt_iso_qos *qos, __u8 base_len, __u8 *base); struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos); struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, - __u8 dst_type, struct bt_iso_qos *qos, + __u8 dst_type, __u8 sid, + struct bt_iso_qos *qos, __u8 data_len, __u8 *data); struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); @@ -1832,6 +1834,7 @@ int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, void hci_adv_instances_clear(struct hci_dev *hdev); struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance); +struct adv_info *hci_find_adv_sid(struct hci_dev *hdev, u8 sid); struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance); struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, @@ -1839,7 +1842,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u16 timeout, u16 duration, s8 tx_power, u32 min_interval, u32 max_interval, u8 mesh_handle); -struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, +struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u8 sid, u32 flags, u8 data_len, u8 *data, u32 min_interval, u32 max_interval); int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 72558c826aa1..5224f57f6af2 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -115,8 +115,8 @@ int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance); int hci_enable_advertising_sync(struct hci_dev *hdev); int hci_enable_advertising(struct hci_dev *hdev); -int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, - u8 *data, u32 flags, u16 min_interval, +int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 sid, + u8 data_len, u8 *data, u32 flags, u16 min_interval, u16 max_interval, u16 sync_interval); int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance); -- cgit v1.2.3 From 331843c845d15413e9d89fd91cdcfa6912e291c3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Jun 2025 17:23:37 -0700 Subject: scatterlist: fix extraneous '@'-sign kernel-doc notation Using "@argname@" in kernel-doc produces "argname****" (with "argname" in bold) in the generated html output, so use the expected kernel-doc notation of just "@argname" instead. "Fixes:" lines are added in case Matthew's patch [1] is backported. Link: https://lkml.kernel.org/r/20250605002337.2842659-1-rdunlap@infradead.org Link: https://lore.kernel.org/linux-doc/3bc4e779-7a79-42c1-8867-024f643a22fc@infradead.org/T/#m5d2bd9d21fb34f297aa4e7db069f09bc27b89007 [1] Fixes: 0db9299f48eb ("SG: Move functions to lib/scatterlist.c and add sg chaining allocator helpers") Fixes: 8d1d4b538bb1 ("scatterlist: inline sg_next()") Fixes: 18dabf473e15 ("Change table chaining layout") Signed-off-by: Randy Dunlap Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/scatterlist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 0cdbfc42f153..6f8a4965f9b9 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -99,7 +99,7 @@ static inline bool sg_is_last(struct scatterlist *sg) * @sg: The current sg entry * * Description: - * Usually the next entry will be @sg@ + 1, but if this sg element is part + * Usually the next entry will be @sg + 1, but if this sg element is part * of a chained scatterlist, it could jump to the start of a new * scatterlist array. * @@ -254,7 +254,7 @@ static inline void __sg_chain(struct scatterlist *chain_sg, * @sgl: Second scatterlist * * Description: - * Links @prv@ and @sgl@ together, to form a longer scatterlist. + * Links @prv and @sgl together, to form a longer scatterlist. * **/ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, -- cgit v1.2.3 From adcaa890c7a4a91a422168d8fb629183fff07b2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:15 +0000 Subject: net_sched: remove qdisc_tree_flush_backlog() This function is no longer used after the four prior fixes. Given all prior uses were wrong, it seems better to remove it. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250611111515.1983366-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 629368ab2787..638948be4c50 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -973,14 +973,6 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, *backlog = qstats.backlog; } -static inline void qdisc_tree_flush_backlog(struct Qdisc *sch) -{ - __u32 qlen, backlog; - - qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); - qdisc_tree_reduce_backlog(sch, qlen, backlog); -} - static inline void qdisc_purge_queue(struct Qdisc *sch) { __u32 qlen, backlog; -- cgit v1.2.3 From bb666b7c27073b986b75699e51a7102910f58060 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 9 Jun 2025 17:57:49 +0100 Subject: mm: add mmap_prepare() compatibility layer for nested file systems Nested file systems, that is those which invoke call_mmap() within their own f_op->mmap() handlers, may encounter underlying file systems which provide the f_op->mmap_prepare() hook introduced by commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"). We have a chicken-and-egg scenario here - until all file systems are converted to using .mmap_prepare(), we cannot convert these nested handlers, as we can't call f_op->mmap from an .mmap_prepare() hook. So we have to do it the other way round - invoke the .mmap_prepare() hook from an .mmap() one. in order to do so, we need to convert VMA state into a struct vm_area_desc descriptor, invoking the underlying file system's f_op->mmap_prepare() callback passing a pointer to this, and then setting VMA state accordingly and safely. This patch achieves this via the compat_vma_mmap_prepare() function, which we invoke from call_mmap() if f_op->mmap_prepare() is specified in the passed in file pointer. We place the fundamental logic into mm/vma.h where VMA manipulation belongs. We also update the VMA userland tests to accommodate the changes. The compat_vma_mmap_prepare() function and its associated machinery is temporary, and will be removed once the conversion of file systems is complete. We carefully place this code so it can be used with CONFIG_MMU and also with cutting edge nommu silicon. [akpm@linux-foundation.org: export compat_vma_mmap_prepare tp fix build] [lorenzo.stoakes@oracle.com: remove unused declarations] Link: https://lkml.kernel.org/r/ac3ae324-4c65-432a-8c6d-2af988b18ac8@lucifer.local Link: https://lkml.kernel.org/r/20250609165749.344976-1-lorenzo.stoakes@oracle.com Fixes: c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"). Signed-off-by: Lorenzo Stoakes Reported-by: Jann Horn Closes: https://lore.kernel.org/linux-mm/CAG48ez04yOEVx1ekzOChARDDBZzAKwet8PEoPM4Ln3_rk91AzQ@mail.gmail.com/ Reviewed-by: Pedro Falcato Reviewed-by: Vlastimil Babka Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- include/linux/fs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..4ec77da65f14 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2274,10 +2274,12 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) return true; } +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); + static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { - if (WARN_ON_ONCE(file->f_op->mmap_prepare)) - return -EINVAL; + if (file->f_op->mmap_prepare) + return compat_vma_mmap_prepare(file, vma); return file->f_op->mmap(file, vma); } -- cgit v1.2.3 From ac90aad0e9bf7c37e706fdc08ce763a553890bdf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 12 Jun 2025 10:47:09 -0700 Subject: crypto: testmgr - reinstate kconfig control over full self-tests Commit 698de822780f ("crypto: testmgr - make it easier to enable the full set of tests") removed support for building kernels that run only the "fast" set of crypto self-tests by default. This assumed that nearly everyone actually wanted the full set of tests, *if* they had already chosen to enable the tests at all. Unfortunately, it turns out that both Debian and Fedora intentionally have the crypto self-tests enabled in their production kernels. And for production kernels we do need to keep the testing time down, which implies just running the "fast" tests, not the full set of tests. For Fedora, a reason for enabling the tests in production is that they are being (mis)used to meet the FIPS 140-3 pre-operational testing requirement. However, the other reason for enabling the tests in production, which applies to both distros, is that they provide some value in protecting users from buggy drivers. Unfortunately, the crypto/ subsystem has many buggy and untested drivers for off-CPU hardware accelerators on rare platforms. These broken drivers get shipped to users, and there have been multiple examples of the tests preventing these buggy drivers from being used. So effectively, the tests are being relied on in production kernels. I think this is kind of crazy (untested drivers should just not be enabled at all), but that seems to be how things work currently. Thus, reintroduce a kconfig option that controls the level of testing. Call it CRYPTO_SELFTESTS_FULL instead of the original name CRYPTO_MANAGER_EXTRA_TESTS, which was slightly misleading. Moreover, given the "production kernel" use case, make CRYPTO_SELFTESTS depend on EXPERT instead of DEBUG_KERNEL. I also haven't reinstated all the #ifdefs in crypto/testmgr.c. Instead, just rely on the compiler to optimize out unused code. Fixes: 40b9969796bf ("crypto: testmgr - replace CRYPTO_MANAGER_DISABLE_TESTS with CRYPTO_SELFTESTS") Fixes: 698de822780f ("crypto: testmgr - make it easier to enable the full set of tests") Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/internal/simd.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h index 7e7f1ac3b7fd..9e338e7aafbd 100644 --- a/include/crypto/internal/simd.h +++ b/include/crypto/internal/simd.h @@ -44,9 +44,11 @@ void simd_unregister_aeads(struct aead_alg *algs, int count, * * This delegates to may_use_simd(), except that this also returns false if SIMD * in crypto code has been temporarily disabled on this CPU by the crypto - * self-tests, in order to test the no-SIMD fallback code. + * self-tests, in order to test the no-SIMD fallback code. This override is + * currently limited to configurations where the "full" self-tests are enabled, + * because it might be a bit too invasive to be part of the "fast" self-tests. */ -#ifdef CONFIG_CRYPTO_SELFTESTS +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL DECLARE_PER_CPU(bool, crypto_simd_disabled_for_test); #define crypto_simd_usable() \ (may_use_simd() && !this_cpu_read(crypto_simd_disabled_for_test)) -- cgit v1.2.3 From f826ec7966a63d48e16e0868af4e038bf9a1a3ae Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:41:25 +0100 Subject: bio: Fix bio_first_folio() for SPARSEMEM without VMEMMAP It is possible for physically contiguous folios to have discontiguous struct pages if SPARSEMEM is enabled and SPARSEMEM_VMEMMAP is not. This is correctly handled by folio_page_idx(), so remove this open-coded implementation. Fixes: 640d1930bef4 (block: Add bio_for_each_folio_all()) Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20250612144126.2849931-1-willy@infradead.org Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 9c37c66ef9ca..46ffac5caab7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -291,7 +291,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + - PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); fi->_next = folio_next(fi->folio); -- cgit v1.2.3 From 5e223e06ee7c6d8f630041a0645ac90e39a42cc6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:42:53 +0100 Subject: block: Fix bvec_set_folio() for very large folios Similarly to 26064d3e2b4d ("block: fix adding folio to bio"), if we attempt to add a folio that is larger than 4GB, we'll silently truncate the offset and len. Widen the parameters to size_t, assert that the length is less than 4GB and set the first page that contains the interesting data rather than the first page of the folio. Fixes: 26db5ee15851 (block: add a bvec_set_folio helper) Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20250612144255.2850278-1-willy@infradead.org Signed-off-by: Jens Axboe --- include/linux/bvec.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 204b22a99c4b..0a80e1f9aa20 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -57,9 +57,12 @@ static inline void bvec_set_page(struct bio_vec *bv, struct page *page, * @offset: offset into the folio */ static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio, - unsigned int len, unsigned int offset) + size_t len, size_t offset) { - bvec_set_page(bv, &folio->page, len, offset); + unsigned long nr = offset / PAGE_SIZE; + + WARN_ON_ONCE(len > UINT_MAX); + bvec_set_page(bv, folio_page(folio, nr), len, offset % PAGE_SIZE); } /** -- cgit v1.2.3 From 594902c986e269660302f09df9ec4bf1cf017b77 Mon Sep 17 00:00:00 2001 From: Qinyun Tan Date: Sat, 31 May 2025 02:20:53 +0800 Subject: x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the rdt_mon_domain structure representing a NUMA node relies on the cacheinfo interface (rdt_mon_domain::ci) to store L3 cache information (e.g., shared_cpu_map) for monitoring. The L3 cache information of a SNC NUMA node determines which domains are summed for the "top level" L3-scoped events. rdt_mon_domain::ci is initialized using the first online CPU of a NUMA node. When this CPU goes offline, its shared_cpu_map is cleared to contain only the offline CPU itself. Subsequently, attempting to read counters via smp_call_on_cpu(offline_cpu) fails (and error ignored), returning zero values for "top-level events" without any error indication. Replace the cacheinfo references in struct rdt_mon_domain and struct rmid_read with the cacheinfo ID (a unique identifier for the L3 cache). rdt_domain_hdr::cpu_mask contains the online CPUs associated with that domain. When reading "top-level events", select a CPU from rdt_domain_hdr::cpu_mask and utilize its L3 shared_cpu_map to determine valid CPUs for reading RMID counter via the MSR interface. Considering all CPUs associated with the L3 cache improves the chances of picking a housekeeping CPU on which the counter reading work can be queued, avoiding an unnecessary IPI. Fixes: 328ea68874642 ("x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files") Signed-off-by: Qinyun Tan Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Tony Luck Link: https://lore.kernel.org/20250530182053.37502-2-qinyuntan@linux.alibaba.com --- include/linux/resctrl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 9ba771f2ddea..6fb4894b8cfd 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -159,7 +159,7 @@ struct rdt_ctrl_domain { /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types - * @ci: cache info for this domain + * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -170,7 +170,7 @@ struct rdt_ctrl_domain { */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; - struct cacheinfo *ci; + unsigned int ci_id; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local; -- cgit v1.2.3 From a85b8544d46390469b6ca72d6bfd3ecb7be985ff Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 14 Jun 2025 00:30:37 +0200 Subject: wifi: remove zero-length arrays All of these are really meant to be variable-length, and in the case of s1g_beacon it's actually accessed. Make that one in particular, and a couple of others (that aren't used as arrays now), actually variable. Reported-by: syzbot+fd222bb38e916df26fa4@syzkaller.appspotmail.com Fixes: 1e1f706fc2ce ("wifi: cfg80211/mac80211: correctly parse S1G beacon optional elements") Link: https://patch.msgid.link/20250614003037.a3e82e882251.I2e8b58e56ff2a9f8b06c66f036578b7c1d4e4685@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ce377f7fb912..22f39e5e2ff1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1278,7 +1278,7 @@ struct ieee80211_ext { u8 sa[ETH_ALEN]; __le32 timestamp; u8 change_seq; - u8 variable[0]; + u8 variable[]; } __packed s1g_beacon; } u; } __packed __aligned(2); @@ -1536,7 +1536,7 @@ struct ieee80211_mgmt { u8 action_code; u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed tdls_discover_resp; struct { u8 action_code; @@ -1721,35 +1721,35 @@ struct ieee80211_tdls_data { struct { u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed setup_req; struct { __le16 status_code; u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed setup_resp; struct { __le16 status_code; u8 dialog_token; - u8 variable[0]; + u8 variable[]; } __packed setup_cfm; struct { __le16 reason_code; - u8 variable[0]; + u8 variable[]; } __packed teardown; struct { u8 dialog_token; - u8 variable[0]; + u8 variable[]; } __packed discover_req; struct { u8 target_channel; u8 oper_class; - u8 variable[0]; + u8 variable[]; } __packed chan_switch_req; struct { __le16 status_code; - u8 variable[0]; + u8 variable[]; } __packed chan_switch_resp; } u; } __packed; -- cgit v1.2.3 From fc92099902fbf21000554678a47654b029c15a4d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 12:36:06 -0300 Subject: tools headers: Synchronize linux/bits.h with the kernel sources To pick up the changes in this cset: 1e7933a575ed8af4 ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") 5b572e8a9f3dcd6e ("bits: introduce fixed-type BIT_U*()") 19408200c094858d ("bits: introduce fixed-type GENMASK_U*()") 31299a5e02112411 ("bits: add comments and newlines to #if, #else and #endif directives") This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/linux/bits.h include/linux/bits.h Please see tools/include/uapi/README for further details. Acked-by: Vincent Mailhol Cc: I Hsin Cheng Cc: Yury Norov Cc: Adrian Hunter Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Lucas De Marchi Cc: Namhyung Kim Cc: Yury Norov Link: https://lore.kernel.org/r/aEr0ZJ60EbshEy6p@x1 Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/bits.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index a04afef9efca..682b406e1067 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) -- cgit v1.2.3 From 7851263998d4269125fd6cb3fdbfc7c6db853859 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 16 Jun 2025 11:21:15 -0700 Subject: atm: Revert atm_account_tx() if copy_from_iter_full() fails. In vcc_sendmsg(), we account skb->truesize to sk->sk_wmem_alloc by atm_account_tx(). It is expected to be reverted by atm_pop_raw() later called by vcc->dev->ops->send(vcc, skb). However, vcc_sendmsg() misses the same revert when copy_from_iter_full() fails, and then we will leak a socket. Let's factorise the revert part as atm_return_tx() and call it in the failure path. Note that the corresponding sk_wmem_alloc operation can be found in alloc_tx() as of the blamed commit. $ git blame -L:alloc_tx net/atm/common.c c55fa3cccbc2c~ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Simon Horman Closes: https://lore.kernel.org/netdev/20250614161959.GR414686@horms.kernel.org/ Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250616182147.963333-3-kuni1840@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/atmdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 9b02961d65ee..45f2f278b50a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -249,6 +249,12 @@ static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb) ATM_SKB(skb)->atm_options = vcc->atm_options; } +static inline void atm_return_tx(struct atm_vcc *vcc, struct sk_buff *skb) +{ + WARN_ON_ONCE(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, + &sk_atm(vcc)->sk_wmem_alloc)); +} + static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) { atomic_add(truesize, &sk_atm(vcc)->sk_rmem_alloc); -- cgit v1.2.3 From 30b58444807c93bffeaba7d776110f2a909d2f9a Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 17 Jun 2025 13:40:56 +0800 Subject: erofs: remove unused trace event erofs_destroy_inode The trace event `erofs_destroy_inode` was added but remains unused. This unused event contributes approximately 5KB to the kernel module size. Reported-by: Steven Rostedt Closes: https://lore.kernel.org/r/20250612224906.15000244@batman.local.home Fixes: 13f06f48f7bf ("staging: erofs: support tracepoint") Cc: stable@vger.kernel.org Reviewed-by: Hongbo Li Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250617054056.3232365-1-hsiangkao@linux.alibaba.com --- include/trace/events/erofs.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index a5f4b9234f46..dad7360f42f9 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -211,24 +211,6 @@ TRACE_EVENT(erofs_map_blocks_exit, show_mflags(__entry->mflags), __entry->ret) ); -TRACE_EVENT(erofs_destroy_inode, - TP_PROTO(struct inode *inode), - - TP_ARGS(inode), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( erofs_nid_t, nid ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->nid = EROFS_I(inode)->nid; - ), - - TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry)) -); - #endif /* _TRACE_EROFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 635e118317ffa773f6d25ec6a71b7927d7e8886a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 10:54:44 +0200 Subject: Revert "mtd: core: always create master device" The idea behind this patch was to always let a "master" mtd device available to anchor runtime PM. Historically, there was no mtd device representing the whole storage as soon as partitions were coming into play. The introduction of CONFIG_MTD_PARTITIONED_MASTER allowed to keep this "master" device, but was not enabled by default to avoid breaking existing users (otherwise the mtd device numbering would be totally messed up with an off by 1, at least). The approach of adding an mtd_master class on top of partitioned mtd devices is breaking the mtd core in many creative ways, so better think again this approach and revert the faulty changes for now. This reverts commit 0aa7b390fc40a871267a2328bbbefca8b37ad307. Fixes: 0aa7b390fc40 ("mtd: core: always create master device") Tested-by: Guenter Roeck Acked-by: Guenter Roeck Signed-off-by: Miquel Raynal --- include/linux/mtd/partitions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 5daf80df9e89..b74a539ec581 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -108,7 +108,7 @@ extern void deregister_mtd_parser(struct mtd_part_parser *parser); deregister_mtd_parser) int mtd_add_partition(struct mtd_info *master, const char *name, - long long offset, long long length, struct mtd_info **part); + long long offset, long long length); int mtd_del_partition(struct mtd_info *master, int partno); uint64_t mtd_get_device_size(const struct mtd_info *mtd); -- cgit v1.2.3 From dba90f5a79c13936de4273a19e67908a0c296afe Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 10:48:00 +0200 Subject: mtd: spinand: winbond: Prevent unsupported frequencies on dual/quad I/O variants Dual and quad capable chips natively support dual and quad I/O variants at up to 104MHz (1-2-2 and 1-4-4 operations). Reaching the maximum speed of 166MHz is theoretically possible (while still unsupported in the field) by adding a few more dummy cycles. Let's be accurate and clearly state this limit. Setting a maximum frequency implies adding the frequency parameter to the macro, which is done using a variadic argument to avoid impacting all the other drivers which already make use of this macro. Fixes: 1ea808b4d15b ("mtd: spinand: winbond: Update the *JW chip definitions") Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 811a0f356315..15eaa09da998 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -113,11 +113,12 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len, ...) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ SPI_MEM_OP_ADDR(2, addr, 2), \ SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ @@ -151,11 +152,12 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len, ...) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(2, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ -- cgit v1.2.3 From c6d732c38f93c4aebd204a5656583142289c3a2e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Jun 2025 13:22:40 -0700 Subject: net: ethtool: remove duplicate defines for family info Commit under fixes switched to uAPI generation from the YAML spec. A number of custom defines were left behind, mostly for commands very hard to express in YAML spec. Among what was left behind was the name and version of the generic netlink family. Problem is that the codegen always outputs those values so we ended up with a duplicated, differently named set of defines. Provide naming info in YAML and remove the incorrect defines. Fixes: 8d0580c6ebdd ("ethtool: regenerate uapi header from the spec") Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250617202240.811179-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 4 ---- include/uapi/linux/ethtool_netlink_generated.h | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 9ff72cfb2e98..09a75bdb6560 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -208,10 +208,6 @@ enum { ETHTOOL_A_STATS_PHY_MAX = (__ETHTOOL_A_STATS_PHY_CNT - 1) }; -/* generic netlink info */ -#define ETHTOOL_GENL_NAME "ethtool" -#define ETHTOOL_GENL_VERSION 1 - #define ETHTOOL_MCGRP_MONITOR_NAME "monitor" #endif /* _UAPI_LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 9a02f579de22..aa8ab5227c1e 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -6,8 +6,8 @@ #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H -#define ETHTOOL_FAMILY_NAME "ethtool" -#define ETHTOOL_FAMILY_VERSION 1 +#define ETHTOOL_GENL_NAME "ethtool" +#define ETHTOOL_GENL_VERSION 1 enum { ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, -- cgit v1.2.3 From cf207eac06f661fb692f405d5ab8230df884ee52 Mon Sep 17 00:00:00 2001 From: Binbin Wu Date: Tue, 10 Jun 2025 10:14:20 +0800 Subject: KVM: TDX: Handle TDG.VP.VMCALL Handle TDVMCALL for GetQuote to generate a TD-Quote. GetQuote is a doorbell-like interface used by TDX guests to request VMM to generate a TD-Quote signed by a service hosting TD-Quoting Enclave operating on the host. A TDX guest passes a TD Report (TDREPORT_STRUCT) in a shared-memory area as parameter. Host VMM can access it and queue the operation for a service hosting TD-Quoting enclave. When completed, the Quote is returned via the same shared-memory area. KVM only checks the GPA from the TDX guest has the shared-bit set and drops the shared-bit before exiting to userspace to avoid bleeding the shared-bit into KVM's exit ABI. KVM forwards the request to userspace VMM (e.g. QEMU) and userspace VMM queues the operation asynchronously. KVM sets the return code according to the 'ret' field set by userspace to notify the TDX guest whether the request has been queued successfully or not. When the request has been queued successfully, the TDX guest can poll the status field in the shared-memory area to check whether the Quote generation is completed or not. When completed, the generated Quote is returned via the same buffer. Add KVM_EXIT_TDX as a new exit reason to userspace. Userspace is required to handle the KVM exit reason as the initial support for TDX, by reentering KVM to ensure that the TDVMCALL is complete. While at it, add a note that KVM_EXIT_HYPERCALL also requires reentry with KVM_RUN. Signed-off-by: Binbin Wu Tested-by: Mikko Ylinen Acked-by: Kai Huang [Adjust userspace API. - Paolo] Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d00b85cb168c..e23e7286ad1a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -447,6 +448,22 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + }; + } tdx; /* Fix the size of the union. */ char padding[256]; }; -- cgit v1.2.3 From 25e8b1dd4883e6c251c3db5b347f3c8ae4ade921 Mon Sep 17 00:00:00 2001 From: Binbin Wu Date: Tue, 10 Jun 2025 10:14:21 +0800 Subject: KVM: TDX: Exit to userspace for GetTdVmCallInfo Exit to userspace for TDG.VP.VMCALL via KVM_EXIT_TDX, to allow userspace to provide information about the support of TDVMCALLs when r12 is 1 for the TDVMCALLs beyond the GHCI base API. GHCI spec defines the GHCI base TDVMCALLs: , , , , <#VE.RequestMMIO>, , , and . They must be supported by VMM to support TDX guests. For GetTdVmCallInfo - When leaf (r12) to enumerate TDVMCALL functionality is set to 0, successful execution indicates all GHCI base TDVMCALLs listed above are supported. Update the KVM TDX document with the set of the GHCI base APIs. - When leaf (r12) to enumerate TDVMCALL functionality is set to 1, it indicates the TDX guest is querying the supported TDVMCALLs beyond the GHCI base TDVMCALLs. Exit to userspace to let userspace set the TDVMCALL sub-function bit(s) accordingly to the leaf outputs. KVM could set the TDVMCALL bit(s) supported by itself when the TDVMCALLs don't need support from userspace after returning from userspace and before entering guest. Currently, no such TDVMCALLs implemented, KVM just sets the values returned from userspace. Suggested-by: Paolo Bonzini Signed-off-by: Binbin Wu [Adjust userspace API. - Paolo] Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e23e7286ad1a..37891580d05d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -462,6 +462,11 @@ struct kvm_run { __u64 gpa; __u64 size; } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; }; } tdx; /* Fix the size of the union. */ -- cgit v1.2.3