From e228e7d382fa85005ee2ebf303e1bf194aca49a8 Mon Sep 17 00:00:00 2001
From: Alice Ryhl
Date: Mon, 25 Aug 2025 09:22:09 +0000
Subject: drm/gpuvm: fix various typos in .c and .h gpuvm file

After working with this code for a while, I came across several typos.
This patch fixes them.

Signed-off-by: Alice Ryhl
Link: https://lore.kernel.org/r/20250825-gpuvm-typo-fix-v1-1-14e9e78e28e6@google.com
Signed-off-by: Danilo Krummrich
---
 include/drm/drm_gpuvm.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index 274532facfd6..2e7088264355 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -103,7 +103,7 @@ struct drm_gpuva {
 	} va;
 
 	/**
-	 * @gem: structure containing the &drm_gem_object and it's offset
+	 * @gem: structure containing the &drm_gem_object and its offset
 	 */
 	struct {
 		/**
@@ -843,7 +843,7 @@ struct drm_gpuva_op_map {
 	} va;
 
 	/**
-	 * @gem: structure containing the &drm_gem_object and it's offset
+	 * @gem: structure containing the &drm_gem_object and its offset
 	 */
 	struct {
 		/**
@@ -1189,11 +1189,11 @@ struct drm_gpuvm_ops {
 
 	/**
 	 * @sm_step_unmap: called from &drm_gpuvm_sm_map and
-	 * &drm_gpuvm_sm_unmap to unmap an existent mapping
+	 * &drm_gpuvm_sm_unmap to unmap an existing mapping
 	 *
-	 * This callback is called when existent mapping needs to be unmapped.
+	 * This callback is called when existing mapping needs to be unmapped.
 	 * This is the case when either a newly requested mapping encloses an
-	 * existent mapping or an unmap of an existent mapping is requested.
+	 * existing mapping or an unmap of an existing mapping is requested.
 	 *
 	 * The &priv pointer matches the one the driver passed to
 	 * &drm_gpuvm_sm_map or &drm_gpuvm_sm_unmap, respectively.
--
cgit v1.2.3
Peter Anvin" Cc: Ingo Molnar Cc: Rob Herring Cc: Saravana Kannan Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- include/linux/kexec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 1b10a5d84b68..39fe3e6cd282 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -460,7 +460,8 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ - KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG) + KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \ + KEXEC_FILE_NO_CMA) /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; -- cgit v1.2.3 From 7cc183f2e67d19b03ee5c13a6664b8c6cc37ff9d Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Mon, 18 Aug 2025 11:02:04 +0900 Subject: mm: move page table sync declarations to linux/pgtable.h During our internal testing, we started observing intermittent boot failures when the machine uses 4-level paging and has a large amount of persistent memory: BUG: unable to handle page fault for address: ffffe70000000034 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] SMP NOPTI RIP: 0010:__init_single_page+0x9/0x6d Call Trace: __init_zone_device_page+0x17/0x5d memmap_init_zone_device+0x154/0x1bb pagemap_range+0x2e0/0x40f memremap_pages+0x10b/0x2f0 devm_memremap_pages+0x1e/0x60 dev_dax_probe+0xce/0x2ec [device_dax] dax_bus_probe+0x6d/0xc9 [... snip ...] It turns out that the kernel panics while initializing vmemmap (struct page array) when the vmemmap region spans two PGD entries, because the new PGD entry is only installed in init_mm.pgd, but not in the page tables of other tasks. And looking at __populate_section_memmap(): if (vmemmap_can_optimize(altmap, pgmap)) // does not sync top level page tables r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap); else // sync top level page tables in x86 r = vmemmap_populate(start, end, nid, altmap); In the normal path, vmemmap_populate() in arch/x86/mm/init_64.c synchronizes the top level page table (See commit 9b861528a801 ("x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes")) so that all tasks in the system can see the new vmemmap area. However, when vmemmap_can_optimize() returns true, the optimized path skips synchronization of top-level page tables. This is because vmemmap_populate_compound_pages() is implemented in core MM code, which does not handle synchronization of the top-level page tables. Instead, the core MM has historically relied on each architecture to perform this synchronization manually. We're not the first party to encounter a crash caused by not-sync'd top level page tables: earlier this year, Gwan-gyeong Mun attempted to address the issue [1] [2] after hitting a kernel panic when x86 code accessed the vmemmap area before the corresponding top-level entries were synced. At that time, the issue was believed to be triggered only when struct page was enlarged for debugging purposes, and the patch did not get further updates. It turns out that current approach of relying on each arch to handle the page table sync manually is fragile because 1) it's easy to forget to sync the top level page table, and 2) it's also easy to overlook that the kernel should not access the vmemmap and direct mapping areas before the sync. 
From 7cc183f2e67d19b03ee5c13a6664b8c6cc37ff9d Mon Sep 17 00:00:00 2001
From: Harry Yoo
Date: Mon, 18 Aug 2025 11:02:04 +0900
Subject: mm: move page table sync declarations to linux/pgtable.h

During our internal testing, we started observing intermittent boot
failures when the machine uses 4-level paging and has a large amount of
persistent memory:

  BUG: unable to handle page fault for address: ffffe70000000034
  #PF: supervisor write access in kernel mode
  #PF: error_code(0x0002) - not-present page
  PGD 0 P4D 0
  Oops: 0002 [#1] SMP NOPTI
  RIP: 0010:__init_single_page+0x9/0x6d
  Call Trace:
   __init_zone_device_page+0x17/0x5d
   memmap_init_zone_device+0x154/0x1bb
   pagemap_range+0x2e0/0x40f
   memremap_pages+0x10b/0x2f0
   devm_memremap_pages+0x1e/0x60
   dev_dax_probe+0xce/0x2ec [device_dax]
   dax_bus_probe+0x6d/0xc9
  [... snip ...]

It turns out that the kernel panics while initializing vmemmap (struct
page array) when the vmemmap region spans two PGD entries, because the
new PGD entry is only installed in init_mm.pgd, but not in the page
tables of other tasks.

And looking at __populate_section_memmap():

  if (vmemmap_can_optimize(altmap, pgmap))
          // does not sync top level page tables
          r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
  else
          // syncs top level page tables on x86
          r = vmemmap_populate(start, end, nid, altmap);

In the normal path, vmemmap_populate() in arch/x86/mm/init_64.c
synchronizes the top level page table (see commit 9b861528a801 ("x86-64,
mem: Update all PGDs for direct mapping and vmemmap mapping changes")) so
that all tasks in the system can see the new vmemmap area.

However, when vmemmap_can_optimize() returns true, the optimized path
skips synchronization of top-level page tables. This is because
vmemmap_populate_compound_pages() is implemented in core MM code, which
does not handle synchronization of the top-level page tables. Instead,
the core MM has historically relied on each architecture to perform this
synchronization manually.

We're not the first party to encounter a crash caused by unsynchronized
top-level page tables: earlier this year, Gwan-gyeong Mun attempted to
address the issue [1] [2] after hitting a kernel panic when x86 code
accessed the vmemmap area before the corresponding top-level entries
were synced. At that time, the issue was believed to be triggered only
when struct page was enlarged for debugging purposes, and the patch did
not get further updates.

It turns out that the current approach of relying on each arch to handle
the page table sync manually is fragile because 1) it's easy to forget
to sync the top level page table, and 2) it's also easy to overlook that
the kernel should not access the vmemmap and direct mapping areas before
the sync.

# The solution: Make page table sync code more robust and harder to miss

To address this, Dave Hansen suggested [3] [4] introducing
{pgd,p4d}_populate_kernel() for updating the kernel portion of the page
tables, and allowing each architecture to explicitly perform
synchronization when installing top-level entries. With this approach,
we no longer need to worry about missing the sync step, reducing the
risk of future regressions.

The new interface reuses the existing ARCH_PAGE_TABLE_SYNC_MASK,
PGTBL_P*D_MODIFIED and arch_sync_kernel_mappings() facility used by
vmalloc and ioremap to synchronize page tables.

pgd_populate_kernel() looks like this:

  static inline void pgd_populate_kernel(unsigned long addr, pgd_t *pgd,
                                         p4d_t *p4d)
  {
          pgd_populate(&init_mm, pgd, p4d);
          if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED)
                  arch_sync_kernel_mappings(addr, addr);
  }

It is worth noting that vmalloc() and apply_to_range() carefully
synchronize page tables by calling p*d_alloc_track() and
arch_sync_kernel_mappings(), and thus they are not affected by this
patch series.

This series was hugely inspired by Dave Hansen's suggestion and hence
adds Suggested-by: Dave Hansen.

Cc stable because the lack of this series opens the door to intermittent
boot failures.

This patch (of 3):

Move ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() to
linux/pgtable.h so that they can be used outside of vmalloc and ioremap.

Link: https://lkml.kernel.org/r/20250818020206.4517-1-harry.yoo@oracle.com
Link: https://lkml.kernel.org/r/20250818020206.4517-2-harry.yoo@oracle.com
Link: https://lore.kernel.org/linux-mm/20250220064105.808339-1-gwan-gyeong.mun@intel.com [1]
Link: https://lore.kernel.org/linux-mm/20250311114420.240341-1-gwan-gyeong.mun@intel.com [2]
Link: https://lore.kernel.org/linux-mm/d1da214c-53d3-45ac-a8b6-51821c5416e4@intel.com [3]
Link: https://lore.kernel.org/linux-mm/4d800744-7b88-41aa-9979-b245e8bf794b@intel.com [4]
Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
Signed-off-by: Harry Yoo
Acked-by: Kiryl Shutsemau
Reviewed-by: Mike Rapoport (Microsoft)
Reviewed-by: "Uladzislau Rezki (Sony)"
Reviewed-by: Lorenzo Stoakes
Acked-by: David Hildenbrand
Cc: Alexander Potapenko
Cc: Alistair Popple
Cc: Andrey Konovalov
Cc: Andrey Ryabinin
Cc: Andy Lutomirski
Cc: "Aneesh Kumar K.V"
Cc: Anshuman Khandual
Cc: Ard Biesheuvel
Cc: Arnd Bergmann
Cc: bibo mao
Cc: Borislav Petkov
Cc: Christoph Lameter (Ampere)
Cc: Dennis Zhou
Cc: Dev Jain
Cc: Dmitriy Vyukov
Cc: Gwan-gyeong Mun
Cc: Ingo Molnar
Cc: Jane Chu
Cc: Joao Martins
Cc: Joerg Roedel
Cc: John Hubbard
Cc: Kevin Brodsky
Cc: Liam Howlett
Cc: Michal Hocko
Cc: Oscar Salvador
Cc: Peter Xu
Cc: Peter Zijlstra
Cc: Qi Zheng
Cc: Ryan Roberts
Cc: Suren Baghdasaryan
Cc: Tejun Heo
Cc: Thomas Gleixner
Cc: Thomas Huth
Cc: Vincenzo Frascino
Cc: Vlastimil Babka
Cc: Dave Hansen
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton
---
 include/linux/pgtable.h | 16 ++++++++++++++++
 include/linux/vmalloc.h | 16 ----------------
 2 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 4c035637eeb7..ba699df6ef69 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1467,6 +1467,22 @@ static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned
 }
 #endif
 
+/*
+ * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
+ * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
+ * needs to be called.
+ */
+#ifndef ARCH_PAGE_TABLE_SYNC_MASK
+#define ARCH_PAGE_TABLE_SYNC_MASK 0
+#endif
+
+/*
+ * There is no default implementation for arch_sync_kernel_mappings(). It is
+ * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
+ * is 0.
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
+
 #endif /* CONFIG_MMU */
 
 /*
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index fdc9aeb74a44..2759dac6be44 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -219,22 +219,6 @@ extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot,
 		     struct page **pages, unsigned int page_shift);
 
-/*
- * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
- * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
- * needs to be called.
- */
-#ifndef ARCH_PAGE_TABLE_SYNC_MASK
-#define ARCH_PAGE_TABLE_SYNC_MASK 0
-#endif
-
-/*
- * There is no default implementation for arch_sync_kernel_mappings(). It is
- * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
- * is 0.
- */
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
-
 /*
  * Lowlevel-APIs (not for driver use!)
  */
--
cgit v1.2.3
From f2d2f9598ebb0158a3fe17cda0106d7752e654a2 Mon Sep 17 00:00:00 2001
From: Harry Yoo
Date: Mon, 18 Aug 2025 11:02:05 +0900
Subject: mm: introduce and use {pgd,p4d}_populate_kernel()

Introduce and use {pgd,p4d}_populate_kernel() in core MM code when
populating PGD and P4D entries for the kernel address space. These
helpers ensure proper synchronization of page tables when updating the
kernel portion of top-level page tables.

Until now, the kernel has relied on each architecture to handle
synchronization of top-level page tables in an ad-hoc manner. For
example, see commit 9b861528a801 ("x86-64, mem: Update all PGDs for
direct mapping and vmemmap mapping changes").

However, this approach has proven fragile for the following reasons:

1) It is easy to forget to perform the necessary page table
   synchronization when introducing new changes. For instance, commit
   4917f55b4ef9 ("mm/sparse-vmemmap: improve memory savings for
   compound devmaps") overlooked the need to synchronize page tables
   for the vmemmap area.

2) It is also easy to overlook that the vmemmap and direct mapping
   areas must not be accessed before explicit page table
   synchronization. For example, commit 8d400913c231 ("x86/vmemmap:
   handle unpopulated sub-pmd ranges") caused crashes by accessing the
   vmemmap area before calling sync_global_pgds().

To address this, as suggested by Dave Hansen, introduce _kernel()
variants of the page table population helpers, which invoke
architecture-specific hooks to properly synchronize page tables. These
are introduced in a new header file, include/linux/pgalloc.h, so they
can be called from common code.

They reuse existing infrastructure for vmalloc and ioremap.
Synchronization requirements are determined by ARCH_PAGE_TABLE_SYNC_MASK,
and the actual synchronization is performed by
arch_sync_kernel_mappings().

This change targets only x86_64, so only PGD- and P4D-level helpers are
introduced. For now, these helpers are no-ops since no architecture sets
PGTBL_{PGD,P4D}_MODIFIED in ARCH_PAGE_TABLE_SYNC_MASK. In theory, PUD-
and PMD-level helpers can be added later if needed by other
architectures. 32-bit architectures (x86-32 and arm) only handle
PGTBL_PMD_MODIFIED, so p*d_populate_kernel() will never affect them
unless a PMD-level helper is introduced.

[harry.yoo@oracle.com: fix KASAN build error due to p*d_populate_kernel()]
  Link: https://lkml.kernel.org/r/20250822020727.202749-1-harry.yoo@oracle.com
Link: https://lkml.kernel.org/r/20250818020206.4517-3-harry.yoo@oracle.com
Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
Signed-off-by: Harry Yoo
Suggested-by: Dave Hansen
Acked-by: Kiryl Shutsemau
Reviewed-by: Mike Rapoport (Microsoft)
Reviewed-by: Lorenzo Stoakes
Acked-by: David Hildenbrand
Cc: Alexander Potapenko
Cc: Alistair Popple
Cc: Andrey Konovalov
Cc: Andrey Ryabinin
Cc: Andy Lutomirski
Cc: "Aneesh Kumar K.V"
Cc: Anshuman Khandual
Cc: Ard Biesheuvel
Cc: Arnd Bergmann
Cc: bibo mao
Cc: Borislav Petkov
Cc: Christoph Lameter (Ampere)
Cc: Dennis Zhou
Cc: Dev Jain
Cc: Dmitriy Vyukov
Cc: Gwan-gyeong Mun
Cc: Ingo Molnar
Cc: Jane Chu
Cc: Joao Martins
Cc: Joerg Roedel
Cc: John Hubbard
Cc: Kevin Brodsky
Cc: Liam Howlett
Cc: Michal Hocko
Cc: Oscar Salvador
Cc: Peter Xu
Cc: Peter Zijlstra
Cc: Qi Zheng
Cc: Ryan Roberts
Cc: Suren Baghdasaryan
Cc: Tejun Heo
Cc: Thomas Gleixner
Cc: Thomas Huth
Cc: "Uladzislau Rezki (Sony)"
Cc: Vincenzo Frascino
Cc: Vlastimil Babka
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton
---
 include/linux/pgalloc.h | 29 +++++++++++++++++++++++++++++
 include/linux/pgtable.h | 13 +++++++------
 2 files changed, 36 insertions(+), 6 deletions(-)
 create mode 100644 include/linux/pgalloc.h

(limited to 'include')

diff --git a/include/linux/pgalloc.h b/include/linux/pgalloc.h
new file mode 100644
index 000000000000..9174fa59bbc5
--- /dev/null
+++ b/include/linux/pgalloc.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PGALLOC_H
+#define _LINUX_PGALLOC_H
+
+#include <linux/pgtable.h>
+#include <asm/pgalloc.h>
+
+/*
+ * {pgd,p4d}_populate_kernel() are defined as macros to allow
+ * compile-time optimization based on the configured page table levels.
+ * Without this, linking may fail because callers (e.g., KASAN) may rely
+ * on calls to these functions being optimized away when passing symbols
+ * that exist only for certain page table levels.
+ */
+#define pgd_populate_kernel(addr, pgd, p4d)				\
+	do {								\
+		pgd_populate(&init_mm, pgd, p4d);			\
+		if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED)	\
+			arch_sync_kernel_mappings(addr, addr);		\
+	} while (0)
+
+#define p4d_populate_kernel(addr, p4d, pud)				\
+	do {								\
+		p4d_populate(&init_mm, p4d, pud);			\
+		if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_P4D_MODIFIED)	\
+			arch_sync_kernel_mappings(addr, addr);		\
+	} while (0)
+
+#endif /* _LINUX_PGALLOC_H */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index ba699df6ef69..2b80fd456c8b 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1469,8 +1469,8 @@ static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned
 
 /*
  * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
- * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
- * needs to be called.
+ * and let generic vmalloc, ioremap and page table update code know when
+ * arch_sync_kernel_mappings() needs to be called.
  */
 #ifndef ARCH_PAGE_TABLE_SYNC_MASK
 #define ARCH_PAGE_TABLE_SYNC_MASK 0
@@ -1954,10 +1954,11 @@ static inline bool arch_has_pfn_modify_check(void)
 /*
  * Page Table Modification bits for pgtbl_mod_mask.
  *
- * These are used by the p?d_alloc_track*() set of functions an in the generic
- * vmalloc/ioremap code to track at which page-table levels entries have been
- * modified. Based on that the code can better decide when vmalloc and ioremap
- * mapping changes need to be synchronized to other page-tables in the system.
+ * These are used by the p?d_alloc_track*() and p*d_populate_kernel()
+ * functions in the generic vmalloc, ioremap and page table update code
+ * to track at which page-table levels entries have been modified.
+ * Based on that the code can better decide when page table changes need
+ * to be synchronized to other page-tables in the system.
  */
 #define __PGTBL_PGD_MODIFIED	0
 #define __PGTBL_P4D_MODIFIED	1
--
cgit v1.2.3
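For context, the pattern the new helpers enable: a condensed sketch
modeled on the vmemmap PGD walker that this series converts in
mm/sparse-vmemmap.c (abridged, so treat it as illustrative rather than
the exact upstream hunk):

  #include <linux/pgalloc.h>
  #include <linux/mm.h>

  /*
   * If the PGD entry covering @addr is empty, allocate a zeroed P4D
   * page and install it with pgd_populate_kernel(), which also runs
   * arch_sync_kernel_mappings() whenever the architecture opts in via
   * ARCH_PAGE_TABLE_SYNC_MASK.
   */
  pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
  {
  	pgd_t *pgd = pgd_offset_k(addr);

  	if (pgd_none(*pgd)) {
  		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);

  		if (!p)
  			return NULL;
  		/* was: pgd_populate(&init_mm, pgd, p) -- no sync */
  		pgd_populate_kernel(addr, pgd, p);
  	}
  	return pgd;
  }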
From 4beb44a2d62dddfe450f310aa1a950901731cb3a Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)"
Date: Sun, 31 Aug 2025 18:34:33 +0100
Subject: net: phy: add phy_interface_weight()

Signed-off-by: Russell King (Oracle)
Reviewed-by: Andrew Lunn
Link: https://patch.msgid.link/E1uslwn-00000001SOx-0a7H@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski
---
 include/linux/phy.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 4c2b8b6e7187..bb45787d8684 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -169,6 +169,11 @@ static inline bool phy_interface_empty(const unsigned long *intf)
 	return bitmap_empty(intf, PHY_INTERFACE_MODE_MAX);
 }
 
+static inline unsigned int phy_interface_weight(const unsigned long *intf)
+{
+	return bitmap_weight(intf, PHY_INTERFACE_MODE_MAX);
+}
+
 static inline void phy_interface_and(unsigned long *dst, const unsigned long *a,
 				     const unsigned long *b)
 {
--
cgit v1.2.3
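For context, the helper simply counts the modes set in a PHY
interface-mode bitmap. A usage sketch (the caller and its fallback
policy are illustrative, not from the patch):

  #include <linux/bitmap.h>
  #include <linux/phy.h>

  /*
   * Resolve a mode directly when the supported-interfaces bitmap holds
   * exactly one entry; otherwise signal that the caller must decide.
   */
  static phy_interface_t pick_single_interface(const unsigned long *supported)
  {
  	if (phy_interface_weight(supported) == 1)
  		return find_first_bit(supported, PHY_INTERFACE_MODE_MAX);

  	return PHY_INTERFACE_MODE_NA;	/* ambiguous */
  }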
From 5d6b58c932ec451a5c41482790eb5b1ecf165a94 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 2 Sep 2025 18:36:03 +0000
Subject: net: lockless sock_i_ino()

Follow-up to commit c51da3f7a161 ("net: remove sock_i_uid()").

A recent syzbot report was the trigger for this change.

Over the years, we had many problems caused by the
read_lock[_bh](&sk->sk_callback_lock) in sock_i_uid().

We could fix smc_diag_dump_proto() or make a more radical move: instead
of waiting for new syzbot reports, cache the socket inode number in
sk->sk_ino, so that we no longer need to acquire sk->sk_callback_lock
in sock_i_ino().

This makes socket dumps faster (one less cache line miss, and two
atomic ops avoided).

Prior art:

commit 25a9c8a4431c ("netlink: Add __sock_i_ino() for __netlink_diag_dump().")
commit 4f9bf2a2f5aa ("tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.")
commit efc3dbc37412 ("rds: Make rds_sock_lock BH rather than IRQ safe.")

Fixes: d2d6422f8bd1 ("x86: Allow to enable PREEMPT_RT.")
Reported-by: syzbot+50603c05bbdf4dfdaffa@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/68b73804.050a0220.3db4df.01d8.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet
Reviewed-by: Kuniyuki Iwashima
Reviewed-by: Sebastian Andrzej Siewior
Link: https://patch.msgid.link/20250902183603.740428-1-edumazet@google.com
Signed-off-by: Jakub Kicinski
---
 include/net/sock.h | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index c8a4b283df6f..fb13322a11fc 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -285,6 +285,7 @@ struct sk_filter;
  *	@sk_ack_backlog: current listen backlog
  *	@sk_max_ack_backlog: listen backlog set in listen()
  *	@sk_uid: user id of owner
+ *	@sk_ino: inode number (zero if orphaned)
  *	@sk_prefer_busy_poll: prefer busypolling over softirq processing
  *	@sk_busy_poll_budget: napi processing budget when busypolling
  *	@sk_priority: %SO_PRIORITY setting
@@ -518,6 +519,7 @@ struct sock {
 	u32			sk_ack_backlog;
 	u32			sk_max_ack_backlog;
 	kuid_t			sk_uid;
+	unsigned long		sk_ino;
 	spinlock_t		sk_peer_lock;
 	int			sk_bind_phc;
 	struct pid		*sk_peer_pid;
@@ -2056,6 +2058,10 @@ static inline int sk_rx_queue_get(const struct sock *sk)
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
 	sk->sk_socket = sock;
+	if (sock) {
+		WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid);
+		WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino);
+	}
 }
 
 static inline wait_queue_head_t *sk_sleep(struct sock *sk)
@@ -2077,6 +2083,7 @@ static inline void sock_orphan(struct sock *sk)
 	sk_set_socket(sk, NULL);
 	sk->sk_wq = NULL;
 	/* Note: sk_uid is unchanged. */
+	WRITE_ONCE(sk->sk_ino, 0);
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
@@ -2087,20 +2094,22 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 	rcu_assign_pointer(sk->sk_wq, &parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
-	WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid);
 	security_sock_graft(sk, parent);
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
+static inline unsigned long sock_i_ino(const struct sock *sk)
+{
+	/* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */
+	return READ_ONCE(sk->sk_ino);
+}
+
 static inline kuid_t sk_uid(const struct sock *sk)
{
 	/* Paired with WRITE_ONCE() in sockfs_setattr() */
 	return READ_ONCE(sk->sk_uid);
 }
 
-unsigned long __sock_i_ino(struct sock *sk);
-unsigned long sock_i_ino(struct sock *sk);
-
 static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
 {
 	return sk ? sk_uid(sk) : make_kuid(net->user_ns, 0);
--
cgit v1.2.3
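For context, a sketch of what the cached inode buys a diag-style
dumper: the inode now comes from a plain annotated load instead of
read_lock_bh(&sk->sk_callback_lock). The record struct and helper are
hypothetical, for illustration only:

  #include <net/sock.h>
  #include <linux/uidgid.h>

  struct hypothetical_diag_rec {	/* illustrative only */
  	unsigned long ino;
  	uid_t uid;
  };

  static void sock_fill_diag(const struct sock *sk,
  			   struct hypothetical_diag_rec *r)
  {
  	r->ino = sock_i_ino(sk);	/* READ_ONCE(sk->sk_ino), lockless */
  	r->uid = from_kuid_munged(&init_user_ns, sk_uid(sk));
  }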
From 4039ce7ef40474d5ba46f414c50cc7020b9cf8ae Mon Sep 17 00:00:00 2001
From: Phil Sutter
Date: Thu, 7 Aug 2025 15:49:59 +0200
Subject: netfilter: nf_tables: Introduce NFTA_DEVICE_PREFIX

This new attribute is supposed to be used instead of NFTA_DEVICE_NAME
for simple wildcard interface specs. It holds a NUL-terminated string
representing an interface name prefix to match on.

While kernel code to distinguish full names from prefixes in
NFTA_DEVICE_NAME would be simpler than this solution, reusing the
existing attribute with different semantics leads to confusion between
different versions of kernel and user space:

* With old kernels, wildcards submitted by user space are accepted yet
  silently treated as regular names.

* With old user space, wildcards submitted by the kernel may cause
  crashes since libnftnl expects NUL-termination when there is none.

Using a distinct attribute type sanitizes these situations, as the
receiving side detects and rejects the unexpected attribute nested in
*_HOOK_DEVS attributes.

Fixes: 6d07a289504a ("netfilter: nf_tables: Support wildcard netdev hook specs")
Signed-off-by: Phil Sutter
Signed-off-by: Florian Westphal
---
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 2beb30be2c5f..8e0eb832bc01 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1784,10 +1784,12 @@ enum nft_synproxy_attributes {
  * enum nft_device_attributes - nf_tables device netlink attributes
  *
  * @NFTA_DEVICE_NAME: name of this device (NLA_STRING)
+ * @NFTA_DEVICE_PREFIX: device name prefix, a simple wildcard (NLA_STRING)
  */
 enum nft_devices_attributes {
 	NFTA_DEVICE_UNSPEC,
 	NFTA_DEVICE_NAME,
+	NFTA_DEVICE_PREFIX,
 	__NFTA_DEVICE_MAX
 };
 #define NFTA_DEVICE_MAX		(__NFTA_DEVICE_MAX - 1)
--
cgit v1.2.3
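For context, a userspace sketch of the intended split between the two
attributes, using libmnl. The helper name and the trailing-'*' wildcard
convention are assumptions for illustration; real libnftnl handling
differs:

  #include <net/if.h>
  #include <stdio.h>
  #include <string.h>
  #include <libmnl/libmnl.h>
  #include <linux/netfilter/nf_tables.h>

  /*
   * Emit one device spec into a *_HOOK_DEVS nest: "eth*" becomes a
   * NUL-terminated prefix in NFTA_DEVICE_PREFIX, anything else goes
   * out verbatim as NFTA_DEVICE_NAME.
   */
  static void put_hook_dev(struct nlmsghdr *nlh, const char *spec)
  {
  	size_t len = strlen(spec);

  	if (len > 1 && spec[len - 1] == '*') {
  		char prefix[IFNAMSIZ];

  		snprintf(prefix, sizeof(prefix), "%.*s",
  			 (int)(len - 1), spec);
  		mnl_attr_put_strz(nlh, NFTA_DEVICE_PREFIX, prefix);
  	} else {
  		mnl_attr_put_strz(nlh, NFTA_DEVICE_NAME, spec);
  	}
  }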