From 09d88791c7cd888d5195c84733caf9183dcfbd16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 20 Sep 2024 14:56:24 +0200 Subject: bpf: Make sure internal and UAPI bpf_redirect flags don't overlap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bpf_redirect_info is shared between the SKB and XDP redirect paths, and the two paths use the same numeric flag values in the ri->flags field (specifically, BPF_F_BROADCAST == BPF_F_NEXTHOP). This means that if skb bpf_redirect_neigh() is used with a non-NULL params argument and, subsequently, an XDP redirect is performed using the same bpf_redirect_info struct, the XDP path will get confused and end up crashing, which syzbot managed to trigger. With the stack-allocated bpf_redirect_info, the structure is no longer shared between the SKB and XDP paths, so the crash doesn't happen anymore. However, different code paths using identically-numbered flag values in the same struct field still seems like a bit of a mess, so this patch cleans that up by moving the flag definitions together and redefining the three flags in BPF_F_REDIRECT_INTERNAL to not overlap with the flags used for XDP. It also adds a BUILD_BUG_ON() check to make sure the overlap is not re-introduced by mistake. Fixes: e624d4ed4aa8 ("xdp: Extend xdp_redirect_map with broadcast support") Reported-by: syzbot+cca39e6e84a367a7e6f6@syzkaller.appspotmail.com Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Closes: https://syzkaller.appspot.com/bug?extid=cca39e6e84a367a7e6f6 Link: https://lore.kernel.org/bpf/20240920125625.59465-1-toke@redhat.com --- include/uapi/linux/bpf.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c6cd7c7aeeee..e8241b320c6d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6047,11 +6047,6 @@ enum { BPF_F_MARK_ENFORCE = (1ULL << 6), }; -/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -enum { - BPF_F_INGRESS = (1ULL << 0), -}; - /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ enum { BPF_F_TUNINFO_IPV6 = (1ULL << 0), @@ -6198,10 +6193,12 @@ enum { BPF_F_BPRM_SECUREEXEC = (1ULL << 0), }; -/* Flags for bpf_redirect_map helper */ +/* Flags for bpf_redirect and bpf_redirect_map helpers */ enum { - BPF_F_BROADCAST = (1ULL << 3), - BPF_F_EXCLUDE_INGRESS = (1ULL << 4), + BPF_F_INGRESS = (1ULL << 0), /* used for skb path */ + BPF_F_BROADCAST = (1ULL << 3), /* used for XDP path */ + BPF_F_EXCLUDE_INGRESS = (1ULL << 4), /* used for XDP path */ +#define BPF_F_REDIRECT_FLAGS (BPF_F_INGRESS | BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) }; #define __bpf_md_ptr(type, name) \ -- cgit v1.2.3 From a848c29e3486189aaabd5663bc11aea50c5bd144 Mon Sep 17 00:00:00 2001 From: Yanjun Zhang Date: Tue, 1 Oct 2024 16:39:30 +0800 Subject: NFSv4: Prevent NULL-pointer dereference in nfs42_complete_copies() On the node of an NFS client, some files saved in the mountpoint of the NFS server were copied to another location of the same NFS server. Accidentally, the nfs42_complete_copies() got a NULL-pointer dereference crash with the following syslog: [232064.838881] NFSv4: state recovery failed for open file nfs/pvc-12b5200d-cd0f-46a3-b9f0-af8f4fe0ef64.qcow2, error = -116 [232064.839360] NFSv4: state recovery failed for open file nfs/pvc-12b5200d-cd0f-46a3-b9f0-af8f4fe0ef64.qcow2, error = -116 [232066.588183] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058 [232066.588586] Mem abort info: [232066.588701] ESR = 0x0000000096000007 [232066.588862] EC = 0x25: DABT (current EL), IL = 32 bits [232066.589084] SET = 0, FnV = 0 [232066.589216] EA = 0, S1PTW = 0 [232066.589340] FSC = 0x07: level 3 translation fault [232066.589559] Data abort info: [232066.589683] ISV = 0, ISS = 0x00000007 [232066.589842] CM = 0, WnR = 0 [232066.589967] user pgtable: 64k pages, 48-bit VAs, pgdp=00002000956ff400 [232066.590231] [0000000000000058] pgd=08001100ae100003, p4d=08001100ae100003, pud=08001100ae100003, pmd=08001100b3c00003, pte=0000000000000000 [232066.590757] Internal error: Oops: 96000007 [#1] SMP [232066.590958] Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm vhost_net vhost vhost_iotlb tap tun ipt_rpfilter xt_multiport ip_set_hash_ip ip_set_hash_net xfrm_interface xfrm6_tunnel tunnel4 tunnel6 esp4 ah4 wireguard libcurve25519_generic veth xt_addrtype xt_set nf_conntrack_netlink ip_set_hash_ipportnet ip_set_hash_ipportip ip_set_bitmap_port ip_set_hash_ipport dummy ip_set ip_vs_sh ip_vs_wrr ip_vs_rr ip_vs iptable_filter sch_ingress nfnetlink_cttimeout vport_gre ip_gre ip_tunnel gre vport_geneve geneve vport_vxlan vxlan ip6_udp_tunnel udp_tunnel openvswitch nf_conncount dm_round_robin dm_service_time dm_multipath xt_nat xt_MASQUERADE nft_chain_nat nf_nat xt_mark xt_conntrack xt_comment nft_compat nft_counter nf_tables nfnetlink ocfs2 ocfs2_nodemanager ocfs2_stackglue iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ipmi_ssif nbd overlay 8021q garp mrp bonding tls rfkill sunrpc ext4 mbcache jbd2 [232066.591052] vfat fat cas_cache cas_disk ses enclosure scsi_transport_sas sg acpi_ipmi ipmi_si ipmi_devintf ipmi_msghandler ip_tables vfio_pci vfio_pci_core vfio_virqfd vfio_iommu_type1 vfio dm_mirror dm_region_hash dm_log dm_mod nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter bridge stp llc fuse xfs libcrc32c ast drm_vram_helper qla2xxx drm_kms_helper syscopyarea crct10dif_ce sysfillrect ghash_ce sysimgblt sha2_ce fb_sys_fops cec sha256_arm64 sha1_ce drm_ttm_helper ttm nvme_fc igb sbsa_gwdt nvme_fabrics drm nvme_core i2c_algo_bit i40e scsi_transport_fc megaraid_sas aes_neon_bs [232066.596953] CPU: 6 PID: 4124696 Comm: 10.253.166.125- Kdump: loaded Not tainted 5.15.131-9.cl9_ocfs2.aarch64 #1 [232066.597356] Hardware name: Great Wall .\x93\x8e...RF6260 V5/GWMSSE2GL1T, BIOS T656FBE_V3.0.18 2024-01-06 [232066.597721] pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [232066.598034] pc : nfs4_reclaim_open_state+0x220/0x800 [nfsv4] [232066.598327] lr : nfs4_reclaim_open_state+0x12c/0x800 [nfsv4] [232066.598595] sp : ffff8000f568fc70 [232066.598731] x29: ffff8000f568fc70 x28: 0000000000001000 x27: ffff21003db33000 [232066.599030] x26: ffff800005521ae0 x25: ffff0100f98fa3f0 x24: 0000000000000001 [232066.599319] x23: ffff800009920008 x22: ffff21003db33040 x21: ffff21003db33050 [232066.599628] x20: ffff410172fe9e40 x19: ffff410172fe9e00 x18: 0000000000000000 [232066.599914] x17: 0000000000000000 x16: 0000000000000004 x15: 0000000000000000 [232066.600195] x14: 0000000000000000 x13: ffff800008e685a8 x12: 00000000eac0c6e6 [232066.600498] x11: 0000000000000000 x10: 0000000000000008 x9 : ffff8000054e5828 [232066.600784] x8 : 00000000ffffffbf x7 : 0000000000000001 x6 : 000000000a9eb14a [232066.601062] x5 : 0000000000000000 x4 : ffff70ff8a14a800 x3 : 0000000000000058 [232066.601348] x2 : 0000000000000001 x1 : 54dce46366daa6c6 x0 : 0000000000000000 [232066.601636] Call trace: [232066.601749] nfs4_reclaim_open_state+0x220/0x800 [nfsv4] [232066.601998] nfs4_do_reclaim+0x1b8/0x28c [nfsv4] [232066.602218] nfs4_state_manager+0x928/0x10f0 [nfsv4] [232066.602455] nfs4_run_state_manager+0x78/0x1b0 [nfsv4] [232066.602690] kthread+0x110/0x114 [232066.602830] ret_from_fork+0x10/0x20 [232066.602985] Code: 1400000d f9403f20 f9402e61 91016003 (f9402c00) [232066.603284] SMP: stopping secondary CPUs [232066.606936] Starting crashdump kernel... [232066.607146] Bye! Analysing the vmcore, we know that nfs4_copy_state listed by destination nfs_server->ss_copies was added by the field copies in handle_async_copy(), and we found a waiting copy process with the stack as: PID: 3511963 TASK: ffff710028b47e00 CPU: 0 COMMAND: "cp" #0 [ffff8001116ef740] __switch_to at ffff8000081b92f4 #1 [ffff8001116ef760] __schedule at ffff800008dd0650 #2 [ffff8001116ef7c0] schedule at ffff800008dd0a00 #3 [ffff8001116ef7e0] schedule_timeout at ffff800008dd6aa0 #4 [ffff8001116ef860] __wait_for_common at ffff800008dd166c #5 [ffff8001116ef8e0] wait_for_completion_interruptible at ffff800008dd1898 #6 [ffff8001116ef8f0] handle_async_copy at ffff8000055142f4 [nfsv4] #7 [ffff8001116ef970] _nfs42_proc_copy at ffff8000055147c8 [nfsv4] #8 [ffff8001116efa80] nfs42_proc_copy at ffff800005514cf0 [nfsv4] #9 [ffff8001116efc50] __nfs4_copy_file_range.constprop.0 at ffff8000054ed694 [nfsv4] The NULL-pointer dereference was due to nfs42_complete_copies() listed the nfs_server->ss_copies by the field ss_copies of nfs4_copy_state. So the nfs4_copy_state address ffff0100f98fa3f0 was offset by 0x10 and the data accessed through this pointer was also incorrect. Generally, the ordered list nfs4_state_owner->so_states indicate open(O_RDWR) or open(O_WRITE) states are reclaimed firstly by nfs4_reclaim_open_state(). When destination state reclaim is failed with NFS_STATE_RECOVERY_FAILED and copies are not deleted in nfs_server->ss_copies, the source state may be passed to the nfs42_complete_copies() process earlier, resulting in this crash scene finally. To solve this issue, we add a list_head nfs_server->ss_src_copies for a server-to-server copy specially. Fixes: 0e65a32c8a56 ("NFS: handle source server reboot") Signed-off-by: Yanjun Zhang Reviewed-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 853df3fcd4c2..b804346a9741 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -249,6 +249,7 @@ struct nfs_server { struct list_head layouts; struct list_head delegations; struct list_head ss_copies; + struct list_head ss_src_copies; unsigned long delegation_gen; unsigned long mig_gen; -- cgit v1.2.3 From 65f2a5c366353da6fa724c68347e1de954928143 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 3 Oct 2024 15:34:58 -0400 Subject: nfs_common: fix race in NFS calls to nfsd_file_put_local() and nfsd_serv_put() Add nfs_to_nfsd_file_put_local() interface to fix race with nfsd module unload. Similarly, use RCU around nfs_open_local_fh()'s error path call to nfs_to->nfsd_serv_put(). Holding RCU ensures that NFS will safely _call and return_ from its nfs_to calls into the NFSD functions nfsd_file_put_local() and nfsd_serv_put(). Otherwise, if RCU isn't used then there is a narrow window when NFS's reference for the nfsd_file and nfsd_serv are dropped and the NFSD module could be unloaded, which could result in a crash from the return instruction for either nfs_to->nfsd_file_put_local() or nfs_to->nfsd_serv_put(). Reported-by: NeilBrown Signed-off-by: Mike Snitzer Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index b353abe00357..b0dd9b1eef4f 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -65,10 +65,25 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *, struct rpc_clnt *, const struct cred *, const struct nfs_fh *, const fmode_t); +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +{ + /* + * Once reference to nfsd_serv is dropped, NFSD could be + * unloaded, so ensure safe return from nfsd_file_put_local() + * by always taking RCU. + */ + rcu_read_lock(); + nfs_to->nfsd_file_put_local(localio); + rcu_read_unlock(); +} + #else /* CONFIG_NFS_LOCALIO */ static inline void nfsd_localio_ops_init(void) { } +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +{ +} #endif /* CONFIG_NFS_LOCALIO */ #endif /* __LINUX_NFSLOCALIO_H */ -- cgit v1.2.3 From 1dae9f1187189bc09ff6d25ca97ead711f7e26f9 Mon Sep 17 00:00:00 2001 From: Anastasia Kovaleva Date: Thu, 3 Oct 2024 13:44:31 +0300 Subject: net: Fix an unsafe loop on the list The kernel may crash when deleting a genetlink family if there are still listeners for that family: Oops: Kernel access of bad area, sig: 11 [#1] ... NIP [c000000000c080bc] netlink_update_socket_mc+0x3c/0xc0 LR [c000000000c0f764] __netlink_clear_multicast_users+0x74/0xc0 Call Trace: __netlink_clear_multicast_users+0x74/0xc0 genl_unregister_family+0xd4/0x2d0 Change the unsafe loop on the list to a safe one, because inside the loop there is an element removal from this list. Fixes: b8273570f802 ("genetlink: fix netns vs. netlink table locking (2)") Cc: stable@vger.kernel.org Signed-off-by: Anastasia Kovaleva Reviewed-by: Dmitry Bogdanov Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241003104431.12391-1-a.kovaleva@yadro.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index c58ca8dd561b..db29c39e19a7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -894,6 +894,8 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_for_each_entry_safe(__sk, tmp, list, sk_node) #define sk_for_each_bound(__sk, list) \ hlist_for_each_entry(__sk, list, sk_bind_node) +#define sk_for_each_bound_safe(__sk, tmp, list) \ + hlist_for_each_entry_safe(__sk, tmp, list, sk_bind_node) /** * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset -- cgit v1.2.3 From 1442ee0011983f0c5c4b92380e6853afb513841a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 2 Oct 2024 21:49:59 +0100 Subject: irqchip/gic-v4: Don't allow a VMOVP on a dying VPE Kunkun Jiang reported that there is a small window of opportunity for userspace to force a change of affinity for a VPE while the VPE has already been unmapped, but the corresponding doorbell interrupt still visible in /proc/irq/. Plug the race by checking the value of vmapp_count, which tracks whether the VPE is mapped ot not, and returning an error in this case. This involves making vmapp_count common to both GICv4.1 and its v4.0 ancestor. Fixes: 64edfaa9a234 ("irqchip/gic-v4.1: Implement the v4.1 flavour of VMAPP") Reported-by: Kunkun Jiang Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/c182ece6-2ba0-ce4f-3404-dba7a3ab6c52@huawei.com Link: https://lore.kernel.org/all/20241002204959.2051709-1-maz@kernel.org --- include/linux/irqchip/arm-gic-v4.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index ecabed6d3307..7f1f11a5e4e4 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -66,10 +66,12 @@ struct its_vpe { bool enabled; bool group; } sgi_config[16]; - atomic_t vmapp_count; }; }; + /* Track the VPE being mapped */ + atomic_t vmapp_count; + /* * Ensures mutual exclusion between affinity setting of the * vPE and vLPI operations using vpe->col_idx. -- cgit v1.2.3 From 3cb7cf1540ddff5473d6baeb530228d19bc97b8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Oct 2024 18:41:30 +0000 Subject: net/sched: accept TCA_STAB only for root qdisc Most qdiscs maintain their backlog using qdisc_pkt_len(skb) on the assumption it is invariant between the enqueue() and dequeue() handlers. Unfortunately syzbot can crash a host rather easily using a TBF + SFQ combination, with an STAB on SFQ [1] We can't support TCA_STAB on arbitrary level, this would require to maintain per-qdisc storage. [1] [ 88.796496] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 88.798611] #PF: supervisor read access in kernel mode [ 88.799014] #PF: error_code(0x0000) - not-present page [ 88.799506] PGD 0 P4D 0 [ 88.799829] Oops: Oops: 0000 [#1] SMP NOPTI [ 88.800569] CPU: 14 UID: 0 PID: 2053 Comm: b371744477 Not tainted 6.12.0-rc1-virtme #1117 [ 88.801107] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 88.801779] RIP: 0010:sfq_dequeue (net/sched/sch_sfq.c:272 net/sched/sch_sfq.c:499) sch_sfq [ 88.802544] Code: 0f b7 50 12 48 8d 04 d5 00 00 00 00 48 89 d6 48 29 d0 48 8b 91 c0 01 00 00 48 c1 e0 03 48 01 c2 66 83 7a 1a 00 7e c0 48 8b 3a <4c> 8b 07 4c 89 02 49 89 50 08 48 c7 47 08 00 00 00 00 48 c7 07 00 All code ======== 0: 0f b7 50 12 movzwl 0x12(%rax),%edx 4: 48 8d 04 d5 00 00 00 lea 0x0(,%rdx,8),%rax b: 00 c: 48 89 d6 mov %rdx,%rsi f: 48 29 d0 sub %rdx,%rax 12: 48 8b 91 c0 01 00 00 mov 0x1c0(%rcx),%rdx 19: 48 c1 e0 03 shl $0x3,%rax 1d: 48 01 c2 add %rax,%rdx 20: 66 83 7a 1a 00 cmpw $0x0,0x1a(%rdx) 25: 7e c0 jle 0xffffffffffffffe7 27: 48 8b 3a mov (%rdx),%rdi 2a:* 4c 8b 07 mov (%rdi),%r8 <-- trapping instruction 2d: 4c 89 02 mov %r8,(%rdx) 30: 49 89 50 08 mov %rdx,0x8(%r8) 34: 48 c7 47 08 00 00 00 movq $0x0,0x8(%rdi) 3b: 00 3c: 48 rex.W 3d: c7 .byte 0xc7 3e: 07 (bad) ... Code starting with the faulting instruction =========================================== 0: 4c 8b 07 mov (%rdi),%r8 3: 4c 89 02 mov %r8,(%rdx) 6: 49 89 50 08 mov %rdx,0x8(%r8) a: 48 c7 47 08 00 00 00 movq $0x0,0x8(%rdi) 11: 00 12: 48 rex.W 13: c7 .byte 0xc7 14: 07 (bad) ... [ 88.803721] RSP: 0018:ffff9a1f892b7d58 EFLAGS: 00000206 [ 88.804032] RAX: 0000000000000000 RBX: ffff9a1f8420c800 RCX: ffff9a1f8420c800 [ 88.804560] RDX: ffff9a1f81bc1440 RSI: 0000000000000000 RDI: 0000000000000000 [ 88.805056] RBP: ffffffffc04bb0e0 R08: 0000000000000001 R09: 00000000ff7f9a1f [ 88.805473] R10: 000000000001001b R11: 0000000000009a1f R12: 0000000000000140 [ 88.806194] R13: 0000000000000001 R14: ffff9a1f886df400 R15: ffff9a1f886df4ac [ 88.806734] FS: 00007f445601a740(0000) GS:ffff9a2e7fd80000(0000) knlGS:0000000000000000 [ 88.807225] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 88.807672] CR2: 0000000000000000 CR3: 000000050cc46000 CR4: 00000000000006f0 [ 88.808165] Call Trace: [ 88.808459] [ 88.808710] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434) [ 88.809261] ? page_fault_oops (arch/x86/mm/fault.c:715) [ 88.809561] ? exc_page_fault (./arch/x86/include/asm/irqflags.h:26 ./arch/x86/include/asm/irqflags.h:87 ./arch/x86/include/asm/irqflags.h:147 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539) [ 88.809806] ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623) [ 88.810074] ? sfq_dequeue (net/sched/sch_sfq.c:272 net/sched/sch_sfq.c:499) sch_sfq [ 88.810411] sfq_reset (net/sched/sch_sfq.c:525) sch_sfq [ 88.810671] qdisc_reset (./include/linux/skbuff.h:2135 ./include/linux/skbuff.h:2441 ./include/linux/skbuff.h:3304 ./include/linux/skbuff.h:3310 net/sched/sch_generic.c:1036) [ 88.810950] tbf_reset (./include/linux/timekeeping.h:169 net/sched/sch_tbf.c:334) sch_tbf [ 88.811208] qdisc_reset (./include/linux/skbuff.h:2135 ./include/linux/skbuff.h:2441 ./include/linux/skbuff.h:3304 ./include/linux/skbuff.h:3310 net/sched/sch_generic.c:1036) [ 88.811484] netif_set_real_num_tx_queues (./include/linux/spinlock.h:396 ./include/net/sch_generic.h:768 net/core/dev.c:2958) [ 88.811870] __tun_detach (drivers/net/tun.c:590 drivers/net/tun.c:673) [ 88.812271] tun_chr_close (drivers/net/tun.c:702 drivers/net/tun.c:3517) [ 88.812505] __fput (fs/file_table.c:432 (discriminator 1)) [ 88.812735] task_work_run (kernel/task_work.c:230) [ 88.813016] do_exit (kernel/exit.c:940) [ 88.813372] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:58 (discriminator 4)) [ 88.813639] ? handle_mm_fault (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/memcontrol.h:1022 ./include/linux/memcontrol.h:1045 ./include/linux/memcontrol.h:1052 mm/memory.c:5928 mm/memory.c:6088) [ 88.813867] do_group_exit (kernel/exit.c:1070) [ 88.814138] __x64_sys_exit_group (kernel/exit.c:1099) [ 88.814490] x64_sys_call (??:?) [ 88.814791] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) [ 88.815012] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) [ 88.815495] RIP: 0033:0x7f44560f1975 Fixes: 175f9c1bba9b ("net_sched: Add size table for qdiscs") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Daniel Borkmann Link: https://patch.msgid.link/20241007184130.3960565-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 79edd5b5e3c9..5d74fa7e694c 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -848,7 +848,6 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - qdisc_calculate_pkt_len(skb, sch); return sch->enqueue(skb, sch, to_free); } -- cgit v1.2.3 From 9897713fe1077c90b4a86c9af0a878d56c8888a2 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 26 Sep 2024 19:11:50 +0200 Subject: bcachefs: do not use PF_MEMALLOC_NORECLAIM Patch series "remove PF_MEMALLOC_NORECLAIM" v3. This patch (of 2): bch2_new_inode relies on PF_MEMALLOC_NORECLAIM to try to allocate a new inode to achieve GFP_NOWAIT semantic while holding locks. If this allocation fails it will drop locks and use GFP_NOFS allocation context. We would like to drop PF_MEMALLOC_NORECLAIM because it is really dangerous to use if the caller doesn't control the full call chain with this flag set. E.g. if any of the function down the chain needed GFP_NOFAIL request the PF_MEMALLOC_NORECLAIM would override this and cause unexpected failure. While this is not the case in this particular case using the scoped gfp semantic is not really needed bacause we can easily pus the allocation context down the chain without too much clutter. [akpm@linux-foundation.org: fix kerneldoc warnings] Link: https://lkml.kernel.org/r/20240926172940.167084-1-mhocko@kernel.org Link: https://lkml.kernel.org/r/20240926172940.167084-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Jan Kara # For vfs changes Cc: Al Viro Cc: Christian Brauner Cc: James Morris Cc: Kent Overstreet Cc: Paul Moore Cc: Serge E. Hallyn Cc: Yafang Shao Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/fs.h | 7 ++++++- include/linux/security.h | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e3c603d01337..3559446279c1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3082,7 +3082,12 @@ extern loff_t default_llseek(struct file *file, loff_t offset, int whence); extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence); -extern int inode_init_always(struct super_block *, struct inode *); +extern int inode_init_always_gfp(struct super_block *, struct inode *, gfp_t); +static inline int inode_init_always(struct super_block *sb, struct inode *inode) +{ + return inode_init_always_gfp(sb, inode, GFP_NOFS); +} + extern void inode_init_once(struct inode *); extern void address_space_init_once(struct address_space *mapping); extern struct inode * igrab(struct inode *); diff --git a/include/linux/security.h b/include/linux/security.h index b86ec2afc691..2ec8f3014757 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -348,7 +348,7 @@ int security_dentry_create_files_as(struct dentry *dentry, int mode, struct cred *new); int security_path_notify(const struct path *path, u64 mask, unsigned int obj_type); -int security_inode_alloc(struct inode *inode); +int security_inode_alloc(struct inode *inode, gfp_t gfp); void security_inode_free(struct inode *inode); int security_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, @@ -789,7 +789,7 @@ static inline int security_path_notify(const struct path *path, u64 mask, return 0; } -static inline int security_inode_alloc(struct inode *inode) +static inline int security_inode_alloc(struct inode *inode, gfp_t gfp) { return 0; } -- cgit v1.2.3 From 9a8da05d7ad619beb84d0c6904c3fa7022c6fb9b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 26 Sep 2024 19:11:51 +0200 Subject: Revert "mm: introduce PF_MEMALLOC_NORECLAIM, PF_MEMALLOC_NOWARN" This reverts commit eab0af905bfc3e9c05da2ca163d76a1513159aa4. There is no existing user of those flags. PF_MEMALLOC_NOWARN is dangerous because a nested allocation context can use GFP_NOFAIL which could cause unexpected failure. Such a code would be hard to maintain because it could be deeper in the call chain. PF_MEMALLOC_NORECLAIM has been added even when it was pointed out [1] that such a allocation contex is inherently unsafe if the context doesn't fully control all allocations called from this context. While PF_MEMALLOC_NOWARN is not dangerous the way PF_MEMALLOC_NORECLAIM is it doesn't have any user and as Matthew has pointed out we are running out of those flags so better reclaim it without any real users. [1] https://lore.kernel.org/all/ZcM0xtlKbAOFjv5n@tiehlicka/ Link: https://lkml.kernel.org/r/20240926172940.167084-3-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Vlastimil Babka Cc: Al Viro Cc: Christian Brauner Cc: James Morris Cc: Jan Kara Cc: Kent Overstreet Cc: Paul Moore Cc: Serge E. Hallyn Cc: Yafang Shao Signed-off-by: Andrew Morton --- include/linux/sched.h | 4 ++-- include/linux/sched/mm.h | 17 ++++------------- 2 files changed, 6 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index e6ee4258169a..449dd64ed9ac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1681,8 +1681,8 @@ extern struct pid *cad_pid; * I am cleaning dirty pages from some other bdi. */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ -#define PF_MEMALLOC_NORECLAIM 0x00800000 /* All allocation requests will clear __GFP_DIRECT_RECLAIM */ -#define PF_MEMALLOC_NOWARN 0x01000000 /* All allocation requests will inherit __GFP_NOWARN */ +#define PF__HOLE__00800000 0x00800000 +#define PF__HOLE__01000000 0x01000000 #define PF__HOLE__02000000 0x02000000 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 07bb8d4181d7..928a626725e6 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -251,25 +251,16 @@ static inline gfp_t current_gfp_context(gfp_t flags) { unsigned int pflags = READ_ONCE(current->flags); - if (unlikely(pflags & (PF_MEMALLOC_NOIO | - PF_MEMALLOC_NOFS | - PF_MEMALLOC_NORECLAIM | - PF_MEMALLOC_NOWARN | - PF_MEMALLOC_PIN))) { + if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) { /* - * Stronger flags before weaker flags: - * NORECLAIM implies NOIO, which in turn implies NOFS + * NOIO implies both NOIO and NOFS and it is a weaker context + * so always make sure it makes precedence */ - if (pflags & PF_MEMALLOC_NORECLAIM) - flags &= ~__GFP_DIRECT_RECLAIM; - else if (pflags & PF_MEMALLOC_NOIO) + if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; - if (pflags & PF_MEMALLOC_NOWARN) - flags |= __GFP_NOWARN; - if (pflags & PF_MEMALLOC_PIN) flags &= ~__GFP_MOVABLE; } -- cgit v1.2.3 From 04b670de2859a8a8b0830779f9c9bda5d39662ab Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 7 Oct 2024 16:54:11 -0400 Subject: closures: Add closure_wait_event_timeout() Add a closure version of wait_event_timeout(), with the same semantics. The closure version is useful because unlike wait_event(), it allows blocking code to run in the conditional expression. Cc: Coly Li Signed-off-by: Kent Overstreet --- include/linux/closure.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/closure.h b/include/linux/closure.h index 2af44427107d..880fe85e35e9 100644 --- a/include/linux/closure.h +++ b/include/linux/closure.h @@ -454,4 +454,39 @@ do { \ __closure_wait_event(waitlist, _cond); \ } while (0) +#define __closure_wait_event_timeout(waitlist, _cond, _until) \ +({ \ + struct closure cl; \ + long _t; \ + \ + closure_init_stack(&cl); \ + \ + while (1) { \ + closure_wait(waitlist, &cl); \ + if (_cond) { \ + _t = max_t(long, 1L, _until - jiffies); \ + break; \ + } \ + _t = max_t(long, 0L, _until - jiffies); \ + if (!_t) \ + break; \ + closure_sync_timeout(&cl, _t); \ + } \ + closure_wake_up(waitlist); \ + closure_sync(&cl); \ + _t; \ +}) + +/* + * Returns 0 if timeout expired, remaining time in jiffies (at least 1) if + * condition became true + */ +#define closure_wait_event_timeout(waitlist, _cond, _timeout) \ +({ \ + unsigned long _until = jiffies + _timeout; \ + (_cond) \ + ? max_t(long, 1L, _until - jiffies) \ + : __closure_wait_event_timeout(waitlist, _cond, _until);\ +}) + #endif /* _LINUX_CLOSURE_H */ -- cgit v1.2.3 From 07cc7b0b942bf55ef1a471470ecda8d2a6a6541f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 11:47:32 -0700 Subject: rtnetlink: Add bulk registration helpers for rtnetlink message handlers. Before commit addf9b90de22 ("net: rtnetlink: use rcu to free rtnl message handlers"), once rtnl_msg_handlers[protocol] was allocated, the following rtnl_register_module() for the same protocol never failed. However, after the commit, rtnl_msg_handler[protocol][msgtype] needs to be allocated in each rtnl_register_module(), so each call could fail. Many callers of rtnl_register_module() do not handle the returned error, and we need to add many error handlings. To handle that easily, let's add wrapper functions for bulk registration of rtnetlink message handlers. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index b45d57b5968a..2d3eb7cb4dff 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -29,6 +29,15 @@ static inline enum rtnl_kinds rtnl_msgtype_kind(int msgtype) return msgtype & RTNL_KIND_MASK; } +struct rtnl_msg_handler { + struct module *owner; + int protocol; + int msgtype; + rtnl_doit_func doit; + rtnl_dumpit_func dumpit; + int flags; +}; + void rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_register_module(struct module *owner, int protocol, int msgtype, @@ -36,6 +45,14 @@ int rtnl_register_module(struct module *owner, int protocol, int msgtype, int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); +int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n); +void __rtnl_unregister_many(const struct rtnl_msg_handler *handlers, int n); + +#define rtnl_register_many(handlers) \ + __rtnl_register_many(handlers, ARRAY_SIZE(handlers)) +#define rtnl_unregister_many(handlers) \ + __rtnl_unregister_many(handlers, ARRAY_SIZE(handlers)) + static inline int rtnl_msg_family(const struct nlmsghdr *nlh) { if (nlmsg_len(nlh) >= sizeof(struct rtgenmsg)) -- cgit v1.2.3 From d51705614f668254cc5def7490df76f9680b4659 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 11:47:35 -0700 Subject: mctp: Handle error of rtnl_register_module(). Since introduced, mctp has been ignoring the returned value of rtnl_register_module(), which could fail silently. Handling the error allows users to view a module as an all-or-nothing thing in terms of the rtnetlink functionality. This prevents syzkaller from reporting spurious errors from its tests, where OOM often occurs and module is automatically loaded. Let's handle the errors by rtnl_register_many(). Fixes: 583be982d934 ("mctp: Add device handling and netlink interface") Fixes: 831119f88781 ("mctp: Add neighbour netlink interface") Fixes: 06d2f4c583a7 ("mctp: Add netlink route management") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- include/net/mctp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mctp.h b/include/net/mctp.h index 7b17c52e8ce2..28d59ae94ca3 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -295,7 +295,7 @@ void mctp_neigh_remove_dev(struct mctp_dev *mdev); int mctp_routes_init(void); void mctp_routes_exit(void); -void mctp_device_init(void); +int mctp_device_init(void); void mctp_device_exit(void); #endif /* __NET_MCTP_H */ -- cgit v1.2.3 From c657243ae12000dc57e3648b0ddd30da9ffd1f14 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Oct 2024 16:51:07 +0200 Subject: serial: qcom-geni: fix rx cancel dma status bit Cancelling an rx command is signalled using bit 14 of the rx DMA status register and not bit 11. This bit is currently unused, but this error becomes apparent, for example, when tracing the status register when closing the port. Fixes: eddac5af0654 ("soc: qcom: Add GENI based QUP Wrapper driver") Reviewed-by: Douglas Anderson Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20241009145110.16847-7-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/soc/qcom/geni-se.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h index c3bca9c0bf2c..2996a3c28ef3 100644 --- a/include/linux/soc/qcom/geni-se.h +++ b/include/linux/soc/qcom/geni-se.h @@ -258,8 +258,8 @@ struct geni_se { #define RX_DMA_PARITY_ERR BIT(5) #define RX_DMA_BREAK GENMASK(8, 7) #define RX_GENI_GP_IRQ GENMASK(10, 5) -#define RX_GENI_CANCEL_IRQ BIT(11) #define RX_GENI_GP_IRQ_EXT GENMASK(13, 12) +#define RX_GENI_CANCEL_IRQ BIT(14) /* SE_HW_PARAM_0 fields */ #define TX_FIFO_WIDTH_MSK GENMASK(29, 24) -- cgit v1.2.3 From 73ab05aa46b02d96509cb029a8d04fca7bbde8c7 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 9 Oct 2024 21:44:32 -0400 Subject: sched/core: Disable page allocation in task_tick_mm_cid() With KASAN and PREEMPT_RT enabled, calling task_work_add() in task_tick_mm_cid() may cause the following splat. [ 63.696416] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 [ 63.696416] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 610, name: modprobe [ 63.696416] preempt_count: 10001, expected: 0 [ 63.696416] RCU nest depth: 1, expected: 1 This problem is caused by the following call trace. sched_tick() [ acquire rq->__lock ] -> task_tick_mm_cid() -> task_work_add() -> __kasan_record_aux_stack() -> kasan_save_stack() -> stack_depot_save_flags() -> alloc_pages_mpol_noprof() -> __alloc_pages_noprof() -> get_page_from_freelist() -> rmqueue() -> rmqueue_pcplist() -> __rmqueue_pcplist() -> rmqueue_bulk() -> rt_spin_lock() The rq lock is a raw_spinlock_t. We can't sleep while holding it. IOW, we can't call alloc_pages() in stack_depot_save_flags(). The task_tick_mm_cid() function with its task_work_add() call was introduced by commit 223baf9d17f2 ("sched: Fix performance regression introduced by mm_cid") in v6.4 kernel. Fortunately, there is a kasan_record_aux_stack_noalloc() variant that calls stack_depot_save_flags() while not allowing it to allocate new pages. To allow task_tick_mm_cid() to use task_work without page allocation, a new TWAF_NO_ALLOC flag is added to enable calling kasan_record_aux_stack_noalloc() instead of kasan_record_aux_stack() if set. The task_tick_mm_cid() function is modified to add this new flag. The possible downside is the missing stack trace in a KASAN report due to new page allocation required when task_work_add_noallloc() is called which should be rare. Fixes: 223baf9d17f2 ("sched: Fix performance regression introduced by mm_cid") Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20241010014432.194742-1-longman@redhat.com --- include/linux/task_work.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/task_work.h b/include/linux/task_work.h index cf5e7e891a77..2964171856e0 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -14,11 +14,14 @@ init_task_work(struct callback_head *twork, task_work_func_t func) } enum task_work_notify_mode { - TWA_NONE, + TWA_NONE = 0, TWA_RESUME, TWA_SIGNAL, TWA_SIGNAL_NO_IPI, TWA_NMI_CURRENT, + + TWA_FLAGS = 0xff00, + TWAF_NO_ALLOC = 0x0100, }; static inline bool task_work_pending(struct task_struct *task) -- cgit v1.2.3 From 1d7b2ce43d2c22a21dadaf689cb36a69570346a6 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 11 Oct 2024 11:01:03 +0800 Subject: net: enetc: add missing static descriptor and inline keyword Fix the build warnings when CONFIG_FSL_ENETC_MDIO is not enabled. The detailed warnings are shown as follows. include/linux/fsl/enetc_mdio.h:62:18: warning: no previous prototype for function 'enetc_hw_alloc' [-Wmissing-prototypes] 62 | struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs) | ^ include/linux/fsl/enetc_mdio.h:62:1: note: declare 'static' if the function is not intended to be used outside of this translation unit 62 | struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs) | ^ | static 8 warnings generated. Fixes: 6517798dd343 ("enetc: Make MDIO accessors more generic and export to include/linux/fsl") Cc: stable@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202410102136.jQHZOcS4-lkp@intel.com/ Signed-off-by: Wei Fang Reviewed-by: Claudiu Manoil Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241011030103.392362-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/fsl/enetc_mdio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsl/enetc_mdio.h b/include/linux/fsl/enetc_mdio.h index df25fffdc0ae..623ccfcbf39c 100644 --- a/include/linux/fsl/enetc_mdio.h +++ b/include/linux/fsl/enetc_mdio.h @@ -59,7 +59,8 @@ static inline int enetc_mdio_read_c45(struct mii_bus *bus, int phy_id, static inline int enetc_mdio_write_c45(struct mii_bus *bus, int phy_id, int devad, int regnum, u16 value) { return -EINVAL; } -struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs) +static inline struct enetc_hw *enetc_hw_alloc(struct device *dev, + void __iomem *port_regs) { return ERR_PTR(-EINVAL); } #endif -- cgit v1.2.3 From cd9626e9ebc77edec33023fe95dab4b04ffc819d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Oct 2024 11:38:10 +0200 Subject: sched/fair: Fix external p->on_rq users Sean noted that ever since commit 152e11f6df29 ("sched/fair: Implement delayed dequeue") KVM's preemption notifiers have started mis-classifying preemption vs blocking. Notably p->on_rq is no longer sufficient to determine if a task is runnable or blocked -- the aforementioned commit introduces tasks that remain on the runqueue even through they will not run again, and should be considered blocked for many cases. Add the task_is_runnable() helper to classify things and audit all external users of the p->on_rq state. Also add a few comments. Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue") Reported-by: Sean Christopherson Tested-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20241010091843.GK33184@noisy.programming.kicks-ass.net --- include/linux/sched.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index e6ee4258169a..8a9517e6640c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2133,6 +2133,11 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ +static inline bool task_is_runnable(struct task_struct *p) +{ + return p->on_rq && !p->se.sched_delayed; +} + extern bool sched_task_on_rq(struct task_struct *p); extern unsigned long get_wchan(struct task_struct *p); extern struct task_struct *cpu_curr_snapshot(int cpu); -- cgit v1.2.3 From 71dce222d5865ccb19b231a84d26ca316a65e255 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 1 Oct 2024 15:06:11 +0800 Subject: ALSA/hda: intel-sdw-acpi: add support for sdw-manager-list property read The DisCo for SoundWire 2.0 spec adds support for a new sdw-manager-list property. Add it in backwards-compatible mode with 'sdw-master-count', which assumed that all links between 0..count-1 exist. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20241001070611.63288-5-yung-chuan.liao@linux.intel.com --- include/linux/soundwire/sdw_intel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 37ae69365fe2..734dc1fa3b5b 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -227,7 +227,7 @@ struct sdw_intel_ops { /** * struct sdw_intel_acpi_info - Soundwire Intel information found in ACPI tables * @handle: ACPI controller handle - * @count: link count found with "sdw-master-count" property + * @count: link count found with "sdw-master-count" or "sdw-manager-list" property * @link_mask: bit-wise mask listing links enabled by BIOS menu * * this structure could be expanded to e.g. provide all the _ADR -- cgit v1.2.3 From c0adf8c3a9bf33f1dd1bf950601380f46a3fcec3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Oct 2024 10:59:12 +0200 Subject: iomap: factor out a iomap_last_written_block helper Split out a pice of logic from iomap_file_buffered_write_punch_delalloc that is useful for all iomap_end implementations. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- include/linux/iomap.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 4ad12a3c8bae..62253739dedc 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -256,6 +256,20 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) return &i->iomap; } +/* + * Return the file offset for the first unchanged block after a short write. + * + * If nothing was written, round @pos down to point at the first block in + * the range, else round up to include the partially written block. + */ +static inline loff_t iomap_last_written_block(struct inode *inode, loff_t pos, + ssize_t written) +{ + if (unlikely(!written)) + return round_down(pos, i_blocksize(inode)); + return round_up(pos + written, i_blocksize(inode)); +} + ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops, void *private); int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); -- cgit v1.2.3 From caf0ea451d97c33c5bbaa0074dad33b0b2a4e649 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Oct 2024 10:59:13 +0200 Subject: iomap: remove iomap_file_buffered_write_punch_delalloc Currently iomap_file_buffered_write_punch_delalloc can be called from XFS either with the invalidate lock held or not. To fix this while keeping the locking in the file system and not the iomap library code we'll need to life the locking up into the file system. To prepare for that, open code iomap_file_buffered_write_punch_delalloc in the only caller, and instead export iomap_write_delalloc_release. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- include/linux/iomap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 62253739dedc..d0420e962ffd 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -290,9 +290,9 @@ vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, struct iomap *iomap); -void iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos, - loff_t length, ssize_t written, unsigned flag, - struct iomap *iomap, iomap_punch_t punch); +void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte, + loff_t end_byte, unsigned flags, struct iomap *iomap, + iomap_punch_t punch); int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len, const struct iomap_ops *ops); -- cgit v1.2.3 From eb0c062161cf5f98556a906c48b0cfc019d9e89c Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 16 Sep 2024 15:33:20 +0200 Subject: gpu: host1x: Set up device DMA parameters In order to store device DMA parameters, the DMA framework depends on the device's dma_parms field to point at a valid memory location. Add backing storage for this in struct host1x_memory_context and point to it. Reported-by: Jonathan Hunter Reviewed-by: Christoph Hellwig Tested-by: Jon Hunter Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20240916133320.368620-1-thierry.reding@gmail.com (cherry picked from commit b4ad4ef374d66cc8df3188bb1ddb65bce5fc9e50) Signed-off-by: Thierry Reding --- include/linux/host1x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 9c8119ed13a4..c4dde3aafcac 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -466,6 +466,7 @@ struct host1x_memory_context { refcount_t ref; struct pid *owner; + struct device_dma_parameters dma_parms; struct device dev; u64 dma_mask; u32 stream_id; -- cgit v1.2.3 From 56440d7ec28d60f8da3bfa09062b3368ff9b16db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Oct 2024 17:12:17 +0000 Subject: genetlink: hold RCU in genlmsg_mcast() While running net selftests with CONFIG_PROVE_RCU_LIST=y I saw one lockdep splat [1]. genlmsg_mcast() uses for_each_net_rcu(), and must therefore hold RCU. Instead of letting all callers guard genlmsg_multicast_allns() with a rcu_read_lock()/rcu_read_unlock() pair, do it in genlmsg_mcast(). This also means the @flags parameter is useless, we need to always use GFP_ATOMIC. [1] [10882.424136] ============================= [10882.424166] WARNING: suspicious RCU usage [10882.424309] 6.12.0-rc2-virtme #1156 Not tainted [10882.424400] ----------------------------- [10882.424423] net/netlink/genetlink.c:1940 RCU-list traversed in non-reader section!! [10882.424469] other info that might help us debug this: [10882.424500] rcu_scheduler_active = 2, debug_locks = 1 [10882.424744] 2 locks held by ip/15677: [10882.424791] #0: ffffffffb6b491b0 (cb_lock){++++}-{3:3}, at: genl_rcv (net/netlink/genetlink.c:1219) [10882.426334] #1: ffffffffb6b49248 (genl_mutex){+.+.}-{3:3}, at: genl_rcv_msg (net/netlink/genetlink.c:61 net/netlink/genetlink.c:57 net/netlink/genetlink.c:1209) [10882.426465] stack backtrace: [10882.426805] CPU: 14 UID: 0 PID: 15677 Comm: ip Not tainted 6.12.0-rc2-virtme #1156 [10882.426919] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [10882.427046] Call Trace: [10882.427131] [10882.427244] dump_stack_lvl (lib/dump_stack.c:123) [10882.427335] lockdep_rcu_suspicious (kernel/locking/lockdep.c:6822) [10882.427387] genlmsg_multicast_allns (net/netlink/genetlink.c:1940 (discriminator 7) net/netlink/genetlink.c:1977 (discriminator 7)) [10882.427436] l2tp_tunnel_notify.constprop.0 (net/l2tp/l2tp_netlink.c:119) l2tp_netlink [10882.427683] l2tp_nl_cmd_tunnel_create (net/l2tp/l2tp_netlink.c:253) l2tp_netlink [10882.427748] genl_family_rcv_msg_doit (net/netlink/genetlink.c:1115) [10882.427834] genl_rcv_msg (net/netlink/genetlink.c:1195 net/netlink/genetlink.c:1210) [10882.427877] ? __pfx_l2tp_nl_cmd_tunnel_create (net/l2tp/l2tp_netlink.c:186) l2tp_netlink [10882.427927] ? __pfx_genl_rcv_msg (net/netlink/genetlink.c:1201) [10882.427959] netlink_rcv_skb (net/netlink/af_netlink.c:2551) [10882.428069] genl_rcv (net/netlink/genetlink.c:1220) [10882.428095] netlink_unicast (net/netlink/af_netlink.c:1332 net/netlink/af_netlink.c:1357) [10882.428140] netlink_sendmsg (net/netlink/af_netlink.c:1901) [10882.428210] ____sys_sendmsg (net/socket.c:729 (discriminator 1) net/socket.c:744 (discriminator 1) net/socket.c:2607 (discriminator 1)) Fixes: 33f72e6f0c67 ("l2tp : multicast notification to the registered listeners") Signed-off-by: Eric Dumazet Cc: James Chapman Cc: Tom Parkin Cc: Johannes Berg Link: https://patch.msgid.link/20241011171217.3166614-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/genetlink.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 9ab49bfeae78..c1d91f1d20f6 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -531,13 +531,12 @@ static inline int genlmsg_multicast(const struct genl_family *family, * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: offset of multicast group in groups array - * @flags: allocation flags * * This function must hold the RTNL or rcu_read_lock(). */ int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, - unsigned int group, gfp_t flags); + unsigned int group); /** * genlmsg_unicast - unicast a netlink message -- cgit v1.2.3 From 42aafd8b48adac1c3b20fe5892b1b91b80c1a1e6 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Oct 2024 21:48:47 +0800 Subject: ublk: don't allow user copy for unprivileged device UBLK_F_USER_COPY requires userspace to call write() on ublk char device for filling request buffer, and unprivileged device can't be trusted. So don't allow user copy for unprivileged device. Cc: stable@vger.kernel.org Fixes: 1172d5b8beca ("ublk: support user copy") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20241016134847.2911721-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index c8dc5f8ea699..12873639ea96 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -175,7 +175,13 @@ /* use ioctl encoding for uring command */ #define UBLK_F_CMD_IOCTL_ENCODE (1UL << 6) -/* Copy between request and user buffer by pread()/pwrite() */ +/* + * Copy between request and user buffer by pread()/pwrite() + * + * Not available for UBLK_F_UNPRIVILEGED_DEV, otherwise userspace may + * deceive us by not filling request buffer, then kernel uninitialized + * data may be leaked. + */ #define UBLK_F_USER_COPY (1UL << 7) /* -- cgit v1.2.3 From 8f3ce3d996bf1e2f8474ec3ddabdb8765c19e6ea Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 7 Oct 2024 16:30:49 +0200 Subject: mm: percpu: increase PERCPU_DYNAMIC_SIZE_SHIFT on certain builds. Arnd reported a build failure due to the BUILD_BUG_ON() statement in alloc_kmem_cache_cpus(). The test PERCPU_DYNAMIC_EARLY_SIZE < NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu) The factors that increase the right side of the equation: - PAGE_SIZE > 4KiB increases KMALLOC_SHIFT_HIGH - For the local_lock_t in kmem_cache_cpu: - PREEMPT_RT adds an actual lock. - LOCKDEP increases the size of the lock. - LOCK_STAT adds additional bytes plus padding to the lockdep structure. The net difference with and without PREEMPT_RT is 88 bytes for the lock_lock_t, 96 bytes for kmem_cache_cpu due to additional padding. This is enough to exceed the 80KiB limit with 16KiB page size - the 8KiB page size is fine. Increase PERCPU_DYNAMIC_SIZE_SHIFT to 13 on configs with PAGE_SIZE larger than 4KiB and LOCKDEP enabled. Link: https://lkml.kernel.org/r/20241007143049.gyMpEu89@linutronix.de Fixes: d8fccd9ca5f9 ("arm64: Allow to enable PREEMPT_RT.") Signed-off-by: Sebastian Andrzej Siewior Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202410020326.iaZIteIx-lkp@intel.com/ Reported-by: Arnd Bergmann Closes: https://lore.kernel.org/20241004095702.637528-1-arnd@kernel.org Acked-by: Arnd Bergmann Acked-by: Vlastimil Babka Acked-by: David Rientjes Cc: Christoph Lameter Cc: Dennis Zhou Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Pekka Enberg Cc: Roman Gushchin Cc: Tejun Heo Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/percpu.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index b6321fc49159..52b5ea663b9f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -41,7 +41,11 @@ PCPU_MIN_ALLOC_SHIFT) #ifdef CONFIG_RANDOM_KMALLOC_CACHES -#define PERCPU_DYNAMIC_SIZE_SHIFT 12 +# if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PAGE_SIZE_4KB) +# define PERCPU_DYNAMIC_SIZE_SHIFT 13 +# else +# define PERCPU_DYNAMIC_SIZE_SHIFT 12 +#endif /* LOCKDEP and PAGE_SIZE > 4KiB */ #else #define PERCPU_DYNAMIC_SIZE_SHIFT 10 #endif -- cgit v1.2.3 From 37f0b47c5143c2957909ced44fc09ffb118c99f7 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 11 Oct 2024 18:17:02 -0700 Subject: mm: khugepaged: fix the arguments order in khugepaged_collapse_file trace point The "addr" and "is_shmem" arguments have different order in TP_PROTO and TP_ARGS. This resulted in the incorrect trace result: text-hugepage-644429 [276] 392092.878683: mm_khugepaged_collapse_file: mm=0xffff20025d52c440, hpage_pfn=0x200678c00, index=512, addr=1, is_shmem=0, filename=text-hugepage, nr=512, result=failed The value of "addr" is wrong because it was treated as bool value, the type of is_shmem. Fix the order in TP_PROTO to keep "addr" is before "is_shmem" since the original patch review suggested this order to achieve best packing. And use "lx" for "addr" instead of "ld" in TP_printk because address is typically shown in hex. After the fix, the trace result looks correct: text-hugepage-7291 [004] 128.627251: mm_khugepaged_collapse_file: mm=0xffff0001328f9500, hpage_pfn=0x20016ea00, index=512, addr=0x400000, is_shmem=0, filename=text-hugepage, nr=512, result=failed Link: https://lkml.kernel.org/r/20241012011702.1084846-1-yang@os.amperecomputing.com Fixes: 4c9473e87e75 ("mm/khugepaged: add tracepoint to collapse_file()") Signed-off-by: Yang Shi Cc: Gautam Menghani Cc: Steven Rostedt (Google) Cc: [6.2+] Signed-off-by: Andrew Morton --- include/trace/events/huge_memory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index b5f5369b6300..9d5c00b0285c 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -208,7 +208,7 @@ TRACE_EVENT(mm_khugepaged_scan_file, TRACE_EVENT(mm_khugepaged_collapse_file, TP_PROTO(struct mm_struct *mm, struct folio *new_folio, pgoff_t index, - bool is_shmem, unsigned long addr, struct file *file, + unsigned long addr, bool is_shmem, struct file *file, int nr, int result), TP_ARGS(mm, new_folio, index, addr, is_shmem, file, nr, result), TP_STRUCT__entry( @@ -233,7 +233,7 @@ TRACE_EVENT(mm_khugepaged_collapse_file, __entry->result = result; ), - TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%ld, is_shmem=%d, filename=%s, nr=%d, result=%s", + TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%lx, is_shmem=%d, filename=%s, nr=%d, result=%s", __entry->mm, __entry->hpfn, __entry->index, -- cgit v1.2.3 From 963756aac1f011d904ddd9548ae82286d3a91f96 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 11 Oct 2024 12:24:44 +0200 Subject: mm: huge_memory: add vma_thp_disabled() and thp_disabled_by_hw() Patch series "mm: don't install PMD mappings when THPs are disabled by the hw/process/vma". During testing, it was found that we can get PMD mappings in processes where THP (and more precisely, PMD mappings) are supposed to be disabled. While it works as expected for anon+shmem, the pagecache is the problematic bit. For s390 KVM this currently means that a VM backed by a file located on filesystem with large folio support can crash when KVM tries accessing the problematic page, because the readahead logic might decide to use a PMD-sized THP and faulting it into the page tables will install a PMD mapping, something that s390 KVM cannot tolerate. This might also be a problem with HW that does not support PMD mappings, but I did not try reproducing it. Fix it by respecting the ways to disable THPs when deciding whether we can install a PMD mapping. khugepaged should already be taking care of not collapsing if THPs are effectively disabled for the hw/process/vma. This patch (of 2): Add vma_thp_disabled() and thp_disabled_by_hw() helpers to be shared by shmem_allowable_huge_orders() and __thp_vma_allowable_orders(). [david@redhat.com: rename to vma_thp_disabled(), split out thp_disabled_by_hw() ] Link: https://lkml.kernel.org/r/20241011102445.934409-2-david@redhat.com Fixes: 793917d997df ("mm/readahead: Add large folio readahead") Signed-off-by: Kefeng Wang Signed-off-by: David Hildenbrand Reported-by: Leo Fu Tested-by: Thomas Huth Reviewed-by: Ryan Roberts Cc: Boqiao Fu Cc: Christian Borntraeger Cc: Claudio Imbrenda Cc: Hugh Dickins Cc: Janosch Frank Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 67d0ab3c3bba..ef5b80e48599 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -322,6 +322,24 @@ struct thpsize { (transparent_hugepage_flags & \ (1<vm_mm->flags); +} + +static inline bool thp_disabled_by_hw(void) +{ + /* If the hardware/firmware marked hugepage support disabled. */ + return transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED); +} + unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, -- cgit v1.2.3 From 9c5bd93edf7b8834aecaa7c340b852d5990d7c78 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Sun, 13 Oct 2024 18:26:39 +0200 Subject: bpf, sockmap: SK_DROP on attempted redirects of unsupported af_vsock Don't mislead the callers of bpf_{sk,msg}_redirect_{map,hash}(): make sure to immediately and visibly fail the forwarding of unsupported af_vsock packets. Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Michal Luczaj Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20241013-vsock-fixes-for-redir-v2-1-d6577bbfe742@rbox.co --- include/net/sock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index c58ca8dd561b..c87295f3476d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2715,6 +2715,11 @@ static inline bool sk_is_stream_unix(const struct sock *sk) return sk->sk_family == AF_UNIX && sk->sk_type == SOCK_STREAM; } +static inline bool sk_is_vsock(const struct sock *sk) +{ + return sk->sk_family == AF_VSOCK; +} + /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from -- cgit v1.2.3 From 78b2770c935fc1434a95cc17613fe31165b02dfe Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 17 Oct 2024 11:10:55 -0400 Subject: dma-mapping: fix tracing dma_alloc/free with vmalloc'd memory Not all virtual addresses have physical addresses, such as if they were vmalloc'd. Just trace the virtual address instead of trying to trace a physical address. This aligns with the API, and is good enough to associate dma_alloc with dma_free. Fixes: 038eb433dc14 ("dma-mapping: add tracing for dma-mapping API calls") Reported-by: syzbot+b4bfacdec173efaa8567@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/670ebde5.050a0220.d9b66.0154.GAE@google.com/ Signed-off-by: Sean Anderson Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 569f86a44aaa..b0f41265191c 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -121,7 +121,7 @@ TRACE_EVENT(dma_alloc, TP_STRUCT__entry( __string(device, dev_name(dev)) - __field(u64, phys_addr) + __field(void *, virt_addr) __field(u64, dma_addr) __field(size_t, size) __field(gfp_t, flags) @@ -130,18 +130,18 @@ TRACE_EVENT(dma_alloc, TP_fast_assign( __assign_str(device); - __entry->phys_addr = virt_to_phys(virt_addr); + __entry->virt_addr = virt_addr; __entry->dma_addr = dma_addr; __entry->size = size; __entry->flags = flags; __entry->attrs = attrs; ), - TP_printk("%s dma_addr=%llx size=%zu phys_addr=%llx flags=%s attrs=%s", + TP_printk("%s dma_addr=%llx size=%zu virt_addr=%p flags=%s attrs=%s", __get_str(device), __entry->dma_addr, __entry->size, - __entry->phys_addr, + __entry->virt_addr, show_gfp_flags(__entry->flags), decode_dma_attrs(__entry->attrs)) ); @@ -153,7 +153,7 @@ TRACE_EVENT(dma_free, TP_STRUCT__entry( __string(device, dev_name(dev)) - __field(u64, phys_addr) + __field(void *, virt_addr) __field(u64, dma_addr) __field(size_t, size) __field(unsigned long, attrs) @@ -161,17 +161,17 @@ TRACE_EVENT(dma_free, TP_fast_assign( __assign_str(device); - __entry->phys_addr = virt_to_phys(virt_addr); + __entry->virt_addr = virt_addr; __entry->dma_addr = dma_addr; __entry->size = size; __entry->attrs = attrs; ), - TP_printk("%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s", + TP_printk("%s dma_addr=%llx size=%zu virt_addr=%p attrs=%s", __get_str(device), __entry->dma_addr, __entry->size, - __entry->phys_addr, + __entry->virt_addr, decode_dma_attrs(__entry->attrs)) ); -- cgit v1.2.3 From 0fd2a743301b6b5eec0f407080f89bed98384836 Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Sat, 12 Oct 2024 16:45:37 +0800 Subject: xen: Remove dependency between pciback and privcmd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 2fae6bb7be32 ("xen/privcmd: Add new syscall to get gsi from dev") adds a weak reverse dependency to the config XEN_PRIVCMD definition, that dependency causes xen-privcmd can't be loaded on domU, because dependent xen-pciback isn't always be loaded successfully on domU. To solve above problem, remove that dependency, and do not call pcistub_get_gsi_from_sbdf() directly, instead add a hook in drivers/xen/apci.c, xen-pciback register the real call function, then in privcmd_ioctl_pcidev_get_gsi call that hook. Fixes: 2fae6bb7be32 ("xen/privcmd: Add new syscall to get gsi from dev") Reported-by: Marek Marczykowski-Górecki Signed-off-by: Jiqian Chen Reviewed-by: Juergen Gross Message-ID: <20241012084537.1543059-1-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- include/xen/acpi.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/xen/acpi.h b/include/xen/acpi.h index daa96a22d257..c66a8461612e 100644 --- a/include/xen/acpi.h +++ b/include/xen/acpi.h @@ -35,6 +35,8 @@ #include +typedef int (*get_gsi_from_sbdf_t)(u32 sbdf); + #ifdef CONFIG_XEN_DOM0 #include #include @@ -72,6 +74,8 @@ int xen_acpi_get_gsi_info(struct pci_dev *dev, int *gsi_out, int *trigger_out, int *polarity_out); +void xen_acpi_register_get_gsi_func(get_gsi_from_sbdf_t func); +int xen_acpi_get_gsi_from_sbdf(u32 sbdf); #else static inline void xen_acpi_sleep_register(void) { @@ -89,12 +93,12 @@ static inline int xen_acpi_get_gsi_info(struct pci_dev *dev, { return -1; } -#endif -#ifdef CONFIG_XEN_PCI_STUB -int pcistub_get_gsi_from_sbdf(unsigned int sbdf); -#else -static inline int pcistub_get_gsi_from_sbdf(unsigned int sbdf) +static inline void xen_acpi_register_get_gsi_func(get_gsi_from_sbdf_t func) +{ +} + +static inline int xen_acpi_get_gsi_from_sbdf(u32 sbdf) { return -1; } -- cgit v1.2.3 From cfcdf395c21eeac4543d2b8fef9d29ae9e4559e9 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 8 Oct 2024 18:07:02 +0200 Subject: regulator: core: add callback to perform runtime init Provide an initialisation callback to handle runtime parameters. The idea is similar to the regulator_init() callback, but it provides regulator specific structures, instead of just the driver specific data. As an example, this allows the driver to amend the regulator constraints based on runtime parameters if necessary. Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20241008-regulator-ignored-data-v2-2-d1251e0ee507@baylibre.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index f230a472ccd3..d2f4427504f0 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -365,6 +365,8 @@ struct regulator_desc { int (*of_parse_cb)(struct device_node *, const struct regulator_desc *, struct regulator_config *); + int (*init_cb)(struct regulator_dev *, + struct regulator_config *); int id; unsigned int continuous_voltage_range:1; unsigned n_voltages; -- cgit v1.2.3 From 602ff58ae4fe4289b0ca71cba9fb82f7de92cd64 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 8 Oct 2024 18:07:03 +0200 Subject: regulator: core: remove machine init callback from config The machine specific regulator_init() appears to be unused. It does not allow a lot of interaction with the regulator framework, since nothing from the framework is passed along (desc, config, etc ...) Machine specific init may also be done with the added init_cb() in the regulator description, so remove regulator_init(). Signed-off-by: Jerome Brunet Link: https://patch.msgid.link/20241008-regulator-ignored-data-v2-3-d1251e0ee507@baylibre.com Signed-off-by: Mark Brown --- include/linux/regulator/machine.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 0cd76d264727..d0d700ff337a 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -285,8 +285,7 @@ struct regulator_init_data { int num_consumer_supplies; struct regulator_consumer_supply *consumer_supplies; - /* optional regulator machine specific init */ - int (*regulator_init)(void *driver_data); + /* optional regulator machine specific data */ void *driver_data; /* core does not touch this */ }; -- cgit v1.2.3