From 4314f9f4f85832b5082f4e38b07b63b11baa538c Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 8 Jun 2022 10:55:28 +0100 Subject: firmware: arm_scmi: Avoid using extended string-buffers sizes if not necessary Commit b260fccaebdc2 ("firmware: arm_scmi: Add SCMI v3.1 protocol extended names support") moved all the name string buffers to use the extended buffer size of 64 instead of the required 16 bytes. While that should be fine if the firmware terminates the string before 16 bytes, there is possibility of copying random data if the name is not NULL terminated by the firmware. SCMI base protocol agent_name/vendor_id/sub_vendor_id are defined by the specification as NULL-terminated ASCII strings up to 16-bytes in length. The underlying buffers and message descriptors are currently bigger than needed; resize them to fit only the strictly needed 16 bytes to avoid any possible leaks when reading data from the firmware. Change the size argument of strlcpy to use SCMI_SHORT_NAME_MAX_SIZE always when dealing with short domain names, so as to limit the possibility that an ill-formed non-NULL terminated short reply from the SCMI platform firmware can leak stale content laying in the underlying transport shared memory area. While at that, convert all strings handling routines to use the preferred strscpy. Link: https://lore.kernel.org/r/20220608095530.497879-1-cristian.marussi@arm.com Fixes: b260fccaebdc2 ("firmware: arm_scmi: Add SCMI v3.1 protocol extended names support") Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 1c58646ba381..704111f63993 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -13,8 +13,9 @@ #include #include -#define SCMI_MAX_STR_SIZE 64 -#define SCMI_MAX_NUM_RATES 16 +#define SCMI_MAX_STR_SIZE 64 +#define SCMI_SHORT_NAME_MAX_SIZE 16 +#define SCMI_MAX_NUM_RATES 16 /** * struct scmi_revision_info - version information structure @@ -36,8 +37,8 @@ struct scmi_revision_info { u8 num_protocols; u8 num_agents; u32 impl_ver; - char vendor_id[SCMI_MAX_STR_SIZE]; - char sub_vendor_id[SCMI_MAX_STR_SIZE]; + char vendor_id[SCMI_SHORT_NAME_MAX_SIZE]; + char sub_vendor_id[SCMI_SHORT_NAME_MAX_SIZE]; }; struct scmi_clock_info { -- cgit v1.2.3 From 034e5afad921f1c08c001bf147fb1ba76ae33498 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 10 Jun 2022 16:35:13 -0600 Subject: mm: re-allow pinning of zero pfns The commit referenced below subtly and inadvertently changed the logic to disallow pinning of zero pfns. This breaks device assignment with vfio and potentially various other users of gup. Exclude the zero page test from the negation. Link: https://lkml.kernel.org/r/165490039431.944052.12458624139225785964.stgit@omen Fixes: 1c563432588d ("mm: fix is_pinnable_page against a cma page") Signed-off-by: Alex Williamson Acked-by: Minchan Kim Acked-by: David Hildenbrand Reported-by: Yishai Hadas Cc: Paul E. McKenney Cc: John Hubbard Cc: John Dias Cc: Jason Gunthorpe Cc: Zhangfei Gao Cc: Matthew Wilcox Cc: Joao Martins Cc: Yi Liu Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index bc8f326be0ce..781fae17177d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1600,7 +1600,7 @@ static inline bool is_pinnable_page(struct page *page) if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE) return false; #endif - return !(is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page))); + return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page)); } #else static inline bool is_pinnable_page(struct page *page) -- cgit v1.2.3 From 67f22ba7750f940bcd7e1b12720896c505c2d63f Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 15 Jun 2022 17:32:09 +0800 Subject: mm/memory-failure: disable unpoison once hw error happens Currently unpoison_memory(unsigned long pfn) is designed for soft poison(hwpoison-inject) only. Since 17fae1294ad9d, the KPTE gets cleared on a x86 platform once hardware memory corrupts. Unpoisoning a hardware corrupted page puts page back buddy only, the kernel has a chance to access the page with *NOT PRESENT* KPTE. This leads BUG during accessing on the corrupted KPTE. Suggested by David&Naoya, disable unpoison mechanism when a real HW error happens to avoid BUG like this: Unpoison: Software-unpoisoned page 0x61234 BUG: unable to handle page fault for address: ffff888061234000 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 2c01067 P4D 2c01067 PUD 107267063 PMD 10382b063 PTE 800fffff9edcb062 Oops: 0002 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 26551 Comm: stress Kdump: loaded Tainted: G M OE 5.18.0.bm.1-amd64 #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ... RIP: 0010:clear_page_erms+0x7/0x10 Code: ... RSP: 0000:ffffc90001107bc8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000901 RCX: 0000000000001000 RDX: ffffea0001848d00 RSI: ffffea0001848d40 RDI: ffff888061234000 RBP: ffffea0001848d00 R08: 0000000000000901 R09: 0000000000001276 R10: 0000000000000003 R11: 0000000000000000 R12: 0000000000000001 R13: 0000000000000000 R14: 0000000000140dca R15: 0000000000000001 FS: 00007fd8b2333740(0000) GS:ffff88813fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff888061234000 CR3: 00000001023d2005 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: prep_new_page+0x151/0x170 get_page_from_freelist+0xca0/0xe20 ? sysvec_apic_timer_interrupt+0xab/0xc0 ? asm_sysvec_apic_timer_interrupt+0x1b/0x20 __alloc_pages+0x17e/0x340 __folio_alloc+0x17/0x40 vma_alloc_folio+0x84/0x280 __handle_mm_fault+0x8d4/0xeb0 handle_mm_fault+0xd5/0x2a0 do_user_addr_fault+0x1d0/0x680 ? kvm_read_and_reset_apf_flags+0x3b/0x50 exc_page_fault+0x78/0x170 asm_exc_page_fault+0x27/0x30 Link: https://lkml.kernel.org/r/20220615093209.259374-2-pizhenwei@bytedance.com Fixes: 847ce401df392 ("HWPOISON: Add unpoisoning support") Fixes: 17fae1294ad9d ("x86/{mce,mm}: Unmap the entire page if the whole page is affected and poisoned") Signed-off-by: zhenwei pi Acked-by: David Hildenbrand Acked-by: Naoya Horiguchi Reviewed-by: Miaohe Lin Reviewed-by: Oscar Salvador Cc: Greg Kroah-Hartman Cc: [5.8+] Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 781fae17177d..cf3d0d673f6b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3232,6 +3232,7 @@ enum mf_flags { MF_MUST_KILL = 1 << 2, MF_SOFT_OFFLINE = 1 << 3, MF_UNPOISON = 1 << 4, + MF_SW_SIMULATED = 1 << 5, }; extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue(unsigned long pfn, int flags); -- cgit v1.2.3 From 540a92bfe6dab7310b9df2e488ba247d784d0163 Mon Sep 17 00:00:00 2001 From: Edward Wu Date: Fri, 17 Jun 2022 11:32:20 +0800 Subject: ata: libata: add qc->flags in ata_qc_complete_template tracepoint Add flags value to check the result of ata completion Fixes: 255c03d15a29 ("libata: Add tracepoints") Cc: stable@vger.kernel.org Signed-off-by: Edward Wu Signed-off-by: Damien Le Moal --- include/trace/events/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index d4e631aa976f..6025dd8ba4aa 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -288,6 +288,7 @@ DECLARE_EVENT_CLASS(ata_qc_complete_template, __entry->hob_feature = qc->result_tf.hob_feature; __entry->nsect = qc->result_tf.nsect; __entry->hob_nsect = qc->result_tf.hob_nsect; + __entry->flags = qc->flags; ), TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s status=%s " \ -- cgit v1.2.3 From 5cf9c91ba927119fc6606b938b1895bb2459d3bc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 09:48:25 +0200 Subject: block: serialize all debugfs operations using q->debugfs_mutex Various places like I/O schedulers or the QOS infrastructure try to register debugfs files on demans, which can race with creating and removing the main queue debugfs directory. Use the existing debugfs_mutex to serialize all debugfs operations that rely on q->debugfs_dir or the directories hanging off it. To make the teardown code a little simpler declare all debugfs dentry pointers and not just the main one uncoditionally in blkdev.h. Move debugfs_mutex next to the dentries that it protects and document what it is used for. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220614074827.458955-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bb6e3c31b3b7..73c886eba8e1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -482,7 +482,6 @@ struct request_queue { #endif /* CONFIG_BLK_DEV_ZONED */ int node; - struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -526,11 +525,12 @@ struct request_queue { struct bio_set bio_split; struct dentry *debugfs_dir; - -#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; -#endif + /* + * Serializes all debugfs metadata operations using the above dentries. + */ + struct mutex debugfs_mutex; bool mq_sysfs_init_done; -- cgit v1.2.3 From c01d4d0a82b71857be7449380338bc53dde2da92 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 16 Jun 2022 15:00:51 +0200 Subject: random: quiet urandom warning ratelimit suppression message random.c ratelimits how much it warns about uninitialized urandom reads using __ratelimit(). When the RNG is finally initialized, it prints the number of missed messages due to ratelimiting. It has been this way since that functionality was introduced back in 2018. Recently, cc1e127bfa95 ("random: remove ratelimiting for in-kernel unseeded randomness") put a bit more stress on the urandom ratelimiting, which teased out a bug in the implementation. Specifically, when under pressure, __ratelimit() will print its own message and reset the count back to 0, making the final message at the end less useful. Secondly, it does so as a pr_warn(), which apparently is undesirable for people's CI. Fortunately, __ratelimit() has the RATELIMIT_MSG_ON_RELEASE flag exactly for this purpose, so we set the flag. Fixes: 4e00b339e264 ("random: rate limit unseeded randomness warnings") Cc: stable@vger.kernel.org Reported-by: Jon Hunter Reported-by: Ron Economos Tested-by: Ron Economos Signed-off-by: Jason A. Donenfeld --- include/linux/ratelimit_types.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index c21c7f8103e2..002266693e50 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -23,12 +23,16 @@ struct ratelimit_state { unsigned long flags; }; -#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \ - .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ - .interval = interval_init, \ - .burst = burst_init, \ +#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + .flags = flags_init, \ } +#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \ + RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0) + #define RATELIMIT_STATE_INIT_DISABLED \ RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST) -- cgit v1.2.3 From 9243fc4cd28c8bdddd7fe0abd5bbec3c4fdf5052 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 3 Jun 2022 14:35:29 +0900 Subject: block: remove queue from struct blk_independent_access_range The request queue pointer in struct blk_independent_access_range is unused. Remove it. Signed-off-by: Damien Le Moal Fixes: 41e46b3c2aa2 ("block: Fix potential deadlock in blk_ia_range_sysfs_show()") Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220603053529.76405-1-damien.lemoal@opensource.wdc.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 73c886eba8e1..2f7b43444c5f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -342,7 +342,6 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev, */ struct blk_independent_access_range { struct kobject kobj; - struct request_queue *queue; sector_t sector; sector_t nr_sectors; }; -- cgit v1.2.3 From 60050ffe3d770dd1df5b641aa48f49d07a54bd84 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 May 2022 23:48:09 +0100 Subject: certs: Move load_certificate_list() to be with the asymmetric keys code Move load_certificate_list(), which loads a series of binary X.509 certificates from a blob and inserts them as keys into a keyring, to be with the asymmetric keys code that it drives. This makes it easier to add FIPS selftest code in which we need to load up a private keyring for the tests to use. Signed-off-by: David Howells Reviewed-by: Simo Sorce Reviewed-by: Herbert Xu cc: keyrings@vger.kernel.org cc: linux-crypto@vger.kernel.org Link: https://lore.kernel.org/r/165515742145.1554877.13488098107542537203.stgit@warthog.procyon.org.uk/ --- include/keys/asymmetric-type.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/keys/asymmetric-type.h b/include/keys/asymmetric-type.h index 6c5d4963e15b..69a13e1e5b2e 100644 --- a/include/keys/asymmetric-type.h +++ b/include/keys/asymmetric-type.h @@ -84,6 +84,9 @@ extern struct key *find_asymmetric_key(struct key *keyring, const struct asymmetric_key_id *id_2, bool partial); +int x509_load_certificate_list(const u8 cert_list[], const unsigned long list_size, + const struct key *keyring); + /* * The payload is at the discretion of the subtype. */ -- cgit v1.2.3 From e34a07c0ae3906f97eb18df50902e2a01c1015b6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Jun 2022 12:13:53 -0700 Subject: sock: redo the psock vs ULP protection check Commit 8a59f9d1e3d4 ("sock: Introduce sk->sk_prot->psock_update_sk_prot()") has moved the inet_csk_has_ulp(sk) check from sk_psock_init() to the new tcp_bpf_update_proto() function. I'm guessing that this was done to allow creating psocks for non-inet sockets. Unfortunately the destruction path for psock includes the ULP unwind, so we need to fail the sk_psock_init() itself. Otherwise if ULP is already present we'll notice that later, and call tcp_update_ulp() with the sk_proto of the ULP itself, which will most likely result in the ULP looping its callbacks. Fixes: 8a59f9d1e3d4 ("sock: Introduce sk->sk_prot->psock_update_sk_prot()") Signed-off-by: Jakub Kicinski Reviewed-by: John Fastabend Reviewed-by: Jakub Sitnicki Tested-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20220620191353.1184629-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/net/inet_sock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index c1b5dcd6597c..daead5fb389a 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -253,6 +253,11 @@ struct inet_sock { #define IP_CMSG_CHECKSUM BIT(7) #define IP_CMSG_RECVFRAGSIZE BIT(8) +static inline bool sk_is_inet(struct sock *sk) +{ + return sk->sk_family == AF_INET || sk->sk_family == AF_INET6; +} + /** * sk_to_full_sk - Access to a full socket * @sk: pointer to a socket -- cgit v1.2.3 From 23c9cd56007e90b2c2317c5eab6ab12921b4314a Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Tue, 21 Jun 2022 09:05:00 +0200 Subject: nvme: fix the CRIMS and CRWMS definitions to match the spec Adjust the values of NVME_CAP_CRMS_CRIMS and NVME_CAP_CRMS_CRWMS masks as they are different from the ones in TP4084 - Time-to-ready. Fixes: 354201c53e61 ("nvme: add support for TP4084 - Time-to-Ready Enhancements"). Signed-off-by: Joel Granados Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 29ec3e3481ff..e3934003f239 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -233,8 +233,8 @@ enum { }; enum { - NVME_CAP_CRMS_CRIMS = 1ULL << 59, - NVME_CAP_CRMS_CRWMS = 1ULL << 60, + NVME_CAP_CRMS_CRWMS = 1ULL << 59, + NVME_CAP_CRMS_CRIMS = 1ULL << 60, }; struct nvme_id_power_state { -- cgit v1.2.3 From e70b64a3f28b9f54602ae3e706b1dc1338de3df7 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Thu, 23 Jun 2022 01:37:43 -0700 Subject: io_uring: move io_uring_get_opcode out of TP_printk The TP_printk macro's are not supposed to use custom code ([1]) or else tools such as perf cannot use these events. Convert the opcode string representation to use the __string wiring that the event framework provides ([2]). [1]: https://lwn.net/Articles/379903/ [2]: https://lwn.net/Articles/381064/ Fixes: 033b87d24f72 ("io_uring: use the text representation of ops in trace") Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220623083743.2648321-1-dylany@fb.com [axboe: fixup spurious removal of sq_thread assignment] Signed-off-by: Jens Axboe --- include/trace/events/io_uring.h | 42 ++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 66fcc5a1a5b1..aa2f951b07cd 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -158,6 +158,8 @@ TRACE_EVENT(io_uring_queue_async_work, __field( unsigned int, flags ) __field( struct io_wq_work *, work ) __field( int, rw ) + + __string( op_str, io_uring_get_opcode(opcode) ) ), TP_fast_assign( @@ -168,11 +170,13 @@ TRACE_EVENT(io_uring_queue_async_work, __entry->opcode = opcode; __entry->work = work; __entry->rw = rw; + + __assign_str(op_str, io_uring_get_opcode(opcode)); ), TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p", __entry->ctx, __entry->req, __entry->user_data, - io_uring_get_opcode(__entry->opcode), + __get_str(op_str), __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work) ); @@ -198,6 +202,8 @@ TRACE_EVENT(io_uring_defer, __field( void *, req ) __field( unsigned long long, data ) __field( u8, opcode ) + + __string( op_str, io_uring_get_opcode(opcode) ) ), TP_fast_assign( @@ -205,11 +211,13 @@ TRACE_EVENT(io_uring_defer, __entry->req = req; __entry->data = user_data; __entry->opcode = opcode; + + __assign_str(op_str, io_uring_get_opcode(opcode)); ), TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s", __entry->ctx, __entry->req, __entry->data, - io_uring_get_opcode(__entry->opcode)) + __get_str(op_str)) ); /** @@ -298,6 +306,8 @@ TRACE_EVENT(io_uring_fail_link, __field( unsigned long long, user_data ) __field( u8, opcode ) __field( void *, link ) + + __string( op_str, io_uring_get_opcode(opcode) ) ), TP_fast_assign( @@ -306,11 +316,13 @@ TRACE_EVENT(io_uring_fail_link, __entry->user_data = user_data; __entry->opcode = opcode; __entry->link = link; + + __assign_str(op_str, io_uring_get_opcode(opcode)); ), TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p", __entry->ctx, __entry->req, __entry->user_data, - io_uring_get_opcode(__entry->opcode), __entry->link) + __get_str(op_str), __entry->link) ); /** @@ -390,6 +402,8 @@ TRACE_EVENT(io_uring_submit_sqe, __field( u32, flags ) __field( bool, force_nonblock ) __field( bool, sq_thread ) + + __string( op_str, io_uring_get_opcode(opcode) ) ), TP_fast_assign( @@ -400,11 +414,13 @@ TRACE_EVENT(io_uring_submit_sqe, __entry->flags = flags; __entry->force_nonblock = force_nonblock; __entry->sq_thread = sq_thread; + + __assign_str(op_str, io_uring_get_opcode(opcode)); ), TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, " "non block %d, sq_thread %d", __entry->ctx, __entry->req, - __entry->user_data, io_uring_get_opcode(__entry->opcode), + __entry->user_data, __get_str(op_str), __entry->flags, __entry->force_nonblock, __entry->sq_thread) ); @@ -435,6 +451,8 @@ TRACE_EVENT(io_uring_poll_arm, __field( u8, opcode ) __field( int, mask ) __field( int, events ) + + __string( op_str, io_uring_get_opcode(opcode) ) ), TP_fast_assign( @@ -444,11 +462,13 @@ TRACE_EVENT(io_uring_poll_arm, __entry->opcode = opcode; __entry->mask = mask; __entry->events = events; + + __assign_str(op_str, io_uring_get_opcode(opcode)); ), TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x", __entry->ctx, __entry->req, __entry->user_data, - io_uring_get_opcode(__entry->opcode), + __get_str(op_str), __entry->mask, __entry->events) ); @@ -474,6 +494,8 @@ TRACE_EVENT(io_uring_task_add, __field( unsigned long long, user_data ) __field( u8, opcode ) __field( int, mask ) + + __string( op_str, io_uring_get_opcode(opcode) ) ), TP_fast_assign( @@ -482,11 +504,13 @@ TRACE_EVENT(io_uring_task_add, __entry->user_data = user_data; __entry->opcode = opcode; __entry->mask = mask; + + __assign_str(op_str, io_uring_get_opcode(opcode)); ), TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x", __entry->ctx, __entry->req, __entry->user_data, - io_uring_get_opcode(__entry->opcode), + __get_str(op_str), __entry->mask) ); @@ -523,6 +547,8 @@ TRACE_EVENT(io_uring_req_failed, __field( u64, pad1 ) __field( u64, addr3 ) __field( int, error ) + + __string( op_str, io_uring_get_opcode(sqe->opcode) ) ), TP_fast_assign( @@ -542,6 +568,8 @@ TRACE_EVENT(io_uring_req_failed, __entry->pad1 = sqe->__pad2[0]; __entry->addr3 = sqe->addr3; __entry->error = error; + + __assign_str(op_str, io_uring_get_opcode(sqe->opcode)); ), TP_printk("ring %p, req %p, user_data 0x%llx, " @@ -550,7 +578,7 @@ TRACE_EVENT(io_uring_req_failed, "personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, " "error=%d", __entry->ctx, __entry->req, __entry->user_data, - io_uring_get_opcode(__entry->opcode), + __get_str(op_str), __entry->flags, __entry->ioprio, (unsigned long long)__entry->off, (unsigned long long) __entry->addr, __entry->len, -- cgit v1.2.3 From 20fb0c8272bbb102d15bdd11aa64f828619dd7cc Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:52 +0200 Subject: Revert "printk: Wait for the global console lock when the system is going down" This reverts commit b87f02307d3cfbda768520f0687c51ca77e14fc3. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-2-pmladek@suse.com --- include/linux/printk.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index c1e07c0652c7..cd26aab0ab2a 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -174,7 +174,6 @@ extern void printk_prefer_direct_enter(void); extern void printk_prefer_direct_exit(void); extern bool pr_flush(int timeout_ms, bool reset_on_progress); -extern void try_block_console_kthreads(int timeout_ms); /* * Please don't use printk_ratelimit(), because it shares ratelimiting state @@ -239,10 +238,6 @@ static inline bool pr_flush(int timeout_ms, bool reset_on_progress) return true; } -static inline void try_block_console_kthreads(int timeout_ms) -{ -} - static inline int printk_ratelimit(void) { return 0; -- cgit v1.2.3 From 2d9ef940f89e0ab4fde7ba6f769d82f2a450c35a Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:55 +0200 Subject: Revert "printk: extend console_lock for per-console locking" This reverts commit 8e274732115f63c1d09136284431b3555bd5cc56. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-5-pmladek@suse.com --- include/linux/console.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 143653090c48..9a251e70c090 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -16,7 +16,6 @@ #include #include -#include struct vc_data; struct console_font_op; @@ -155,20 +154,6 @@ struct console { u64 seq; unsigned long dropped; struct task_struct *thread; - bool blocked; - - /* - * The per-console lock is used by printing kthreads to synchronize - * this console with callers of console_lock(). This is necessary in - * order to allow printing kthreads to run in parallel to each other, - * while each safely accessing the @blocked field and synchronizing - * against direct printing via console_lock/console_unlock. - * - * Note: For synchronizing against direct printing via - * console_trylock/console_unlock, see the static global - * variable @console_kthreads_active. - */ - struct mutex lock; void *data; struct console *next; -- cgit v1.2.3 From 5831788afb17b89c5b531fb60cbd798613ccbb63 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:56 +0200 Subject: Revert "printk: add kthread console printers" This reverts commit 09c5ba0aa2fcfdadb17d045c3ee6f86d69270df7. This reverts commit b87f02307d3cfbda768520f0687c51ca77e14fc3. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-6-pmladek@suse.com --- include/linux/console.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 9a251e70c090..8c1686e2c233 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -153,8 +153,6 @@ struct console { uint ospeed; u64 seq; unsigned long dropped; - struct task_struct *thread; - void *data; struct console *next; }; -- cgit v1.2.3 From 07a22b61946f0b80065b0ddcc703b715f84355f5 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 23 Jun 2022 16:51:57 +0200 Subject: Revert "printk: add functions to prefer direct printing" This reverts commit 2bb2b7b57f81255c13f4395ea911d6bdc70c9fe2. The testing of 5.19 release candidates revealed missing synchronization between early and regular console functionality. It would be possible to start the console kthreads later as a workaround. But it is clear that console lock serialized console drivers between each other. It opens a big area of possible problems that were not considered by people involved in the development and review. printk() is crucial for debugging kernel issues and console output is very important part of it. The number of consoles is huge and a proper review would take some time. As a result it need to be reverted for 5.19. Link: https://lore.kernel.org/r/YrBdjVwBOVgLfHyb@alley Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220623145157.21938-7-pmladek@suse.com --- include/linux/printk.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index cd26aab0ab2a..091fba7283e1 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -170,9 +170,6 @@ extern void __printk_safe_exit(void); #define printk_deferred_enter __printk_safe_enter #define printk_deferred_exit __printk_safe_exit -extern void printk_prefer_direct_enter(void); -extern void printk_prefer_direct_exit(void); - extern bool pr_flush(int timeout_ms, bool reset_on_progress); /* @@ -225,14 +222,6 @@ static inline void printk_deferred_exit(void) { } -static inline void printk_prefer_direct_enter(void) -{ -} - -static inline void printk_prefer_direct_exit(void) -{ -} - static inline bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; -- cgit v1.2.3 From c7e1c443584ddd7facffca00e9e23b0d084e5bb3 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Mon, 6 Jun 2022 13:44:24 +0900 Subject: gpio: Fix kernel-doc comments to nested union Commit 48ec13d36d3f ("gpio: Properly document parent data union") is supposed to have fixed a warning from "make htmldocs" regarding kernel-doc comments to union members. However, the same warning still remains [1]. Fix the issue by following the example found in section "Nested structs/unions" of Documentation/doc-guide/kernel-doc.rst. Signed-off-by: Akira Yokosawa Reported-by: Stephen Rothwell Fixes: 48ec13d36d3f ("gpio: Properly document parent data union") Link: https://lore.kernel.org/r/20220606093302.21febee3@canb.auug.org.au/ [1] Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: Joey Gouly Cc: Marc Zyngier Tested-by: Stephen Rothwell Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index b1e0f1f8ee2e..54c3c6506503 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -167,21 +167,24 @@ struct gpio_irq_chip { */ irq_flow_handler_t parent_handler; - /** - * @parent_handler_data: - * - * If @per_parent_data is false, @parent_handler_data is a single - * pointer used as the data associated with every parent interrupt. - * - * @parent_handler_data_array: - * - * If @per_parent_data is true, @parent_handler_data_array is - * an array of @num_parents pointers, and is used to associate - * different data for each parent. This cannot be NULL if - * @per_parent_data is true. - */ union { + /** + * @parent_handler_data: + * + * If @per_parent_data is false, @parent_handler_data is a + * single pointer used as the data associated with every + * parent interrupt. + */ void *parent_handler_data; + + /** + * @parent_handler_data_array: + * + * If @per_parent_data is true, @parent_handler_data_array is + * an array of @num_parents pointers, and is used to associate + * different data for each parent. This cannot be NULL if + * @per_parent_data is true. + */ void **parent_handler_data_array; }; -- cgit v1.2.3 From c346dae4f3fbce51bbd4f2ec5e8c6f9b91e93163 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 22 Jun 2022 09:29:40 +0800 Subject: virtio: disable notification hardening by default We try to harden virtio device notifications in 8b4ec69d7e09 ("virtio: harden vring IRQ"). It works with the assumption that the driver or core can properly call virtio_device_ready() at the right place. Unfortunately, this seems to be not true and uncover various bugs of the existing drivers, mainly the issue of using virtio_device_ready() incorrectly. So let's add a Kconfig option and disable it by default. It gives us time to fix the drivers and then we can consider re-enabling it. Signed-off-by: Jason Wang Message-Id: <20220622012940.21441-1-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- include/linux/virtio_config.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 49c7c32815f1..b47c2e7ed0ee 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -257,6 +257,7 @@ void virtio_device_ready(struct virtio_device *dev) WARN_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION /* * The virtio_synchronize_cbs() makes sure vring_interrupt() * will see the driver specific setup if it sees vq->broken @@ -264,6 +265,7 @@ void virtio_device_ready(struct virtio_device *dev) */ virtio_synchronize_cbs(dev); __virtio_unbreak_device(dev); +#endif /* * The transport should ensure the visibility of vq->broken * before setting DRIVER_OK. See the comments for the transport -- cgit v1.2.3 From 3b89b511ea0c705cc418440e2abf9d692a556d84 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Jun 2022 16:32:32 +0300 Subject: net: fix IFF_TX_SKB_NO_LINEAR definition The "1<<31" shift has a sign extension bug so IFF_TX_SKB_NO_LINEAR is 0xffffffff80000000 instead of 0x0000000080000000. Fixes: c2ff53d8049f ("net: Add priv_flags for allow tx skb without linear") Signed-off-by: Dan Carpenter Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/YrRrcGttfEVnf85Q@kili Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f615a66c89e9..2563d30736e9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1671,7 +1671,7 @@ enum netdev_priv_flags { IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_LIVE_RENAME_OK = 1<<30, - IFF_TX_SKB_NO_LINEAR = 1<<31, + IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), }; -- cgit v1.2.3 From e34b9ed96ce3b06c79bf884009b16961ca478f87 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Jun 2022 16:43:57 +0200 Subject: netfilter: nf_tables: avoid skb access on nf_stolen When verdict is NF_STOLEN, the skb might have been freed. When tracing is enabled, this can result in a use-after-free: 1. access to skb->nf_trace 2. access to skb->mark 3. computation of trace id 4. dump of packet payload To avoid 1, keep a cached copy of skb->nf_trace in the trace state struct. Refresh this copy whenever verdict is != STOLEN. Avoid 2 by skipping skb->mark access if verdict is STOLEN. 3 is avoided by precomputing the trace id. Only dump the packet when verdict is not "STOLEN". Reported-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 279ae0fff7ad..5c4e5a96a984 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1338,24 +1338,28 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type); /** * struct nft_traceinfo - nft tracing information and state * + * @trace: other struct members are initialised + * @nf_trace: copy of skb->nf_trace before rule evaluation + * @type: event type (enum nft_trace_types) + * @skbid: hash of skb to be used as trace id + * @packet_dumped: packet headers sent in a previous traceinfo message * @pkt: pktinfo currently processed * @basechain: base chain currently processed * @chain: chain currently processed * @rule: rule that was evaluated * @verdict: verdict given by rule - * @type: event type (enum nft_trace_types) - * @packet_dumped: packet headers sent in a previous traceinfo message - * @trace: other struct members are initialised */ struct nft_traceinfo { + bool trace; + bool nf_trace; + bool packet_dumped; + enum nft_trace_types type:8; + u32 skbid; const struct nft_pktinfo *pkt; const struct nft_base_chain *basechain; const struct nft_chain *chain; const struct nft_rule_dp *rule; const struct nft_verdict *verdict; - enum nft_trace_types type; - bool packet_dumped; - bool trace; }; void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, -- cgit v1.2.3 From 8698e3bab4dd7968666e84e111d0bfd17c040e77 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 27 Jun 2022 20:47:19 +0300 Subject: fanotify: refine the validation checks on non-dir inode mask Commit ceaf69f8eadc ("fanotify: do not allow setting dirent events in mask of non-dir") added restrictions about setting dirent events in the mask of a non-dir inode mark, which does not make any sense. For backward compatibility, these restictions were added only to new (v5.17+) APIs. It also does not make any sense to set the flags FAN_EVENT_ON_CHILD or FAN_ONDIR in the mask of a non-dir inode. Add these flags to the dir-only restriction of the new APIs as well. Move the check of the dir-only flags for new APIs into the helper fanotify_events_supported(), which is only called for FAN_MARK_ADD, because there is no need to error on an attempt to remove the dir-only flags from non-dir inode. Fixes: ceaf69f8eadc ("fanotify: do not allow setting dirent events in mask of non-dir") Link: https://lore.kernel.org/linux-fsdevel/20220627113224.kr2725conevh53u4@quack3.lan/ Link: https://lore.kernel.org/r/20220627174719.2838175-1-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index edc28555814c..e517dbcf74ed 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -111,6 +111,10 @@ FANOTIFY_PERM_EVENTS | \ FAN_Q_OVERFLOW | FAN_ONDIR) +/* Events and flags relevant only for directories */ +#define FANOTIFY_DIRONLY_EVENT_BITS (FANOTIFY_DIRENT_EVENTS | \ + FAN_EVENT_ON_CHILD | FAN_ONDIR) + #define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \ FANOTIFY_EVENT_FLAGS) -- cgit v1.2.3 From 0fe3dbbefb74a8575f61d7801b08dbc50523d60d Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Mon, 27 Jun 2022 22:00:04 +0800 Subject: linux/dim: Fix divide by 0 in RDMA DIM Fix a divide 0 error in rdma_dim_stats_compare() when prev->cpe_ratio == 0. CallTrace: Hardware name: H3C R4900 G3/RS33M2C9S, BIOS 2.00.37P21 03/12/2020 task: ffff880194b78000 task.stack: ffffc90006714000 RIP: 0010:backport_rdma_dim+0x10e/0x240 [mlx_compat] RSP: 0018:ffff880c10e83ec0 EFLAGS: 00010202 RAX: 0000000000002710 RBX: ffff88096cd7f780 RCX: 0000000000000064 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000001 RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 000000001d7c6c09 R13: ffff88096cd7f780 R14: ffff880b174fe800 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff880c10e80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000a0965b00 CR3: 000000000200a003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ib_poll_handler+0x43/0x80 [ib_core] irq_poll_softirq+0xae/0x110 __do_softirq+0xd1/0x28c irq_exit+0xde/0xf0 do_IRQ+0x54/0xe0 common_interrupt+0x8f/0x8f ? cpuidle_enter_state+0xd9/0x2a0 ? cpuidle_enter_state+0xc7/0x2a0 ? do_idle+0x170/0x1d0 ? cpu_startup_entry+0x6f/0x80 ? start_secondary+0x1b9/0x210 ? secondary_startup_64+0xa5/0xb0 Code: 0f 87 e1 00 00 00 8b 4c 24 14 44 8b 43 14 89 c8 4d 63 c8 44 29 c0 99 31 d0 29 d0 31 d2 48 98 48 8d 04 80 48 8d 04 80 48 c1 e0 02 <49> f7 f1 48 83 f8 0a 0f 86 c1 00 00 00 44 39 c1 7f 10 48 89 df RIP: backport_rdma_dim+0x10e/0x240 [mlx_compat] RSP: ffff880c10e83ec0 Fixes: f4915455dcf0 ("linux/dim: Implement RDMA adaptive moderation (DIM)") Link: https://lore.kernel.org/r/20220627140004.3099-1-thomas.liu@ucloud.cn Signed-off-by: Tao Liu Reviewed-by: Max Gurtovoy Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/dim.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dim.h b/include/linux/dim.h index b698266d0035..6c5733981563 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -21,7 +21,7 @@ * We consider 10% difference as significant. */ #define IS_SIGNIFICANT_DIFF(val, ref) \ - (((100UL * abs((val) - (ref))) / (ref)) > 10) + ((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10)) /* * Calculate the gap between two values. -- cgit v1.2.3 From 06e445f740c1a0fe5d16b3dff8a4ef18e124e54e Mon Sep 17 00:00:00 2001 From: Ossama Othman Date: Mon, 27 Jun 2022 18:02:42 -0700 Subject: mptcp: fix conflict with Including before the C library header causes symbol redefinition errors at compile-time due to duplicate declarations and definitions in the header included by . Explicitly include before in when __KERNEL__ is not defined so that the C library compatibility logic in is enabled when including in user space code. Fixes: c11c5906bc0a ("mptcp: add MPTCP_SUBFLOW_ADDRS getsockopt support") Signed-off-by: Ossama Othman Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 921963589904..dfe19bf13f4c 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -2,16 +2,17 @@ #ifndef _UAPI_MPTCP_H #define _UAPI_MPTCP_H +#ifndef __KERNEL__ +#include /* for sockaddr_in and sockaddr_in6 */ +#include /* for struct sockaddr */ +#endif + #include #include #include /* for sockaddr_in */ #include /* for sockaddr_in6 */ #include /* for sockaddr_storage and sa_family */ -#ifndef __KERNEL__ -#include /* for struct sockaddr */ -#endif - #define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0) #define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1) #define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2) -- cgit v1.2.3 From 9e121040e54abef9ed5542e5fdfa87911cd96204 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 7 Jun 2022 20:23:34 +0200 Subject: firmware: sysfb: Make sysfb_create_simplefb() return a pdev pointer This function just returned 0 on success or an errno code on error, but it could be useful for sysfb_init() callers to have a pointer to the device. Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-2-javierm@redhat.com --- include/linux/sysfb.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index b0dcfa26d07b..708152e9037b 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -72,8 +72,8 @@ static inline void sysfb_apply_efi_quirks(struct platform_device *pd) bool sysfb_parse_mode(const struct screen_info *si, struct simplefb_platform_data *mode); -int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode); +struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode); #else /* CONFIG_SYSFB_SIMPLE */ @@ -83,10 +83,10 @@ static inline bool sysfb_parse_mode(const struct screen_info *si, return false; } -static inline int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) +static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) { - return -EINVAL; + return ERR_PTR(-EINVAL); } #endif /* CONFIG_SYSFB_SIMPLE */ -- cgit v1.2.3 From bde376e9de3c0bc55eedc8956b0f114c05531595 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 7 Jun 2022 20:23:35 +0200 Subject: firmware: sysfb: Add sysfb_disable() helper function This can be used by subsystems to unregister a platform device registered by sysfb and also to disable future platform device registration in sysfb. Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-3-javierm@redhat.com --- include/linux/sysfb.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index 708152e9037b..8ba8b5be5567 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -55,6 +55,18 @@ struct efifb_dmi_info { int flags; }; +#ifdef CONFIG_SYSFB + +void sysfb_disable(void); + +#else /* CONFIG_SYSFB */ + +static inline void sysfb_disable(void) +{ +} + +#endif /* CONFIG_SYSFB */ + #ifdef CONFIG_EFI extern struct efifb_dmi_info efifb_dmi_list[]; -- cgit v1.2.3 From 20b8264394b33adb1640a485a62a84bc1388b6a3 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 21 Jun 2022 20:39:21 +0000 Subject: drm/fourcc: fix integer type usage in uapi header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kernel uapi headers are supposed to use __[us]{8,16,32,64} types defined by as opposed to 'uint32_t' and similar. See [1] for the relevant discussion about this topic. In this particular case, the usage of 'uint64_t' escaped headers_check as these macros are not being called here. However, the following program triggers a compilation error: #include int main() { unsigned long x = AMD_FMT_MOD_CLEAR(RB); return 0; } gcc error: drm.c:5:27: error: ‘uint64_t’ undeclared (first use in this function) 5 | unsigned long x = AMD_FMT_MOD_CLEAR(RB); | ^~~~~~~~~~~~~~~~~ This patch changes AMD_FMT_MOD_{SET,CLEAR} macros to use the correct integer types, which fixes the above issue. [1] https://lkml.org/lkml/2019/6/5/18 Fixes: 8ba16d599374 ("drm/fourcc: Add AMD DRM modifiers.") Signed-off-by: Carlos Llamas Reviewed-by: Simon Ser Signed-off-by: Alex Deucher --- include/uapi/drm/drm_fourcc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index f1972154a594..0980678d502d 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1444,11 +1444,11 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_PIPE_MASK 0x7 #define AMD_FMT_MOD_SET(field, value) \ - ((uint64_t)(value) << AMD_FMT_MOD_##field##_SHIFT) + ((__u64)(value) << AMD_FMT_MOD_##field##_SHIFT) #define AMD_FMT_MOD_GET(field, value) \ (((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK) #define AMD_FMT_MOD_CLEAR(field) \ - (~((uint64_t)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) + (~((__u64)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) #if defined(__cplusplus) } -- cgit v1.2.3 From b5d281f6c16dd432b618bdfd36ddba1a58d5b603 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Mon, 20 Jun 2022 00:03:51 +0200 Subject: PM / devfreq: Rework freq_table to be local to devfreq struct On a devfreq PROBE_DEFER, the freq_table in the driver profile struct, is never reset and may be leaved in an undefined state. This comes from the fact that we store the freq_table in the driver profile struct that is commonly defined as static and not reset on PROBE_DEFER. We currently skip the reinit of the freq_table if we found it's already defined since a driver may declare his own freq_table. This logic is flawed in the case devfreq core generate a freq_table, set it in the profile struct and then PROBE_DEFER, freeing the freq_table. In this case devfreq will found a NOT NULL freq_table that has been freed, skip the freq_table generation and probe the driver based on the wrong table. To fix this and correctly handle PROBE_DEFER, use a local freq_table and max_state in the devfreq struct and never modify the freq_table present in the profile struct if it does provide it. Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device") Cc: stable@vger.kernel.org Signed-off-by: Christian Marangi Signed-off-by: Chanwoo Choi --- include/linux/devfreq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index dc10bee75a72..34aab4dd336c 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -148,6 +148,8 @@ struct devfreq_stats { * reevaluate operable frequencies. Devfreq users may use * devfreq.nb to the corresponding register notifier call chain. * @work: delayed work for load monitoring. + * @freq_table: current frequency table used by the devfreq driver. + * @max_state: count of entry present in the frequency table. * @previous_freq: previously configured frequency value. * @last_status: devfreq user device info, performance statistics * @data: Private data of the governor. The devfreq framework does not @@ -185,6 +187,9 @@ struct devfreq { struct notifier_block nb; struct delayed_work work; + unsigned long *freq_table; + unsigned int max_state; + unsigned long previous_freq; struct devfreq_dev_status last_status; -- cgit v1.2.3 From 1758bde2e4aa5ff188d53e7d9d388bbb7e12eebb Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 28 Jun 2022 12:15:08 +0200 Subject: net: phy: Don't trigger state machine while in suspend Upon system sleep, mdio_bus_phy_suspend() stops the phy_state_machine(), but subsequent interrupts may retrigger it: They may have been left enabled to facilitate wakeup and are not quiesced until the ->suspend_noirq() phase. Unwanted interrupts may hence occur between mdio_bus_phy_suspend() and dpm_suspend_noirq(), as well as between dpm_resume_noirq() and mdio_bus_phy_resume(). Retriggering the phy_state_machine() through an interrupt is not only undesirable for the reason given in mdio_bus_phy_suspend() (freezing it midway with phydev->lock held), but also because the PHY may be inaccessible after it's suspended: Accesses to USB-attached PHYs are blocked once usb_suspend_both() clears the can_submit flag and PHYs on PCI network cards may become inaccessible upon suspend as well. Amend phy_interrupt() to avoid triggering the state machine if the PHY is suspended. Signal wakeup instead if the attached net_device or its parent has been configured as a wakeup source. (Those conditions are identical to mdio_bus_phy_may_suspend().) Postpone handling of the interrupt until the PHY has resumed. Before stopping the phy_state_machine() in mdio_bus_phy_suspend(), wait for a concurrent phy_interrupt() to run to completion. That is necessary because phy_interrupt() may have checked the PHY's suspend status before the system sleep transition commenced and it may thus retrigger the state machine after it was stopped. Likewise, after re-enabling interrupt handling in mdio_bus_phy_resume(), wait for a concurrent phy_interrupt() to complete to ensure that interrupts which it postponed are properly rerun. The issue was exposed by commit 1ce8b37241ed ("usbnet: smsc95xx: Forward PHY interrupts to PHY driver to avoid polling"), but has existed since forever. Fixes: 541cd3ee00a4 ("phylib: Fix deadlock on resume") Link: https://lore.kernel.org/netdev/a5315a8a-32c2-962f-f696-de9a26d30091@samsung.com/ Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Signed-off-by: Lukas Wunner Acked-by: Rafael J. Wysocki Cc: stable@vger.kernel.org # v2.6.33+ Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/b7f386d04e9b5b0e2738f0125743e30676f309ef.1656410895.git.lukas@wunner.de Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 508f1149665b..b09f7d36cff2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -572,6 +572,10 @@ struct macsec_ops; * @mdix_ctrl: User setting of crossover * @pma_extable: Cached value of PMA/PMD Extended Abilities Register * @interrupts: Flag interrupts have been enabled + * @irq_suspended: Flag indicating PHY is suspended and therefore interrupt + * handling shall be postponed until PHY has resumed + * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended, + * requiring a rerun of the interrupt handler after resume * @interface: enum phy_interface_t value * @skb: Netlink message for cable diagnostics * @nest: Netlink nest used for cable diagnostics @@ -626,6 +630,8 @@ struct phy_device { /* Interrupts are enabled */ unsigned interrupts:1; + unsigned irq_suspended:1; + unsigned irq_rerun:1; enum phy_state state; -- cgit v1.2.3 From 29c1ac230e6056b26846c66881802b581a78ad72 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 30 Jun 2022 13:25:57 +0100 Subject: io_uring: keep sendrecv flags in ioprio We waste a u64 SQE field for flags even though we don't need as many bits and it can be used for something more useful later. Store io_uring specific send/recv flags in sqe->ioprio instead of ->addr2. Signed-off-by: Pavel Begunkov Fixes: 0455d4ccec54 ("io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg") [axboe: change comment in io_uring.h as well] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 53e7dae92e42..f10b59d6693e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -244,7 +244,7 @@ enum io_uring_op { #define IORING_ASYNC_CANCEL_ANY (1U << 2) /* - * send/sendmsg and recv/recvmsg flags (sqe->addr2) + * send/sendmsg and recv/recvmsg flags (sqe->ioprio) * * IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send * or receive and arm poll if that yields an -- cgit v1.2.3 From 4a557a5d1a6145ea586dc9b17a9b4e5190c9c017 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 30 Jun 2022 09:34:10 -0700 Subject: sparse: introduce conditional lock acquire function attribute The kernel tends to try to avoid conditional locking semantics because it makes it harder to think about and statically check locking rules, but we do have a few fundamental locking primitives that take locks conditionally - most obviously the 'trylock' functions. That has always been a problem for 'sparse' checking for locking imbalance, and we've had a special '__cond_lock()' macro that we've used to let sparse know how the locking works: # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) so that you can then use this to tell sparse that (for example) the spinlock trylock macro ends up acquiring the lock when it succeeds, but not when it fails: #define raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) and then sparse can follow along the locking rules when you have code like if (!spin_trylock(&dentry->d_lock)) return LRU_SKIP; .. sparse sees that the lock is held here.. spin_unlock(&dentry->d_lock); and sparse ends up happy about the lock contexts. However, this '__cond_lock()' use does result in very ugly header files, and requires you to basically wrap the real function with that macro that uses '__cond_lock'. Which has made PeterZ NAK things that try to fix sparse warnings over the years [1]. To solve this, there is now a very experimental patch to sparse that basically does the exact same thing as '__cond_lock()' did, but using a function attribute instead. That seems to make PeterZ happy [2]. Note that this does not replace existing use of '__cond_lock()', but only exposes the new proposed attribute and uses it for the previously unannotated 'refcount_dec_and_lock()' family of functions. For existing sparse installations, this will make no difference (a negative output context was ignored), but if you have the experimental sparse patch it will make sparse now understand code that uses those functions, the same way '__cond_lock()' makes sparse understand the very similar 'atomic_dec_and_lock()' uses that have the old '__cond_lock()' annotations. Note that in some cases this will silence existing context imbalance warnings. But in other cases it may end up exposing new sparse warnings for code that sparse just didn't see the locking for at all before. This is a trial, in other words. I'd expect that if it ends up being successful, and new sparse releases end up having this new attribute, we'll migrate the old-style '__cond_lock()' users to use the new-style '__cond_acquires' function attribute. The actual experimental sparse patch was posted in [3]. Link: https://lore.kernel.org/all/20130930134434.GC12926@twins.programming.kicks-ass.net/ [1] Link: https://lore.kernel.org/all/Yr60tWxN4P568x3W@worktop.programming.kicks-ass.net/ [2] Link: https://lore.kernel.org/all/CAHk-=wjZfO9hGqJ2_hGQG3U_XzSh9_XaXze=HgPdvJbgrvASfA@mail.gmail.com/ [3] Acked-by: Peter Zijlstra Cc: Alexander Aring Cc: Luc Van Oostenryck Signed-off-by: Linus Torvalds --- include/linux/compiler_types.h | 2 ++ include/linux/refcount.h | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index d08dfcb0ac68..4f2a819fd60a 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -24,6 +24,7 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } /* context/locking */ # define __must_hold(x) __attribute__((context(x,1,1))) # define __acquires(x) __attribute__((context(x,0,1))) +# define __cond_acquires(x) __attribute__((context(x,0,-1))) # define __releases(x) __attribute__((context(x,1,0))) # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) @@ -50,6 +51,7 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } /* context/locking */ # define __must_hold(x) # define __acquires(x) +# define __cond_acquires(x) # define __releases(x) # define __acquire(x) (void)0 # define __release(x) (void)0 diff --git a/include/linux/refcount.h b/include/linux/refcount.h index b8a6e387f8f9..a62fcca97486 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -361,9 +361,9 @@ static inline void refcount_dec(refcount_t *r) extern __must_check bool refcount_dec_if_one(refcount_t *r); extern __must_check bool refcount_dec_not_one(refcount_t *r); -extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock); -extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock); +extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) __cond_acquires(lock); +extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) __cond_acquires(lock); extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock, - unsigned long *flags); + unsigned long *flags) __cond_acquires(lock); #endif /* _LINUX_REFCOUNT_H */ -- cgit v1.2.3 From b8d5109f50969ead9d49c3e8bd78ec1f82e548e3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Jul 2022 14:40:28 -0700 Subject: lockref: remove unused 'lockref_get_or_lock()' function Looking at the conditional lock acquire functions in the kernel due to the new sparse support (see commit 4a557a5d1a61 "sparse: introduce conditional lock acquire function attribute"), it became obvious that the lockref code has a couple of them, but they don't match the usual naming convention for the other ones, and their return value logic is also reversed. In the other very similar places, the naming pattern is '*_and_lock()' (eg 'atomic_put_and_lock()' and 'refcount_dec_and_lock()'), and the function returns true when the lock is taken. The lockref code is superficially very similar to the refcount code, only with the special "atomic wrt the embedded lock" semantics. But instead of the '*_and_lock()' naming it uses '*_or_lock()'. And instead of returning true in case it took the lock, it returns true if it *didn't* take the lock. Now, arguably the reflock code is quite logical: it really is a "either decrement _or_ lock" kind of situation - and the return value is about whether the operation succeeded without any special care needed. So despite the similarities, the differences do make some sense, and maybe it's not worth trying to unify the different conditional locking primitives in this area. But while looking at this all, it did become obvious that the 'lockref_get_or_lock()' function hasn't actually had any users for almost a decade. The only user it ever had was the shortlived 'd_rcu_to_refcount()' function, and it got removed and replaced with 'lockref_get_not_dead()' back in 2013 in commits 0d98439ea3c6 ("vfs: use lockred 'dead' flag to mark unrecoverably dead dentries") and e5c832d55588 ("vfs: fix dentry RCU to refcounting possibly sleeping dput()") In fact, that single use was removed less than a week after the whole function was introduced in commit b3abd80250c1 ("lockref: add 'lockref_get_or_lock() helper") so this function has been around for a decade, but only had a user for six days. Let's just put this mis-designed and unused function out of its misery. We can think about the naming and semantic oddities of the remaining 'lockref_put_or_lock()' later, but at least that function has users. And while the naming is different and the return value doesn't match, that function matches the whole '{atomic,refcount}_dec_and_test()' pattern much better (ie the magic happens when the count goes down to zero, not when it is incremented from zero). Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 99f17cc8e163..c3a1f78bc884 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -38,7 +38,6 @@ extern void lockref_get(struct lockref *); extern int lockref_put_return(struct lockref *); extern int lockref_get_not_zero(struct lockref *); extern int lockref_put_not_zero(struct lockref *); -extern int lockref_get_or_lock(struct lockref *); extern int lockref_put_or_lock(struct lockref *); extern void lockref_mark_dead(struct lockref *); -- cgit v1.2.3