From 3367aa7d74d240261de2543ddb35531ccad9d884 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 11 May 2022 13:30:39 +0200 Subject: fbdev: Restart conflicting fb removal loop when unregistering devices Drivers that want to remove registered conflicting framebuffers prior to register their own framebuffer, call to remove_conflicting_framebuffers(). This function takes the registration_lock mutex, to prevent a race when drivers register framebuffer devices. But if a conflicting framebuffer device is found, the underlaying platform device is unregistered and this will lead to the platform driver .remove callback to be called. Which in turn will call to unregister_framebuffer() that takes the same lock. To prevent this, a struct fb_info.forced_out field was used as indication to unregister_framebuffer() whether the mutex has to be grabbed or not. But this could be unsafe, since the fbdev core is making assumptions about what drivers may or may not do in their .remove callbacks. Allowing to run these callbacks with the registration_lock held can cause deadlocks, since the fbdev core has no control over what drivers do in their removal path. A better solution is to drop the lock before platform_device_unregister(), so unregister_framebuffer() can take it when called from the fbdev driver. The lock is acquired again after the device has been unregistered and at this point the removal loop can be restarted. Since the conflicting framebuffer device has already been removed, the loop would just finish when no more conflicting framebuffers are found. Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220511113039.1252432-1-javierm@redhat.com --- include/linux/fb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 69c67c70fa78..bbe1e4571899 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -511,7 +511,6 @@ struct fb_info { } *apertures; bool skip_vt_switch; /* no VT switch on suspend/resume required */ - bool forced_out; /* set when being removed by another driver */ }; static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { -- cgit v1.2.3 From 93984f19e7bce4c18084a6ef3dacafb155b806ed Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 29 Apr 2022 21:00:23 +0000 Subject: KVM: Fully serialize gfn=>pfn cache refresh via mutex Protect gfn=>pfn cache refresh with a mutex to fully serialize refreshes. The refresh logic doesn't protect against - concurrent unmaps, or refreshes with different GPAs (which may or may not happen in practice, for example if a cache is only used under vcpu->mutex; but it's allowed in the code) - a false negative on the memslot generation. If the first refresh sees a stale memslot generation, it will refresh the hva and generation before moving on to the hva=>pfn translation. If it then drops gpc->lock, a different user of the cache can come along, acquire gpc->lock, see that the memslot generation is fresh, and skip the hva=>pfn update due to the userspace address also matching (because it too was updated). The refresh path can already sleep during hva=>pfn resolution, so wrap the refresh with a mutex to ensure that any given refresh runs to completion before other callers can start their refresh. Cc: stable@vger.kernel.org Cc: Lai Jiangshan Signed-off-by: Sean Christopherson Message-Id: <20220429210025.3293691-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index ac1ebb37a0ff..f328a01db4fe 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -19,6 +19,7 @@ struct kvm_memslots; enum kvm_mr_change; #include +#include #include #include @@ -69,6 +70,7 @@ struct gfn_to_pfn_cache { struct kvm_vcpu *vcpu; struct list_head list; rwlock_t lock; + struct mutex refresh_lock; void *khva; kvm_pfn_t pfn; enum pfn_cache_usage usage; -- cgit v1.2.3 From 01357a5a45ed8eb9543183f5c9c6713ae60fc1f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 24 Apr 2022 16:55:14 +0200 Subject: dma-buf: cleanup dma_fence_unwrap implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the code from the inline functions into exported functions. Signed-off-by: Christian König Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220518135844.3338-3-christian.koenig@amd.com --- include/linux/dma-fence-unwrap.h | 52 ++++------------------------------------ 1 file changed, 4 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h index 77e335a1bcac..e7c219da4ed7 100644 --- a/include/linux/dma-fence-unwrap.h +++ b/include/linux/dma-fence-unwrap.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * fence-chain: chain fences together in a timeline - * * Copyright (C) 2022 Advanced Micro Devices, Inc. * Authors: * Christian König @@ -10,8 +8,7 @@ #ifndef __LINUX_DMA_FENCE_UNWRAP_H #define __LINUX_DMA_FENCE_UNWRAP_H -#include -#include +struct dma_fence; /** * struct dma_fence_unwrap - cursor into the container structure @@ -33,50 +30,9 @@ struct dma_fence_unwrap { unsigned int index; }; -/* Internal helper to start new array iteration, don't use directly */ -static inline struct dma_fence * -__dma_fence_unwrap_array(struct dma_fence_unwrap * cursor) -{ - cursor->array = dma_fence_chain_contained(cursor->chain); - cursor->index = 0; - return dma_fence_array_first(cursor->array); -} - -/** - * dma_fence_unwrap_first - return the first fence from fence containers - * @head: the entrypoint into the containers - * @cursor: current position inside the containers - * - * Unwraps potential dma_fence_chain/dma_fence_array containers and return the - * first fence. - */ -static inline struct dma_fence * -dma_fence_unwrap_first(struct dma_fence *head, struct dma_fence_unwrap *cursor) -{ - cursor->chain = dma_fence_get(head); - return __dma_fence_unwrap_array(cursor); -} - -/** - * dma_fence_unwrap_next - return the next fence from a fence containers - * @cursor: current position inside the containers - * - * Continue unwrapping the dma_fence_chain/dma_fence_array containers and return - * the next fence from them. - */ -static inline struct dma_fence * -dma_fence_unwrap_next(struct dma_fence_unwrap *cursor) -{ - struct dma_fence *tmp; - - ++cursor->index; - tmp = dma_fence_array_next(cursor->array, cursor->index); - if (tmp) - return tmp; - - cursor->chain = dma_fence_chain_walk(cursor->chain); - return __dma_fence_unwrap_array(cursor); -} +struct dma_fence *dma_fence_unwrap_first(struct dma_fence *head, + struct dma_fence_unwrap *cursor); +struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor); /** * dma_fence_unwrap_for_each - iterate over all fences in containers -- cgit v1.2.3 From 8f61973718485f3e89bc4f408f929048b7b47c83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 4 May 2022 13:01:29 +0200 Subject: dma-buf: return only unsignaled fences in dma_fence_unwrap_for_each v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dma_fence_chain containers cleanup signaled fences automatically, so filter those out from arrays as well. v2: fix missing walk over the array v3: massively simplify the patch and actually update the description. Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220518135844.3338-4-christian.koenig@amd.com --- include/linux/dma-fence-unwrap.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h index e7c219da4ed7..a4d342fef8e0 100644 --- a/include/linux/dma-fence-unwrap.h +++ b/include/linux/dma-fence-unwrap.h @@ -43,9 +43,13 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor); * Unwrap dma_fence_chain and dma_fence_array containers and deep dive into all * potential fences in them. If @head is just a normal fence only that one is * returned. + * + * Note that signalled fences are opportunistically filtered out, which + * means the iteration is potentially over no fence at all. */ #define dma_fence_unwrap_for_each(fence, cursor, head) \ for (fence = dma_fence_unwrap_first(head, cursor); fence; \ - fence = dma_fence_unwrap_next(cursor)) + fence = dma_fence_unwrap_next(cursor)) \ + if (!dma_fence_is_signaled(fence)) #endif -- cgit v1.2.3 From 245a4a7b531cffb41233a716497c25b06835cf4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 25 Apr 2022 14:22:12 +0200 Subject: dma-buf: generalize dma_fence unwrap & merging v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a dma_fence_unwrap_merge() macro which allows to unwrap fences which potentially can be containers as well and then merge them back together into a flat dma_fence_array. v2: rename the function, add some more comments about how the wrapper is used, move filtering of signaled fences into the unwrap iterator, add complex selftest which covers more cases. v3: fix signaled fence filtering once more Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220518135844.3338-5-christian.koenig@amd.com --- include/linux/dma-fence-unwrap.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h index a4d342fef8e0..390de1ee9d35 100644 --- a/include/linux/dma-fence-unwrap.h +++ b/include/linux/dma-fence-unwrap.h @@ -52,4 +52,28 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor); fence = dma_fence_unwrap_next(cursor)) \ if (!dma_fence_is_signaled(fence)) +struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, + struct dma_fence **fences, + struct dma_fence_unwrap *cursors); + +/** + * dma_fence_unwrap_merge - unwrap and merge fences + * + * All fences given as parameters are unwrapped and merged back together as flat + * dma_fence_array. Useful if multiple containers need to be merged together. + * + * Implemented as a macro to allocate the necessary arrays on the stack and + * account the stack frame size to the caller. + * + * Returns NULL on memory allocation failure, a dma_fence object representing + * all the given fences otherwise. + */ +#define dma_fence_unwrap_merge(...) \ + ({ \ + struct dma_fence *__f[] = { __VA_ARGS__ }; \ + struct dma_fence_unwrap __c[ARRAY_SIZE(__f)]; \ + \ + __dma_fence_unwrap_merge(ARRAY_SIZE(__f), __f, __c); \ + }) + #endif -- cgit v1.2.3 From d8616ee2affcff37c5d315310da557a694a3303d Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 24 May 2022 15:53:11 +0800 Subject: bpf, sockmap: Fix sk->sk_forward_alloc warn_on in sk_stream_kill_queues During TCP sockmap redirect pressure test, the following warning is triggered: WARNING: CPU: 3 PID: 2145 at net/core/stream.c:205 sk_stream_kill_queues+0xbc/0xd0 CPU: 3 PID: 2145 Comm: iperf Kdump: loaded Tainted: G W 5.10.0+ #9 Call Trace: inet_csk_destroy_sock+0x55/0x110 inet_csk_listen_stop+0xbb/0x380 tcp_close+0x41b/0x480 inet_release+0x42/0x80 __sock_release+0x3d/0xa0 sock_close+0x11/0x20 __fput+0x9d/0x240 task_work_run+0x62/0x90 exit_to_user_mode_prepare+0x110/0x120 syscall_exit_to_user_mode+0x27/0x190 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The reason we observed is that: When the listener is closing, a connection may have completed the three-way handshake but not accepted, and the client has sent some packets. The child sks in accept queue release by inet_child_forget()->inet_csk_destroy_sock(), but psocks of child sks have not released. To fix, add sock_map_destroy to release psocks. Signed-off-by: Wang Yufen Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Acked-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20220524075311.649153-1-wangyufen@huawei.com --- include/linux/bpf.h | 1 + include/linux/skmsg.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2b914a56a2c5..8e6092d0ea95 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2104,6 +2104,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); void sock_map_unhash(struct sock *sk); +void sock_map_destroy(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else static inline int bpf_prog_offload_init(struct bpf_prog *prog, diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c5a2d6f50f25..153b6dec9b6a 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -95,6 +95,7 @@ struct sk_psock { spinlock_t link_lock; refcount_t refcnt; void (*saved_unhash)(struct sock *sk); + void (*saved_destroy)(struct sock *sk); void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); void (*saved_data_ready)(struct sock *sk); -- cgit v1.2.3 From 8d5976089c97a4beeeda4de59e2fba9862946893 Mon Sep 17 00:00:00 2001 From: Xiang wangx Date: Mon, 6 Jun 2022 10:23:13 +0800 Subject: platform/chrome: cros_ec_commands: Fix syntax errors in comments Delete the redundant word 'using'. Signed-off-by: Xiang wangx Reviewed-by: Tzung-Bi Shih Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20220606022313.22912-1-wangxiang@cdjrlc.com --- include/linux/platform_data/cros_ec_commands.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 8cfa8cfca77e..e59f51c41a1c 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -787,7 +787,7 @@ struct ec_host_response { * * Packets always start with a request or response header. They are followed * by data_len bytes of data. If the data_crc_present flag is set, the data - * bytes are followed by a CRC-8 of that data, using using x^8 + x^2 + x + 1 + * bytes are followed by a CRC-8 of that data, using x^8 + x^2 + x + 1 * polynomial. * * Host algorithm when sending a request q: -- cgit v1.2.3 From 657f8bd88cb5a968d907fd1c891cee52dc156caa Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 May 2022 13:10:23 +0200 Subject: spi: fix typo in comment Spelling mistake (triple letters) in comment. Detected with the help of Coccinelle. Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/20220521111145.81697-13-Julia.Lawall@inria.fr Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index d361ba26203b..eadfd3ba3cbc 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -1127,7 +1127,7 @@ spi_max_transfer_size(struct spi_device *spi) if (ctlr->max_transfer_size) tr_max = ctlr->max_transfer_size(spi); - /* transfer size limit must not be greater than messsage size limit */ + /* transfer size limit must not be greater than message size limit */ return min(tr_max, msg_max); } -- cgit v1.2.3 From 6598b91b5ac32bc756d7c3000a31f775d4ead1c4 Mon Sep 17 00:00:00 2001 From: David Jander Date: Tue, 24 May 2022 11:18:08 +0200 Subject: spi: spi.c: Convert statistics to per-cpu u64_stats_t This change gives a dramatic performance improvement in the hot path, since many costly spin_lock_irqsave() calls can be avoided. On an i.MX8MM system with a MCP2518FD CAN controller connected via SPI, the time the driver takes to handle interrupts, or in other words the time the IRQ line of the CAN controller stays low is mainly dominated by the time it takes to do 3 relatively short sync SPI transfers. The effect of this patch is a reduction of this time from 136us down to only 98us. Suggested-by: Andrew Lunn Signed-off-by: David Jander Link: https://lore.kernel.org/r/20220524091808.2269898-1-david@protonic.nl Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 52 +++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index eadfd3ba3cbc..eac8d3caf954 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -17,6 +17,7 @@ #include #include +#include struct dma_chan; struct software_node; @@ -59,37 +60,42 @@ extern struct bus_type spi_bus_type; * maxsize limit */ struct spi_statistics { - spinlock_t lock; /* lock for the whole structure */ + struct u64_stats_sync syncp; - unsigned long messages; - unsigned long transfers; - unsigned long errors; - unsigned long timedout; + u64_stats_t messages; + u64_stats_t transfers; + u64_stats_t errors; + u64_stats_t timedout; - unsigned long spi_sync; - unsigned long spi_sync_immediate; - unsigned long spi_async; + u64_stats_t spi_sync; + u64_stats_t spi_sync_immediate; + u64_stats_t spi_async; - unsigned long long bytes; - unsigned long long bytes_rx; - unsigned long long bytes_tx; + u64_stats_t bytes; + u64_stats_t bytes_rx; + u64_stats_t bytes_tx; #define SPI_STATISTICS_HISTO_SIZE 17 - unsigned long transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE]; + u64_stats_t transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE]; - unsigned long transfers_split_maxsize; + u64_stats_t transfers_split_maxsize; }; -#define SPI_STATISTICS_ADD_TO_FIELD(stats, field, count) \ - do { \ - unsigned long flags; \ - spin_lock_irqsave(&(stats)->lock, flags); \ - (stats)->field += count; \ - spin_unlock_irqrestore(&(stats)->lock, flags); \ +#define SPI_STATISTICS_ADD_TO_FIELD(pcpu_stats, field, count) \ + do { \ + struct spi_statistics *__lstats = this_cpu_ptr(pcpu_stats); \ + u64_stats_update_begin(&__lstats->syncp); \ + u64_stats_add(&__lstats->field, count); \ + u64_stats_update_end(&__lstats->syncp); \ } while (0) -#define SPI_STATISTICS_INCREMENT_FIELD(stats, field) \ - SPI_STATISTICS_ADD_TO_FIELD(stats, field, 1) +#define SPI_STATISTICS_INCREMENT_FIELD(pcpu_stats, field) \ + do { \ + struct spi_statistics *__lstats = this_cpu_ptr(pcpu_stats); \ + u64_stats_update_begin(&__lstats->syncp); \ + u64_stats_inc(&__lstats->field); \ + u64_stats_update_end(&__lstats->syncp); \ + } while (0) /** * struct spi_delay - SPI delay information @@ -194,7 +200,7 @@ struct spi_device { struct spi_delay cs_inactive; /* the statistics */ - struct spi_statistics statistics; + struct spi_statistics __percpu *pcpu_statistics; /* * likely need more hooks for more protocol options affecting how @@ -647,7 +653,7 @@ struct spi_controller { s8 max_native_cs; /* statistics */ - struct spi_statistics statistics; + struct spi_statistics __percpu *pcpu_statistics; /* DMA channels for use with core dmaengine helpers */ struct dma_chan *dma_tx; -- cgit v1.2.3 From fc602b4f692cb83c937b5f79628bca32b60c4402 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Sat, 4 Jun 2022 12:32:50 +0100 Subject: mtd: spinand: Add support for ATO25D1GA Add support for the ATO25D1GA SPI NAND flash. Datasheet: - https://atta.szlcsc.com/upload/public/pdf/source/20191212/C469320_04599D67B03B078044EB65FF5AEDDDE9.pdf Signed-off-by: Aidan MacDonald Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220604113250.4745-1-aidanmacdonald.0x0@gmail.com --- include/linux/mtd/spinand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 5584d3bb6556..6d3392a7edc6 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -260,6 +260,7 @@ struct spinand_manufacturer { }; /* SPI NAND manufacturers */ +extern const struct spinand_manufacturer ato_spinand_manufacturer; extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; -- cgit v1.2.3 From 39d649602be2ceb3f8f8e98483d09e4d71133c6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 1 Jun 2022 10:17:58 +0200 Subject: of: constify of_property_check_flags() prop argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This argument is not modified and thus can be set as const. Signed-off-by: Clément Léger Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220601081801.348571-2-clement.leger@bootlin.com --- include/linux/of.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index f0a5d6b10c5a..d74fd82a6963 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -207,7 +207,7 @@ static inline void of_node_clear_flag(struct device_node *n, unsigned long flag) } #if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC) -static inline int of_property_check_flag(struct property *p, unsigned long flag) +static inline int of_property_check_flag(const struct property *p, unsigned long flag) { return test_bit(flag, &p->_flags); } @@ -814,7 +814,8 @@ static inline void of_node_clear_flag(struct device_node *n, unsigned long flag) { } -static inline int of_property_check_flag(struct property *p, unsigned long flag) +static inline int of_property_check_flag(const struct property *p, + unsigned long flag) { return 0; } -- cgit v1.2.3 From ff8372a467fa28752610f57a6512c5365413faa2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 6 Jun 2022 10:24:34 +0800 Subject: net: skb: move enum skb_drop_reason to standalone header file As the skb drop reasons are getting more and more, move the enum 'skb_drop_reason' and related function to the standalone header 'dropreason.h', as Jakub Kicinski suggested. Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 179 +------------------------------------------------ 1 file changed, 1 insertion(+), 178 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d3d10556f0fa..82edf0359ab3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -43,6 +43,7 @@ #include #endif #include +#include /** * DOC: skb checksums @@ -337,184 +338,6 @@ struct sk_buff_head { struct sk_buff; -/* The reason of skb drop, which is used in kfree_skb_reason(). - * en...maybe they should be splited by group? - * - * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is - * used to translate the reason to string. - */ -enum skb_drop_reason { - SKB_NOT_DROPPED_YET = 0, - SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ - SKB_DROP_REASON_NO_SOCKET, /* socket not found */ - SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ - SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ - SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ - SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ - SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ - SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current - * host (interface is in promisc - * mode) - */ - SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ - SKB_DROP_REASON_IP_INHDR, /* there is something wrong with - * IP header (see - * IPSTATS_MIB_INHDRERRORS) - */ - SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. - * see the document for rp_filter - * in ip-sysctl.rst for more - * information - */ - SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 - * is multicast, but L3 is - * unicast. - */ - SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ - SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ - SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buff is full */ - SKB_DROP_REASON_PROTO_MEM, /* proto memory limition, such as - * udp packet drop out of - * udp_memory_allocated. - */ - SKB_DROP_REASON_TCP_MD5NOTFOUND, /* no MD5 hash and one - * expected, corresponding - * to LINUX_MIB_TCPMD5NOTFOUND - */ - SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not - * expecting one, corresponding - * to LINUX_MIB_TCPMD5UNEXPECTED - */ - SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong, - * corresponding to - * LINUX_MIB_TCPMD5FAILURE - */ - SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket - * backlog (see - * LINUX_MIB_TCPBACKLOGDROP) - */ - SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */ - SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero, - * see LINUX_MIB_TCPZEROWINDOWDROP - */ - SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data reveived is already - * received before (spurious retrans - * may happened), see - * LINUX_MIB_DELAYEDACKLOST - */ - SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window, - * the seq of the first byte exceed - * the right edges of receive - * window - */ - SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in - * the ofo queue, corresponding to - * LINUX_MIB_TCPOFOMERGE - */ - SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to - * LINUX_MIB_PAWSESTABREJECTED - */ - SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */ - SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */ - SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */ - SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */ - SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */ - SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ - SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ - SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ - SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */ - SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */ - SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ - SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by - * BPF_PROG_TYPE_CGROUP_SKB - * eBPF program - */ - SKB_DROP_REASON_IPV6DISABLED, /* IPv6 is disabled on the device */ - SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh - * entry - */ - SKB_DROP_REASON_NEIGH_FAILED, /* neigh entry in failed state */ - SKB_DROP_REASON_NEIGH_QUEUEFULL, /* arp_queue for neigh - * entry is full - */ - SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ - SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */ - SKB_DROP_REASON_QDISC_DROP, /* dropped by qdisc when packet - * outputting (failed to enqueue to - * current qdisc) - */ - SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to - * the per CPU backlog queue. This - * can be caused by backlog queue - * full (see netdev_max_backlog in - * net.rst) or RPS flow limit - */ - SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ - SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ - SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented - * or not supported - */ - SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation - * error - */ - SKB_DROP_REASON_SKB_GSO_SEG, /* gso segmentation error */ - SKB_DROP_REASON_SKB_UCOPY_FAULT, /* failed to copy data from - * user space, e.g., via - * zerocopy_sg_from_iter() - * or skb_orphan_frags_rx() - */ - SKB_DROP_REASON_DEV_HDR, /* device driver specific - * header/metadata is invalid - */ - /* the device is not ready to xmit/recv due to any of its data - * structure that is not up/ready/initialized, e.g., the IFF_UP is - * not set, or driver specific tun->tfiles[txq] is not initialized - */ - SKB_DROP_REASON_DEV_READY, - SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ - SKB_DROP_REASON_NOMEM, /* error due to OOM */ - SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header - * from networking data, e.g., failed - * to pull the protocol header from - * frags via pskb_may_pull() - */ - SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly - * attached to tun/tap, e.g., via - * TUNSETFILTEREBPF - */ - SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented - * at tun/tap, e.g., check_filter() - */ - SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */ - SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC - * 2211, such as a broadcasts - * ICMP_TIMESTAMP - */ - SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding - * to IPSTATS_MIB_INADDRERRORS - */ - SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding - * to IPSTATS_MIB_INADDRERRORS - */ - SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed - * the MTU) - */ - SKB_DROP_REASON_MAX, -}; - -#define SKB_DR_INIT(name, reason) \ - enum skb_drop_reason name = SKB_DROP_REASON_##reason -#define SKB_DR(name) \ - SKB_DR_INIT(name, NOT_SPECIFIED) -#define SKB_DR_SET(name, reason) \ - (name = SKB_DROP_REASON_##reason) -#define SKB_DR_OR(name, reason) \ - do { \ - if (name == SKB_DROP_REASON_NOT_SPECIFIED || \ - name == SKB_NOT_DROPPED_YET) \ - SKB_DR_SET(name, reason); \ - } while (0) - /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. -- cgit v1.2.3 From 5f69a6577bc33d8f6d6bbe02bccdeb357b287f56 Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Thu, 26 May 2022 20:26:56 +0800 Subject: psi: dont alloc memory for psi by default Memory about struct psi_group is allocated by default for each cgroup even if psi_disabled is true, in this case, these allocated memory is waste, so alloc memory for struct psi_group only when psi_disabled is false. Signed-off-by: Chen Wandun Acked-by: Johannes Weiner Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- include/linux/cgroup.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1bfcfb1af352..672de25e3ec8 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -475,7 +475,7 @@ struct cgroup { struct work_struct release_agent_work; /* used to track pressure stalls */ - struct psi_group psi; + struct psi_group *psi; /* used to store eBPF programs */ struct cgroup_bpf bpf; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0d1ada8968d7..ed53bfe7c46c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -674,7 +674,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) { - return &cgrp->psi; + return cgrp->psi; } bool cgroup_psi_enabled(void); -- cgit v1.2.3 From 6089fb325cf737eeb2c4d236c94697112ca860da Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 6 Jun 2022 23:26:00 -0700 Subject: bpf: Add btf enum64 support Currently, BTF only supports upto 32bit enum value with BTF_KIND_ENUM. But in kernel, some enum indeed has 64bit values, e.g., in uapi bpf.h, we have enum { BPF_F_INDEX_MASK = 0xffffffffULL, BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, BPF_F_CTXLEN_MASK = (0xfffffULL << 32), }; In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK as 0, which certainly is incorrect. This patch added a new btf kind, BTF_KIND_ENUM64, which permits 64bit value to cover the above use case. The BTF_KIND_ENUM64 has the following three fields followed by the common type: struct bpf_enum64 { __u32 nume_off; __u32 val_lo32; __u32 val_hi32; }; Currently, btf type section has an alignment of 4 as all element types are u32. Representing the value with __u64 will introduce a pad for bpf_enum64 and may also introduce misalignment for the 64bit value. Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues. The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64 to indicate whether the value is signed or unsigned. The kflag intends to provide consistent output of BTF C fortmat with the original source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff. The format C has two choices, printing out 0xffffffff or -1 and current libbpf prints out as unsigned value. But if the signedness is preserved in btf, the value can be printed the same as the original source code. The kflag value 0 means unsigned values, which is consistent to the default by libbpf and should also cover most cases as well. The new BTF_KIND_ENUM64 is intended to support the enum value represented as 64bit value. But it can represent all BTF_KIND_ENUM values as well. The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has to be represented with 64 bits. In addition, a static inline function btf_kind_core_compat() is introduced which will be used later when libbpf relo_core.c changed. Here the kernel shares the same relo_core.c with libbpf. [1] https://reviews.llvm.org/D124641 Acked-by: Andrii Nakryiko Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index 2611cea2c2b6..1bfed7fa0428 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -177,6 +177,19 @@ static inline bool btf_type_is_enum(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; } +static inline bool btf_is_any_enum(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM || + BTF_INFO_KIND(t->info) == BTF_KIND_ENUM64; +} + +static inline bool btf_kind_core_compat(const struct btf_type *t1, + const struct btf_type *t2) +{ + return BTF_INFO_KIND(t1->info) == BTF_INFO_KIND(t2->info) || + (btf_is_any_enum(t1) && btf_is_any_enum(t2)); +} + static inline bool str_is_empty(const char *s) { return !s || !s[0]; @@ -192,6 +205,16 @@ static inline bool btf_is_enum(const struct btf_type *t) return btf_kind(t) == BTF_KIND_ENUM; } +static inline bool btf_is_enum64(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ENUM64; +} + +static inline u64 btf_enum64_value(const struct btf_enum64 *e) +{ + return ((u64)e->val_hi32 << 32) | e->val_lo32; +} + static inline bool btf_is_composite(const struct btf_type *t) { u16 kind = btf_kind(t); @@ -332,6 +355,11 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t) return (struct btf_enum *)(t + 1); } +static inline struct btf_enum64 *btf_enum64(const struct btf_type *t) +{ + return (struct btf_enum64 *)(t + 1); +} + static inline const struct btf_var_secinfo *btf_type_var_secinfo( const struct btf_type *t) { -- cgit v1.2.3 From 0e3c3b901c00364198d31482fa2552ccf2d5c899 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Jun 2022 18:42:59 -0400 Subject: No need of likely/unlikely on calls of check_copy_size() it's inline and unlikely() inside of it (including the implicit one in WARN_ON_ONCE()) suffice to convince the compiler that getting false from check_copy_size() is unlikely. Spotted-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- include/linux/uaccess.h | 4 ++-- include/linux/uio.h | 15 ++++++--------- 2 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5a328cf02b75..47e5d374c7eb 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -148,7 +148,7 @@ _copy_to_user(void __user *, const void *, unsigned long); static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - if (likely(check_copy_size(to, n, false))) + if (check_copy_size(to, n, false)) n = _copy_from_user(to, from, n); return n; } @@ -156,7 +156,7 @@ copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { - if (likely(check_copy_size(from, n, true))) + if (check_copy_size(from, n, true)) n = _copy_to_user(to, from, n); return n; } diff --git a/include/linux/uio.h b/include/linux/uio.h index 739285fe5a2f..76d305f3d4c2 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -156,19 +156,17 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, static __always_inline __must_check size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(!check_copy_size(addr, bytes, true))) - return 0; - else + if (check_copy_size(addr, bytes, true)) return _copy_to_iter(addr, bytes, i); + return 0; } static __always_inline __must_check size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(!check_copy_size(addr, bytes, false))) - return 0; - else + if (check_copy_size(addr, bytes, false)) return _copy_from_iter(addr, bytes, i); + return 0; } static __always_inline __must_check @@ -184,10 +182,9 @@ bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) static __always_inline __must_check size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(!check_copy_size(addr, bytes, false))) - return 0; - else + if (check_copy_size(addr, bytes, false)) return _copy_from_iter_nocache(addr, bytes, i); + return 0; } static __always_inline __must_check -- cgit v1.2.3 From b1d288d9c3c5ca28df062214656a59cf7ee370e0 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Mon, 6 Jun 2022 20:18:03 +0000 Subject: platform/chrome: cros_ec_proto: Rename cros_ec_command function cros_ec_command() is the name of a function as well as a struct, as such it can confuse indexing tools (like ctags). Avoid this by renaming it to cros_ec_cmd(). Update all the callsites to use the new name. This patch is a find-and-replace, so should not introduce any functional changes. Suggested-by: Stephen Boyd Signed-off-by: Prashant Malani Acked-by: Lee Jones Reviewed-by: Stephen Boyd Reviewed-by: Guenter Roeck Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20220606201825.763788-3-pmalani@chromium.org --- include/linux/platform_data/cros_ec_proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 138fd912c808..816da4eef3e5 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -231,7 +231,7 @@ bool cros_ec_check_features(struct cros_ec_dev *ec, int feature); int cros_ec_get_sensor_count(struct cros_ec_dev *ec); -int cros_ec_command(struct cros_ec_device *ec_dev, unsigned int version, int command, void *outdata, +int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, void *outdata, int outsize, void *indata, int insize); /** -- cgit v1.2.3 From f87e15fbf6d8cdb51f953338d41a4a52ad1aca14 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Mon, 6 Jun 2022 20:18:05 +0000 Subject: platform/chrome: cros_ec_proto: Update size arg types cros_ec_cmd() takes 2 size arguments. Update them to be of the more appropriate type size_t. Suggested-by: Stephen Boyd Signed-off-by: Prashant Malani Reviewed-by: Stephen Boyd Reviewed-by: Guenter Roeck Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20220606201825.763788-4-pmalani@chromium.org --- include/linux/platform_data/cros_ec_proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 816da4eef3e5..85e29300f63d 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -232,7 +232,7 @@ bool cros_ec_check_features(struct cros_ec_dev *ec, int feature); int cros_ec_get_sensor_count(struct cros_ec_dev *ec); int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, void *outdata, - int outsize, void *indata, int insize); + size_t outsize, void *indata, size_t insize); /** * cros_ec_get_time_ns() - Return time in ns. -- cgit v1.2.3 From 5dfac65b621733e69b789150a0a3f1bf2f9095a3 Mon Sep 17 00:00:00 2001 From: David Jander Date: Wed, 8 Jun 2022 17:33:09 +0200 Subject: spi: : Add missing documentation for struct members Fixes these "make htmldocs" warnings: include/linux/spi/spi.h:82: warning: Function parameter or member 'syncp' not described in 'spi_statistics' include/linux/spi/spi.h:213: warning: Function parameter or member 'pcpu_statistics' not described in 'spi_device' include/linux/spi/spi.h:676: warning: Function parameter or member 'pcpu_statistics' not described in 'spi_controller' Fixes: 6598b91b5ac3 ("spi: spi.c: Convert statistics to per-cpu u64_stats_t") Reported-by: Stephen Rothwell Signed-off-by: David Jander Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220608153309.2899565-1-david@protonic.nl Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index eac8d3caf954..2e63b4935deb 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -35,7 +35,8 @@ extern struct bus_type spi_bus_type; /** * struct spi_statistics - statistics for spi transfers - * @lock: lock protecting this structure + * @syncp: seqcount to protect members in this struct for per-cpu udate + * on 32-bit systems * * @messages: number of spi-messages handled * @transfers: number of spi_transfers handled @@ -155,7 +156,7 @@ extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); * @cs_inactive: delay to be introduced by the controller after CS is * deasserted. If @cs_change_delay is used from @spi_transfer, then the * two delays will be added up. - * @statistics: statistics for the spi_device + * @pcpu_statistics: statistics for the spi_device * * A @spi_device is used to interchange data between an SPI slave * (usually a discrete chip) and CPU memory. @@ -439,7 +440,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @max_native_cs: When cs_gpiods is used, and this field is filled in, * spi_register_controller() will validate all native CS (including the * unused native CS) against this value. - * @statistics: statistics for the spi_controller + * @pcpu_statistics: statistics for the spi_controller * @dma_tx: DMA transmit channel * @dma_rx: DMA receive channel * @dummy_rx: dummy receive buffer for full-duplex devices -- cgit v1.2.3 From 0c90466a7985d39355f743e9cd2139da3e86c4d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 3 Jun 2022 23:07:45 +0200 Subject: mtd: hyperbus: Make hyperbus_unregister_device() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only thing that could theoretically fail in that function is mtd_device_unregister(). However it's not supposed to fail and when used correctly it doesn't. So wail loudly if it does anyhow. This matches how other drivers (e.g. nand/raw/nandsim.c) use mtd_device_unregister(). This is a preparation for making platform remove callbacks return void. Signed-off-by: Uwe Kleine-König Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220603210758.148493-2-u.kleine-koenig@pengutronix.de --- include/linux/mtd/hyperbus.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/hyperbus.h b/include/linux/mtd/hyperbus.h index 0ce612428aea..bb6b7121a542 100644 --- a/include/linux/mtd/hyperbus.h +++ b/include/linux/mtd/hyperbus.h @@ -89,9 +89,7 @@ int hyperbus_register_device(struct hyperbus_device *hbdev); /** * hyperbus_unregister_device - deregister HyperBus slave memory device * @hbdev: hyperbus_device to be unregistered - * - * Return: 0 for success, others for failure. */ -int hyperbus_unregister_device(struct hyperbus_device *hbdev); +void hyperbus_unregister_device(struct hyperbus_device *hbdev); #endif /* __LINUX_MTD_HYPERBUS_H__ */ -- cgit v1.2.3 From 0949ee75da6c918fcbd567e1bfa4943a56ab4e5d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 7 Jun 2022 20:23:34 +0200 Subject: firmware: sysfb: Make sysfb_create_simplefb() return a pdev pointer This function just returned 0 on success or an errno code on error, but it could be useful for sysfb_init() callers to have a pointer to the device. Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-2-javierm@redhat.com --- include/linux/sysfb.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index b0dcfa26d07b..708152e9037b 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -72,8 +72,8 @@ static inline void sysfb_apply_efi_quirks(struct platform_device *pd) bool sysfb_parse_mode(const struct screen_info *si, struct simplefb_platform_data *mode); -int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode); +struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode); #else /* CONFIG_SYSFB_SIMPLE */ @@ -83,10 +83,10 @@ static inline bool sysfb_parse_mode(const struct screen_info *si, return false; } -static inline int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) +static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) { - return -EINVAL; + return ERR_PTR(-EINVAL); } #endif /* CONFIG_SYSFB_SIMPLE */ -- cgit v1.2.3 From bc824922b264aff40eba8c160972ee07a95e7dd4 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 7 Jun 2022 20:23:35 +0200 Subject: firmware: sysfb: Add sysfb_disable() helper function This can be used by subsystems to unregister a platform device registered by sysfb and also to disable future platform device registration in sysfb. Suggested-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-3-javierm@redhat.com --- include/linux/sysfb.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index 708152e9037b..8ba8b5be5567 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -55,6 +55,18 @@ struct efifb_dmi_info { int flags; }; +#ifdef CONFIG_SYSFB + +void sysfb_disable(void); + +#else /* CONFIG_SYSFB */ + +static inline void sysfb_disable(void) +{ +} + +#endif /* CONFIG_SYSFB */ + #ifdef CONFIG_EFI extern struct efifb_dmi_info efifb_dmi_list[]; -- cgit v1.2.3 From ea7f0f777d28db6e500a05836f2a9d467c7012de Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Thu, 9 Jun 2022 08:49:37 +0000 Subject: platform/chrome: cros_ec_commands: fix compile errors Fix compile errors when including cros_ec_commands.h solely. 1. cros_ec_commands.h:587:9: error: unknown type name 'uint8_t' 587 | uint8_t flags; | ^~~~~~~ 2. cros_ec_commands.h:1105:43: error: implicit declaration of function 'BIT' 1105 | EC_COMMS_STATUS_PROCESSING = BIT(0), | ^~~ Reviewed-by: Guenter Roeck Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20220609084957.3684698-2-tzungbi@kernel.org --- include/linux/platform_data/cros_ec_commands.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index e59f51c41a1c..f13568b3e247 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -13,8 +13,8 @@ #ifndef __CROS_EC_COMMANDS_H #define __CROS_EC_COMMANDS_H - - +#include +#include #define BUILD_ASSERT(_cond) -- cgit v1.2.3 From 3db0c9e5de7bd9dbe52580eb9752b2b3049e38da Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Thu, 9 Jun 2022 08:49:39 +0000 Subject: platform/chrome: use macros for passthru indexes Move passthru indexes for EC and PD devices to common header. Also use them instead of literal constants. Reviewed-by: Guenter Roeck Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20220609084957.3684698-4-tzungbi@kernel.org --- include/linux/platform_data/cros_ec_proto.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 85e29300f63d..c82a8285d936 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -21,6 +21,9 @@ #define CROS_EC_DEV_SCP_NAME "cros_scp" #define CROS_EC_DEV_TP_NAME "cros_tp" +#define CROS_EC_DEV_EC_INDEX 0 +#define CROS_EC_DEV_PD_INDEX 1 + /* * The EC is unresponsive for a time after a reboot command. Add a * simple delay to make sure that the bus stays locked. -- cgit v1.2.3 From d62607c3fe45911b2331fac073355a8c914bbde2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jun 2022 21:39:55 -0700 Subject: net: rename reference+tracking helpers Netdev reference helpers have a dev_ prefix for historic reasons. Renaming the old helpers would be too much churn but we can rename the tracking ones which are relatively recent and should be the default for new code. Rename: dev_hold_track() -> netdev_hold() dev_put_track() -> netdev_put() dev_replace_track() -> netdev_ref_replace() Link: https://lore.kernel.org/r/20220608043955.919359-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f615a66c89e9..e2e5088888b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3981,8 +3981,8 @@ static inline void netdev_tracker_free(struct net_device *dev, #endif } -static inline void dev_hold_track(struct net_device *dev, - netdevice_tracker *tracker, gfp_t gfp) +static inline void netdev_hold(struct net_device *dev, + netdevice_tracker *tracker, gfp_t gfp) { if (dev) { __dev_hold(dev); @@ -3990,8 +3990,8 @@ static inline void dev_hold_track(struct net_device *dev, } } -static inline void dev_put_track(struct net_device *dev, - netdevice_tracker *tracker) +static inline void netdev_put(struct net_device *dev, + netdevice_tracker *tracker) { if (dev) { netdev_tracker_free(dev, tracker); @@ -4004,11 +4004,11 @@ static inline void dev_put_track(struct net_device *dev, * @dev: network device * * Hold reference to device to keep it from being freed. - * Try using dev_hold_track() instead. + * Try using netdev_hold() instead. */ static inline void dev_hold(struct net_device *dev) { - dev_hold_track(dev, NULL, GFP_ATOMIC); + netdev_hold(dev, NULL, GFP_ATOMIC); } /** @@ -4016,17 +4016,17 @@ static inline void dev_hold(struct net_device *dev) * @dev: network device * * Release reference to device to allow it to be freed. - * Try using dev_put_track() instead. + * Try using netdev_put() instead. */ static inline void dev_put(struct net_device *dev) { - dev_put_track(dev, NULL); + netdev_put(dev, NULL); } -static inline void dev_replace_track(struct net_device *odev, - struct net_device *ndev, - netdevice_tracker *tracker, - gfp_t gfp) +static inline void netdev_ref_replace(struct net_device *odev, + struct net_device *ndev, + netdevice_tracker *tracker, + gfp_t gfp) { if (odev) netdev_tracker_free(odev, tracker); -- cgit v1.2.3 From 09cca53c1656960c38c04bb12da30f61952ca660 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 08:46:32 -0700 Subject: vlan: adopt u64_stats_t As explained in commit 316580b69d0a ("u64_stats: provide u64_stats_t type") we should use u64_stats_t and related accessors to avoid load/store tearing. Add READ_ONCE() when reading rx_errors & tx_dropped. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/if_macvlan.h | 6 +++--- include/linux/if_vlan.h | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index b42294739063..523025106a64 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -46,10 +46,10 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, pcpu_stats = get_cpu_ptr(vlan->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); - pcpu_stats->rx_packets++; - pcpu_stats->rx_bytes += len; + u64_stats_inc(&pcpu_stats->rx_packets); + u64_stats_add(&pcpu_stats->rx_bytes, len); if (multicast) - pcpu_stats->rx_multicast++; + u64_stats_inc(&pcpu_stats->rx_multicast); u64_stats_update_end(&pcpu_stats->syncp); put_cpu_ptr(vlan->pcpu_stats); } else { diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 2be4dd7e90a9..e00c4ee81ff7 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -118,11 +118,11 @@ static inline void vlan_drop_rx_stag_filter_info(struct net_device *dev) * @tx_dropped: number of tx drops */ struct vlan_pcpu_stats { - u64 rx_packets; - u64 rx_bytes; - u64 rx_multicast; - u64 tx_packets; - u64 tx_bytes; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_multicast; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; struct u64_stats_sync syncp; u32 rx_errors; u32 tx_dropped; -- cgit v1.2.3 From 9962acefbcb92736c268aafe5f52200948f60f3e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 08:46:37 -0700 Subject: net: adopt u64_stats_t in struct pcpu_sw_netstats As explained in commit 316580b69d0a ("u64_stats: provide u64_stats_t type") we should use u64_stats_t and related accessors to avoid load/store tearing. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e2e5088888b1..89afa4f7747d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2636,10 +2636,10 @@ struct packet_offload { /* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); @@ -2656,8 +2656,8 @@ static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int l struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->rx_bytes += len; - tstats->rx_packets++; + u64_stats_add(&tstats->rx_bytes, len); + u64_stats_inc(&tstats->rx_packets); u64_stats_update_end(&tstats->syncp); } @@ -2668,8 +2668,8 @@ static inline void dev_sw_netstats_tx_add(struct net_device *dev, struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += len; - tstats->tx_packets += packets; + u64_stats_add(&tstats->tx_bytes, len); + u64_stats_add(&tstats->tx_packets, packets); u64_stats_update_end(&tstats->syncp); } -- cgit v1.2.3 From 9ec321aba2eaca109139a2a67c65ede7f07bd8ec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 08:46:40 -0700 Subject: team: adopt u64_stats_t As explained in commit 316580b69d0a ("u64_stats: provide u64_stats_t type") we should use u64_stats_t and related accessors to avoid load/store tearing. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/if_team.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index add607943c95..fc985e5c739d 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -12,11 +12,11 @@ #include struct team_pcpu_stats { - u64 rx_packets; - u64 rx_bytes; - u64 rx_multicast; - u64 tx_packets; - u64 tx_bytes; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_multicast; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; struct u64_stats_sync syncp; u32 rx_dropped; u32 tx_dropped; -- cgit v1.2.3 From 21ab562c1f65e583b5d89081acaf096805522482 Mon Sep 17 00:00:00 2001 From: Po Hao Huang Date: Wed, 8 Jun 2022 19:32:22 +0800 Subject: ieee80211: add trigger frame definition Define trigger stype of control frame, and its checking function, struct and trigger type within common_info of trigger. Signed-off-by: Po Hao Huang Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220608113224.11193-2-pkshih@realtek.com --- include/linux/ieee80211.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 75d40acb60c1..5c65ae6b8154 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -76,6 +76,7 @@ #define IEEE80211_STYPE_ACTION 0x00D0 /* control */ +#define IEEE80211_STYPE_TRIGGER 0x0020 #define IEEE80211_STYPE_CTL_EXT 0x0060 #define IEEE80211_STYPE_BACK_REQ 0x0080 #define IEEE80211_STYPE_BACK 0x0090 @@ -295,6 +296,17 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_HT_CTL_LEN 4 +/* trigger type within common_info of trigger frame */ +#define IEEE80211_TRIGGER_TYPE_MASK 0xf +#define IEEE80211_TRIGGER_TYPE_BASIC 0x0 +#define IEEE80211_TRIGGER_TYPE_BFRP 0x1 +#define IEEE80211_TRIGGER_TYPE_MU_BAR 0x2 +#define IEEE80211_TRIGGER_TYPE_MU_RTS 0x3 +#define IEEE80211_TRIGGER_TYPE_BSRP 0x4 +#define IEEE80211_TRIGGER_TYPE_GCR_MU_BAR 0x5 +#define IEEE80211_TRIGGER_TYPE_BQRP 0x6 +#define IEEE80211_TRIGGER_TYPE_NFRP 0x7 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; @@ -324,6 +336,15 @@ struct ieee80211_qos_hdr { __le16 qos_ctrl; } __packed __aligned(2); +struct ieee80211_trigger { + __le16 frame_control; + __le16 duration; + u8 ra[ETH_ALEN]; + u8 ta[ETH_ALEN]; + __le64 common_info; + u8 variable[]; +} __packed __aligned(2); + /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder @@ -729,6 +750,16 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); } +/** + * ieee80211_is_trigger - check if frame is trigger frame + * @fc: frame control field in little-endian byteorder + */ +static inline bool ieee80211_is_trigger(__le16 fc) +{ + return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == + cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_TRIGGER); +} + /** * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder -- cgit v1.2.3 From e3fa404a261bb8b7795b62be1b9af7b7dc1030f6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 28 May 2022 16:59:17 +0200 Subject: USB: Follow-up to SPDX identifiers addition - remove now useless comments All these files have been updated in the commit given in the Fixes: tag below. When the SPDX-License-Identifier: has been added, the corresponding text at the beginning of the files has not been deleted. All these texts are about GPL-2.0, with different variation in the wording. Remove these now useless lines to save some LoC. Fixes: 5fd54ace4721 ("USB: add SPDX identifiers to all remaining files in drivers/usb/") Signed-off-by: Christophe JAILLET Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/audio-v2.h | 3 --- include/linux/usb/audio.h | 3 --- include/linux/usb/cdc-wdm.h | 4 ---- include/linux/usb/cdc.h | 4 ---- include/linux/usb/gadget.h | 2 -- include/linux/usb/input.h | 4 ---- include/linux/usb/isp1301.h | 10 ---------- include/linux/usb/m66592.h | 14 -------------- include/linux/usb/of.h | 2 -- include/linux/usb/r8a66597.h | 14 -------------- include/linux/usb/serial.h | 5 ----- include/linux/usb/storage.h | 2 -- include/linux/usb/tegra_usb_phy.h | 10 ---------- include/linux/usb/ulpi.h | 4 ---- include/linux/usb/xhci-dbgp.h | 4 ---- 15 files changed, 85 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h index 8fc2abd7aecb..ca796dc1a984 100644 --- a/include/linux/usb/audio-v2.h +++ b/include/linux/usb/audio-v2.h @@ -2,9 +2,6 @@ /* * Copyright (c) 2010 Daniel Mack * - * This software is distributed under the terms of the GNU General Public - * License ("GPL") version 2, as published by the Free Software Foundation. - * * This file holds USB constants and structures defined * by the USB Device Class Definition for Audio Devices in version 2.0. * Comments below reference relevant sections of the documents contained diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h index 170acd500ea1..0747b24a1a7c 100644 --- a/include/linux/usb/audio.h +++ b/include/linux/usb/audio.h @@ -6,9 +6,6 @@ * Developed for Thumtronics by Grey Innovation * Ben Williamson * - * This software is distributed under the terms of the GNU General Public - * License ("GPL") version 2, as published by the Free Software Foundation. - * * This file holds USB constants and structures defined * by the USB Device Class Definition for Audio Devices. * Comments below reference relevant sections of that document: diff --git a/include/linux/usb/cdc-wdm.h b/include/linux/usb/cdc-wdm.h index 9f5a51f79ba5..85417f00a89a 100644 --- a/include/linux/usb/cdc-wdm.h +++ b/include/linux/usb/cdc-wdm.h @@ -3,10 +3,6 @@ * USB CDC Device Management subdriver * * Copyright (c) 2012 Bjørn Mork - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. */ #ifndef __LINUX_USB_CDC_WDM_H diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index 35d784cf32a4..0af0db51bc89 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -3,10 +3,6 @@ * USB CDC common helpers * * Copyright (c) 2015 Oliver Neukum - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. */ #ifndef __LINUX_USB_CDC_H #define __LINUX_USB_CDC_H diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 3ad58b7a0824..dc3092cea99e 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -10,8 +10,6 @@ * * (C) Copyright 2002-2004 by David Brownell * All Rights Reserved. - * - * This software is licensed under the GNU GPL version 2. */ #ifndef __LINUX_USB_GADGET_H diff --git a/include/linux/usb/input.h b/include/linux/usb/input.h index 974befa72ac0..5e759b2cf551 100644 --- a/include/linux/usb/input.h +++ b/include/linux/usb/input.h @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2005 Dmitry Torokhov - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #ifndef __LINUX_USB_INPUT_H diff --git a/include/linux/usb/isp1301.h b/include/linux/usb/isp1301.h index dedb3b2473e8..fa986b926a12 100644 --- a/include/linux/usb/isp1301.h +++ b/include/linux/usb/isp1301.h @@ -3,16 +3,6 @@ * NXP ISP1301 USB transceiver driver * * Copyright (C) 2012 Roland Stigge - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #ifndef __LINUX_USB_ISP1301_H diff --git a/include/linux/usb/m66592.h b/include/linux/usb/m66592.h index 2dfe68183495..5f04de2b47fd 100644 --- a/include/linux/usb/m66592.h +++ b/include/linux/usb/m66592.h @@ -3,20 +3,6 @@ * M66592 driver platform data * * Copyright (C) 2009 Renesas Solutions Corp. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #ifndef __LINUX_USB_M66592_H diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index dba55ccb9b53..98487fd7ab11 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * OF helpers for usb devices. - * - * This file is released under the GPLv2 */ #ifndef __LINUX_USB_OF_H diff --git a/include/linux/usb/r8a66597.h b/include/linux/usb/r8a66597.h index c0753d026bbf..f0fa7ddadbaa 100644 --- a/include/linux/usb/r8a66597.h +++ b/include/linux/usb/r8a66597.h @@ -5,20 +5,6 @@ * Copyright (C) 2009 Renesas Solutions Corp. * * Author : Yoshihiro Shimoda - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #ifndef __LINUX_USB_R8A66597_H diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 16ea5a4cc586..8ea319f89e1f 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -4,11 +4,6 @@ * * Copyright (C) 1999 - 2012 * Greg Kroah-Hartman (greg@kroah.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * */ #ifndef __LINUX_USB_SERIAL_H diff --git a/include/linux/usb/storage.h b/include/linux/usb/storage.h index e0240f864548..2827ce72e502 100644 --- a/include/linux/usb/storage.h +++ b/include/linux/usb/storage.h @@ -9,8 +9,6 @@ * * This file contains definitions taken from the * USB Mass Storage Class Specification Overview - * - * Distributed under the terms of the GNU GPL, version two. */ /* Storage subclass codes */ diff --git a/include/linux/usb/tegra_usb_phy.h b/include/linux/usb/tegra_usb_phy.h index d3e65eb9e16f..46e73584b6e6 100644 --- a/include/linux/usb/tegra_usb_phy.h +++ b/include/linux/usb/tegra_usb_phy.h @@ -1,16 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2010 Google, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #ifndef __TEGRA_USB_PHY_H diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h index 36c2982780ad..5050f502c1ed 100644 --- a/include/linux/usb/ulpi.h +++ b/include/linux/usb/ulpi.h @@ -3,10 +3,6 @@ * ulpi.h -- ULPI defines and function prorotypes * * Copyright (C) 2010 Nokia Corporation - * - * This software is distributed under the terms of the GNU General - * Public License ("GPL") as published by the Free Software Foundation, - * version 2 of that License. */ #ifndef __LINUX_USB_ULPI_H diff --git a/include/linux/usb/xhci-dbgp.h b/include/linux/usb/xhci-dbgp.h index 01fe768873f9..171fd74b1cfc 100644 --- a/include/linux/usb/xhci-dbgp.h +++ b/include/linux/usb/xhci-dbgp.h @@ -5,10 +5,6 @@ * Copyright (C) 2016 Intel Corporation * * Author: Lu Baolu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __LINUX_XHCI_DBGP_H -- cgit v1.2.3 From 3e00a22fdc9a9457f1c64d885532710e174babd8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 28 May 2022 17:13:56 +0200 Subject: USB: Follow-up to SPDX GPL-2.0+ identifiers addition - remove now useless comments All these files have been updated in the commit given in the Fixes: tag below. When the SPDX-License-Identifier: has been added, the corresponding text at the beginning of the files has not been deleted. All these texts are about GPL-2.0+, with different variation in the wording. Remove these now useless lines to save some LoC. Fixes: 5fd54ace4721 ("USB: add SPDX identifiers to all remaining files in drivers/usb/") Signed-off-by: Christophe JAILLET Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/c67x00.h | 15 --------------- include/linux/usb/composite.h | 14 -------------- include/linux/usb/ehci_def.h | 14 -------------- include/linux/usb/ehci_pdriver.h | 14 -------------- include/linux/usb/g_hid.h | 14 -------------- include/linux/usb/hcd.h | 14 -------------- include/linux/usb/musb-ux500.h | 10 ---------- include/linux/usb/net2280.h | 14 -------------- include/linux/usb/ohci_pdriver.h | 14 -------------- include/linux/usb/otg-fsm.h | 17 ++--------------- include/linux/usb/phy_companion.h | 10 ---------- include/linux/usb/rndis_host.h | 14 -------------- include/linux/usb/usb338x.h | 11 ----------- include/linux/usb/usbnet.h | 14 -------------- 14 files changed, 2 insertions(+), 187 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/c67x00.h b/include/linux/usb/c67x00.h index 2fc39e3b7281..45e0757e58f3 100644 --- a/include/linux/usb/c67x00.h +++ b/include/linux/usb/c67x00.h @@ -3,21 +3,6 @@ * usb_c67x00.h: platform definitions for the Cypress C67X00 USB chip * * Copyright (C) 2006-2008 Barco N.V. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301 USA. */ #ifndef _LINUX_USB_C67X00_H diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 9d2762279286..43ac3fa760db 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -3,20 +3,6 @@ * composite.h -- framework for usb gadgets which are composite devices * * Copyright (C) 2006-2008 David Brownell - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __LINUX_USB_COMPOSITE_H diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h index c892c5bc6638..fbabadd3b372 100644 --- a/include/linux/usb/ehci_def.h +++ b/include/linux/usb/ehci_def.h @@ -1,20 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (c) 2001-2002 by David Brownell - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __LINUX_USB_EHCI_DEF_H diff --git a/include/linux/usb/ehci_pdriver.h b/include/linux/usb/ehci_pdriver.h index 89fc901e778f..0f1b166f5aa0 100644 --- a/include/linux/usb/ehci_pdriver.h +++ b/include/linux/usb/ehci_pdriver.h @@ -1,20 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2012 Hauke Mehrtens - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __USB_CORE_EHCI_PDRIVER_H diff --git a/include/linux/usb/g_hid.h b/include/linux/usb/g_hid.h index 7581e488c237..d56bfedeb079 100644 --- a/include/linux/usb/g_hid.h +++ b/include/linux/usb/g_hid.h @@ -3,20 +3,6 @@ * g_hid.h -- Header file for USB HID gadget driver * * Copyright (C) 2010 Fabien Chouteau - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __LINUX_USB_G_HID_H diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 2c1fc9212cf2..37ccb31b1d40 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -1,20 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (c) 2001-2002 by David Brownell - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __USB_CORE_HCD_H diff --git a/include/linux/usb/musb-ux500.h b/include/linux/usb/musb-ux500.h index c4b7ad9850ca..d60dcfc56b5a 100644 --- a/include/linux/usb/musb-ux500.h +++ b/include/linux/usb/musb-ux500.h @@ -1,16 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2013 ST-Ericsson AB - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef __MUSB_UX500_H__ diff --git a/include/linux/usb/net2280.h b/include/linux/usb/net2280.h index 08b85caecfaf..f29fe6a1f415 100644 --- a/include/linux/usb/net2280.h +++ b/include/linux/usb/net2280.h @@ -5,20 +5,6 @@ * * Copyright (C) 2002 NetChip Technology, Inc. (http://www.netchip.com) * Copyright (C) 2003 David Brownell - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __LINUX_USB_NET2280_H diff --git a/include/linux/usb/ohci_pdriver.h b/include/linux/usb/ohci_pdriver.h index 7eb16cf587ee..2447c78b1766 100644 --- a/include/linux/usb/ohci_pdriver.h +++ b/include/linux/usb/ohci_pdriver.h @@ -1,20 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2012 Hauke Mehrtens - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __USB_CORE_OHCI_PDRIVER_H diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index 784659d4dc99..6135d076c53d 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -1,19 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ -/* Copyright (C) 2007,2008 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. +/* + * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. */ #ifndef __LINUX_USB_OTG_FSM_H diff --git a/include/linux/usb/phy_companion.h b/include/linux/usb/phy_companion.h index 263196f05015..862aaeca2319 100644 --- a/include/linux/usb/phy_companion.h +++ b/include/linux/usb/phy_companion.h @@ -3,18 +3,8 @@ * phy-companion.h -- phy companion to indicate the comparator part of PHY * * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. * * Author: Kishon Vijay Abraham I - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #ifndef __DRIVERS_PHY_COMPANION_H diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h index cc42db51bbba..489cfb1d00f6 100644 --- a/include/linux/usb/rndis_host.h +++ b/include/linux/usb/rndis_host.h @@ -2,20 +2,6 @@ /* * Host Side support for RNDIS Networking Links * Copyright (C) 2005 by David Brownell - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __LINUX_USB_RNDIS_HOST_H diff --git a/include/linux/usb/usb338x.h b/include/linux/usb/usb338x.h index 20020c1336d5..70a7e3cdb3c9 100644 --- a/include/linux/usb/usb338x.h +++ b/include/linux/usb/usb338x.h @@ -6,17 +6,6 @@ * Copyright (C) 2002 NetChip Technology, Inc. (http://www.netchip.com) * Copyright (C) 2003 David Brownell * Copyright (C) 2014 Ricardo Ribalda - Qtechnology/AS - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #ifndef __LINUX_USB_USB338X_H diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 1b4d72d5e891..4f5608cbd9ab 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -4,20 +4,6 @@ * * Copyright (C) 2000-2005 by David Brownell * Copyright (C) 2003-2005 David Hollis - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __LINUX_USB_USBNET_H -- cgit v1.2.3 From 4173f018aae16b6496d292c234b858241f85254f Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 7 Jun 2022 12:49:12 +0200 Subject: tty/vt: consolemap: rename and document struct uni_pagedir struct uni_pagedir contains 32 unicode page directories, so the name of the structure is a bit misleading. Rename the structure to uni_pagedict, so it looks like this: struct uni_pagedict -> 32 page dirs -> 32 rows -> 64 glyphs Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220607104946.18710-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/console_struct.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index d5b9c8d40c18..f75033f0277f 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -17,7 +17,7 @@ #include #include -struct uni_pagedir; +struct uni_pagedict; struct uni_screen; #define NPAR 16 @@ -157,8 +157,8 @@ struct vc_data { unsigned int vc_bell_duration; /* Console bell duration */ unsigned short vc_cur_blink_ms; /* Cursor blink duration */ struct vc_data **vc_display_fg; /* [!] Ptr to var holding fg console for this display */ - struct uni_pagedir *vc_uni_pagedir; - struct uni_pagedir **vc_uni_pagedir_loc; /* [!] Location of uni_pagedir variable for this console */ + struct uni_pagedict *vc_uni_pagedir; + struct uni_pagedict **vc_uni_pagedir_loc; /* [!] Location of uni_pagedict variable for this console */ struct uni_screen *vc_uni_screen; /* unicode screen content */ /* additional information is in vt_kern.h */ }; -- cgit v1.2.3 From 0b75f7968d61566d150747512344cabaa59e54c6 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 7 Jun 2022 12:49:15 +0200 Subject: tty/vt: consolemap: remove extern from function decls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The extern keyword is not needed for function declarations. Remove it, so that the consolemap header conforms to other tty headers. Reviewed-by: Ilpo Järvinen Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220607104946.18710-5-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index bcfce748c9d8..abc28e4450bf 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -17,12 +17,11 @@ #ifdef CONFIG_CONSOLE_TRANSLATIONS struct vc_data; -extern u16 inverse_translate(const struct vc_data *conp, int glyph, - int use_unicode); -extern unsigned short *set_translate(int m, struct vc_data *vc); -extern int conv_uni_to_pc(struct vc_data *conp, long ucs); -extern u32 conv_8bit_to_uni(unsigned char c); -extern int conv_uni_to_8bit(u32 uni); +u16 inverse_translate(const struct vc_data *conp, int glyph, int use_unicode); +unsigned short *set_translate(int m, struct vc_data *vc); +int conv_uni_to_pc(struct vc_data *conp, long ucs); +u32 conv_8bit_to_uni(unsigned char c); +int conv_uni_to_8bit(u32 uni); void console_map_init(void); #else #define inverse_translate(conp, glyph, uni) ((uint16_t)glyph) -- cgit v1.2.3 From f827c754f9b6247f4d4f9cbb508b22bff2cf8e6d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 7 Jun 2022 12:49:16 +0200 Subject: tty/vt: consolemap: convert macros to static inlines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit changes !CONFIG_CONSOLE_TRANSLATIONS definitions to real (inline) functions. So the commit: * makes type checking much stronger, * removes the need of many parentheses and casts, and * makes the code more readable. Reviewed-by: Ilpo Järvinen Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220607104946.18710-6-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index abc28e4450bf..98171dbed51f 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -14,9 +14,9 @@ #include -#ifdef CONFIG_CONSOLE_TRANSLATIONS struct vc_data; +#ifdef CONFIG_CONSOLE_TRANSLATIONS u16 inverse_translate(const struct vc_data *conp, int glyph, int use_unicode); unsigned short *set_translate(int m, struct vc_data *vc); int conv_uni_to_pc(struct vc_data *conp, long ucs); @@ -24,12 +24,33 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); #else -#define inverse_translate(conp, glyph, uni) ((uint16_t)glyph) -#define set_translate(m, vc) ((unsigned short *)NULL) -#define conv_uni_to_pc(conp, ucs) ((int) (ucs > 0xff ? -1: ucs)) -#define conv_8bit_to_uni(c) ((uint32_t)(c)) -#define conv_uni_to_8bit(c) ((int) ((c) & 0xff)) -#define console_map_init(c) do { ; } while (0) +static inline u16 inverse_translate(const struct vc_data *conp, int glyph, + int use_unicode) +{ + return glyph; +} + +static inline unsigned short *set_translate(int m, struct vc_data *vc) +{ + return NULL; +} + +static inline int conv_uni_to_pc(struct vc_data *conp, long ucs) +{ + return ucs > 0xff ? -1 : ucs; +} + +static inline u32 conv_8bit_to_uni(unsigned char c) +{ + return c; +} + +static inline int conv_uni_to_8bit(u32 uni) +{ + return uni & 0xff; +} + +static inline void console_map_init(void) { } #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit v1.2.3 From d9ebb906a45adc71a62aceb1e3608c847cafa660 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 7 Jun 2022 12:49:17 +0200 Subject: tty/vt: consolemap: make parameters of inverse_translate() saner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - int use_unicode -> bool: it's used as bool at some places already, so make it explicit. - int glyph -> u16: every caller passes a u16 in. So make it explicit too. And remove a negative check from inverse_translate() as it never could be negative. Reviewed-by: Ilpo Järvinen Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220607104946.18710-7-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 98171dbed51f..1ff2bf55eb85 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -17,15 +17,15 @@ struct vc_data; #ifdef CONFIG_CONSOLE_TRANSLATIONS -u16 inverse_translate(const struct vc_data *conp, int glyph, int use_unicode); +u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode); unsigned short *set_translate(int m, struct vc_data *vc); int conv_uni_to_pc(struct vc_data *conp, long ucs); u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); #else -static inline u16 inverse_translate(const struct vc_data *conp, int glyph, - int use_unicode) +static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, + bool use_unicode) { return glyph; } -- cgit v1.2.3 From 5a904a936b407624cd1ff5ee3f1675ca3d2366a5 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 7 Jun 2022 12:49:27 +0200 Subject: tty/vt: consolemap: introduce enum translation_map and use it Again, instead of magic constants in the code, declare an enum and be a little bit more explicit. Both in the translations definition and in the loops etc. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220607104946.18710-17-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 1ff2bf55eb85..c35db4896c37 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -7,10 +7,15 @@ #ifndef __LINUX_CONSOLEMAP_H__ #define __LINUX_CONSOLEMAP_H__ -#define LAT1_MAP 0 -#define GRAF_MAP 1 -#define IBMPC_MAP 2 -#define USER_MAP 3 +enum translation_map { + LAT1_MAP, + GRAF_MAP, + IBMPC_MAP, + USER_MAP, + + FIRST_MAP = LAT1_MAP, + LAST_MAP = USER_MAP, +}; #include @@ -18,7 +23,7 @@ struct vc_data; #ifdef CONFIG_CONSOLE_TRANSLATIONS u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode); -unsigned short *set_translate(int m, struct vc_data *vc); +unsigned short *set_translate(enum translation_map m, struct vc_data *vc); int conv_uni_to_pc(struct vc_data *conp, long ucs); u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); @@ -30,7 +35,8 @@ static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, return glyph; } -static inline unsigned short *set_translate(int m, struct vc_data *vc) +static inline unsigned short *set_translate(enum translation_map m, + struct vc_data *vc) { return NULL; } -- cgit v1.2.3 From 8322b1f527159de578aab277629296575a11eb3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 6 Jun 2022 13:03:58 +0300 Subject: serial: Add uart_rs485_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few serial drivers make a call to rs485_config() themselves (all these seem to relate to init). Convert them all to use a common helper which makes it easy to make adjustments on tasks related to it as serial_rs485 struct sanitization is going to be added. In pci_fintek_setup() (in 8250_pci.c), the rs485_config() call was made with NULL, however, it can be changed to pass uart_port's rs485 struct. No other callers should pass NULL into rs485_config() so the NULL check can now be eliminated. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220606100433.13793-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index cbd5070bc87f..d3ebb4db2d80 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -592,4 +592,5 @@ static inline int uart_handle_break(struct uart_port *port) !((cflag) & CLOCAL)) int uart_get_rs485_mode(struct uart_port *port); +int uart_rs485_config(struct uart_port *port); #endif /* LINUX_SERIAL_CORE_H */ -- cgit v1.2.3 From 8925c31c1ac2f1e05da988581f2a70a2a8c4d638 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 6 Jun 2022 13:04:00 +0300 Subject: serial: Add rs485_supported to uart_port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preparing to move serial_rs485 struct sanitization into serial core, each driver has to provide what fields/flags it supports. This information is pointed into by rs485_supported. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220606100433.13793-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index d3ebb4db2d80..5518b70177b3 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -254,6 +254,7 @@ struct uart_port { struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ struct serial_rs485 rs485; + const struct serial_rs485 *rs485_supported; /* Supported mask for serial_rs485 */ struct gpio_desc *rs485_term_gpio; /* enable RS485 bus termination */ struct serial_iso7816 iso7816; void *private_data; /* generic platform data pointer */ -- cgit v1.2.3 From 6bb6fa6908ebd3cb4e14cd4f0ce272ec885d2eb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 6 Jun 2022 18:36:51 +0300 Subject: tty: Implement lookahead to process XON/XOFF timely MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When tty is not read from, XON/XOFF may get stuck into an intermediate buffer. As those characters are there to do software flow-control, it is not very useful. In the case where neither end reads from ttys, the receiving ends might not be able receive the XOFF characters and just keep sending more data to the opposite direction. This problem is almost guaranteed to occur with DMA which sends data in large chunks. If TTY is slow to process characters, that is, eats less than given amount in receive_buf, invoke lookahead for the rest of the chars to process potential XON/XOFF characters. We need to keep track of how many characters have been processed by the lookahead to avoid processing the flow control char again on the normal path. Bookkeeping occurs parallel on two layers (tty_buffer and n_tty) to avoid passing the lookahead_count through the whole call chain. When a flow-control char is processed, two things must occur: a) it must not be treated as normal char b) if not yet processed, flow-control actions need to be taken The return value of n_tty_receive_char_flow_ctrl() tells caller a), and b) is kept internal to n_tty_receive_char_flow_ctrl(). If characters were previous looked ahead, __receive_buf() makes two calls to the appropriate n_tty_receive_buf_* function. First call is made with lookahead_done=true for the characters that were subject to lookahead earlier and then with lookahead=false for the new characters. Either of the calls might be skipped when it has no characters to handle. Reported-by: Gilles Buloz Reviewed-by: Andy Shevchenko Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220606153652.63554-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_buffer.h | 1 + include/linux/tty_ldisc.h | 14 ++++++++++++++ include/linux/tty_port.h | 2 ++ 3 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty_buffer.h b/include/linux/tty_buffer.h index 3b9d77604291..1796648c2907 100644 --- a/include/linux/tty_buffer.h +++ b/include/linux/tty_buffer.h @@ -15,6 +15,7 @@ struct tty_buffer { int used; int size; int commit; + int lookahead; /* Lazy update on recv, can become less than "read" */ int read; int flags; /* Data points here */ diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index e85002b56752..33678e1936f6 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -186,6 +186,18 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * indicate all data received is %TTY_NORMAL. If assigned, prefer this * function for automatic flow control. * + * @lookahead_buf: [DRV] ``void ()(struct tty_struct *tty, + * const unsigned char *cp, const char *fp, int count) + * + * This function is called by the low-level tty driver for characters + * not eaten by ->receive_buf() or ->receive_buf2(). It is useful for + * processing high-priority characters such as software flow-control + * characters that could otherwise get stuck into the intermediate + * buffer until tty has room to receive them. Ldisc must be able to + * handle later a ->receive_buf() or ->receive_buf2() call for the + * same characters (e.g. by skipping the actions for high-priority + * characters already handled by ->lookahead_buf()). + * * @owner: module containting this ldisc (for reference counting) * * This structure defines the interface between the tty line discipline @@ -229,6 +241,8 @@ struct tty_ldisc_ops { void (*dcd_change)(struct tty_struct *tty, unsigned int status); int (*receive_buf2)(struct tty_struct *tty, const unsigned char *cp, const char *fp, int count); + void (*lookahead_buf)(struct tty_struct *tty, const unsigned char *cp, + const unsigned char *fp, unsigned int count); struct module *owner; }; diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 58e9619116b7..fa3c3bdaa234 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -40,6 +40,8 @@ struct tty_port_operations { struct tty_port_client_operations { int (*receive_buf)(struct tty_port *port, const unsigned char *, const unsigned char *, size_t); + void (*lookahead_buf)(struct tty_port *port, const unsigned char *cp, + const unsigned char *fp, unsigned int count); void (*write_wakeup)(struct tty_port *port); }; -- cgit v1.2.3 From 67b9d64139e13621d3ab8bb0daad7602e5fe0778 Mon Sep 17 00:00:00 2001 From: David Jander Date: Thu, 9 Jun 2022 14:13:34 +0200 Subject: spi: Fix per-cpu stats access on 32 bit systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32 bit systems, the following kernel BUG is hit: BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 caller is debug_smp_processor_id+0x18/0x24 CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.19.0-rc1-00001-g6ae0aec8a366 #181 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) Backtrace: dump_backtrace from show_stack+0x20/0x24 r7:81024ffd r6:00000000 r5:81024ffd r4:60000013 show_stack from dump_stack_lvl+0x60/0x78 dump_stack_lvl from dump_stack+0x14/0x1c r7:81024ffd r6:80f652de r5:80bec180 r4:819a2500 dump_stack from check_preemption_disabled+0xc8/0xf0 check_preemption_disabled from debug_smp_processor_id+0x18/0x24 r8:8119b7e0 r7:81205534 r6:819f5c00 r5:819f4c00 r4:c083d724 debug_smp_processor_id from __spi_sync+0x78/0x220 __spi_sync from spi_sync+0x34/0x4c r10:bb7bf4e0 r9:c083d724 r8:00000007 r7:81a068c0 r6:822a83c0 r5:c083d724 r4:819f4c00 spi_sync from spi_mem_exec_op+0x338/0x370 r5:000000b4 r4:c083d910 spi_mem_exec_op from spi_nor_read_id+0x98/0xdc r10:bb7bf4e0 r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:82358040 r4:819f7c40 spi_nor_read_id from spi_nor_detect+0x38/0x114 r7:82358040 r6:00000000 r5:819f7c40 r4:819f7c40 spi_nor_detect from spi_nor_scan+0x11c/0xbec r10:bb7bf4e0 r9:00000000 r8:00000000 r7:c083da4c r6:00000000 r5:00010101 r4:819f7c40 spi_nor_scan from spi_nor_probe+0x10c/0x2d0 r10:bb7bf4e0 r9:bb7bf4d0 r8:00000000 r7:819f4c00 r6:00000000 r5:00000000 r4:819f7c40 per-cpu access needs to be guarded against preemption. Fixes: 6598b91b5ac3 ("spi: spi.c: Convert statistics to per-cpu u64_stats_t") Reported-by: Marc Kleine-Budde Signed-off-by: David Jander Tested-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20220609121334.2984808-1-david@protonic.nl Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 2e63b4935deb..c96f526d9a20 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -84,18 +84,24 @@ struct spi_statistics { #define SPI_STATISTICS_ADD_TO_FIELD(pcpu_stats, field, count) \ do { \ - struct spi_statistics *__lstats = this_cpu_ptr(pcpu_stats); \ + struct spi_statistics *__lstats; \ + get_cpu(); \ + __lstats = this_cpu_ptr(pcpu_stats); \ u64_stats_update_begin(&__lstats->syncp); \ u64_stats_add(&__lstats->field, count); \ u64_stats_update_end(&__lstats->syncp); \ + put_cpu(); \ } while (0) #define SPI_STATISTICS_INCREMENT_FIELD(pcpu_stats, field) \ do { \ - struct spi_statistics *__lstats = this_cpu_ptr(pcpu_stats); \ + struct spi_statistics *__lstats; \ + get_cpu(); \ + __lstats = this_cpu_ptr(pcpu_stats); \ u64_stats_update_begin(&__lstats->syncp); \ u64_stats_inc(&__lstats->field); \ u64_stats_update_end(&__lstats->syncp); \ + put_cpu(); \ } while (0) /** -- cgit v1.2.3 From a6546f89eac9bfad4b50a7f8ccfbefe0ae01e7df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jun 2022 16:11:10 +0200 Subject: treewide: Replace GPLv2 boilerplate/reference with SPDX - gpl-2.0_8.RULE Based on the normalized pattern: this program is free software you can redistribute it and/or modify it under the terms of the gnu general public license version 2 as published by the free software foundation extracted by the scancode license scanner the SPDX license identifier GPL-2.0-only has been chosen to replace the boilerplate/reference. Reviewed-by: Allison Randal Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/input/elan-i2c-ids.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/input/elan-i2c-ids.h b/include/linux/input/elan-i2c-ids.h index 520858d12680..51cca17ee94c 100644 --- a/include/linux/input/elan-i2c-ids.h +++ b/include/linux/input/elan-i2c-ids.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Elan I2C/SMBus Touchpad device whitelist * @@ -11,10 +12,6 @@ * copyright (c) 2011-2012 Cypress Semiconductor, Inc. * copyright (c) 2011-2012 Google, Inc. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * * Trademarks are the property of their respective owners. */ -- cgit v1.2.3 From 2aec85b26f39fa9e036c5872950c0ef9b479a1ec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jun 2022 16:11:13 +0200 Subject: treewide: Replace GPLv2 boilerplate/reference with SPDX - gpl-2.0_30.RULE (part 2) Based on the normalized pattern: this program is free software you can redistribute it and/or modify it under the terms of the gnu general public license as published by the free software foundation version 2 this program is distributed as is without any warranty of any kind whether express or implied without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license for more details extracted by the scancode license scanner the SPDX license identifier GPL-2.0-only has been chosen to replace the boilerplate/reference. Reviewed-by: Allison Randal Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/lp873x.h | 10 +--------- include/linux/mfd/tps65217.h | 10 +--------- include/linux/platform_data/davinci_asp.h | 10 +--------- include/linux/platform_data/gpio-davinci.h | 10 +--------- include/linux/platform_data/uio_dmem_genirq.h | 10 +--------- include/linux/platform_data/uio_pruss.h | 10 +--------- include/linux/reset/bcm63xx_pmb.h | 10 +--------- include/linux/soc/ti/knav_dma.h | 10 +--------- include/linux/soc/ti/knav_qmss.h | 10 +--------- include/linux/sram.h | 14 ++------------ include/linux/ti-emif-sram.h | 10 +--------- include/linux/wkup_m3_ipc.h | 10 +--------- 12 files changed, 13 insertions(+), 111 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/lp873x.h b/include/linux/mfd/lp873x.h index 5546688c7da7..fe8174cc8637 100644 --- a/include/linux/mfd/lp873x.h +++ b/include/linux/mfd/lp873x.h @@ -1,16 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Functions to access LP873X power management chip. * * Copyright (C) 2016 Texas Instruments Incorporated - https://www.ti.com/ - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef __LINUX_MFD_LP873X_H diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h index db7091824ed0..877d9c41c53d 100644 --- a/include/linux/mfd/tps65217.h +++ b/include/linux/mfd/tps65217.h @@ -1,18 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/mfd/tps65217.h * * Functions to access TPS65217 power management chip. * * Copyright (C) 2011 Texas Instruments Incorporated - https://www.ti.com/ - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef __LINUX_MFD_TPS65217_H diff --git a/include/linux/platform_data/davinci_asp.h b/include/linux/platform_data/davinci_asp.h index 76b13ef67562..c8645b2ed3c0 100644 --- a/include/linux/platform_data/davinci_asp.h +++ b/include/linux/platform_data/davinci_asp.h @@ -1,16 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * TI DaVinci Audio Serial Port support * * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef __DAVINCI_ASP_H diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h index e182a46e609f..b82e44662efe 100644 --- a/include/linux/platform_data/gpio-davinci.h +++ b/include/linux/platform_data/gpio-davinci.h @@ -1,16 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * DaVinci GPIO Platform Related Defines * * Copyright (C) 2013 Texas Instruments Incorporated - https://www.ti.com/ - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef __DAVINCI_GPIO_PLATFORM_H diff --git a/include/linux/platform_data/uio_dmem_genirq.h b/include/linux/platform_data/uio_dmem_genirq.h index 973c1bb32168..c8f6de685306 100644 --- a/include/linux/platform_data/uio_dmem_genirq.h +++ b/include/linux/platform_data/uio_dmem_genirq.h @@ -1,16 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * include/linux/platform_data/uio_dmem_genirq.h * * Copyright (C) 2012 Damian Hobson-Garcia - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef _UIO_DMEM_GENIRQ_H diff --git a/include/linux/platform_data/uio_pruss.h b/include/linux/platform_data/uio_pruss.h index 31f2e22661bc..f76fa393b802 100644 --- a/include/linux/platform_data/uio_pruss.h +++ b/include/linux/platform_data/uio_pruss.h @@ -1,18 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * include/linux/platform_data/uio_pruss.h * * Platform data for uio_pruss driver * * Copyright (C) 2010-11 Texas Instruments Incorporated - https://www.ti.com/ - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef _UIO_PRUSS_H_ diff --git a/include/linux/reset/bcm63xx_pmb.h b/include/linux/reset/bcm63xx_pmb.h index bb4af7b5eb36..c77b6999518a 100644 --- a/include/linux/reset/bcm63xx_pmb.h +++ b/include/linux/reset/bcm63xx_pmb.h @@ -1,17 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Broadcom BCM63xx Processor Monitor Bus shared routines (SMP and reset) * * Copyright (C) 2015, Broadcom Corporation * Author: Florian Fainelli - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef __BCM63XX_PMB_H #define __BCM63XX_PMB_H diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h index 7127ec301537..18d806a8e52c 100644 --- a/include/linux/soc/ti/knav_dma.h +++ b/include/linux/soc/ti/knav_dma.h @@ -1,17 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2014 Texas Instruments Incorporated * Authors: Sandeep Nair - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__ diff --git a/include/linux/soc/ti/knav_qmss.h b/include/linux/soc/ti/knav_qmss.h index c75ef99c99ca..175f466ebcc3 100644 --- a/include/linux/soc/ti/knav_qmss.h +++ b/include/linux/soc/ti/knav_qmss.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Keystone Navigator Queue Management Sub-System header * @@ -5,15 +6,6 @@ * Author: Sandeep Nair * Cyril Chemparathy * Santosh Shilimkar - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef __SOC_TI_KNAV_QMSS_H__ diff --git a/include/linux/sram.h b/include/linux/sram.h index 4fb405fb0480..d7dee19505c6 100644 --- a/include/linux/sram.h +++ b/include/linux/sram.h @@ -1,15 +1,5 @@ -/* - * Generic SRAM Driver Interface - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Generic SRAM Driver Interface */ #ifndef __LINUX_SRAM_H__ #define __LINUX_SRAM_H__ diff --git a/include/linux/ti-emif-sram.h b/include/linux/ti-emif-sram.h index 2fc854155c27..441b2988e66a 100644 --- a/include/linux/ti-emif-sram.h +++ b/include/linux/ti-emif-sram.h @@ -1,17 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * TI AM33XX EMIF Routines * * Copyright (C) 2016-2017 Texas Instruments Inc. * Dave Gerlach - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef __LINUX_TI_EMIF_H #define __LINUX_TI_EMIF_H diff --git a/include/linux/wkup_m3_ipc.h b/include/linux/wkup_m3_ipc.h index 26d1eb058fa3..5e1b26f988e2 100644 --- a/include/linux/wkup_m3_ipc.h +++ b/include/linux/wkup_m3_ipc.h @@ -1,17 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * TI Wakeup M3 for AMx3 SoCs Power Management Routines * * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Dave Gerlach - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef _LINUX_WKUP_M3_IPC_H -- cgit v1.2.3 From 1accad5e74635d7f9d994e692649fbe736afe150 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jun 2022 16:11:20 +0200 Subject: treewide: Replace GPLv2 boilerplate/reference with SPDX - gpl-2.0_149.RULE Based on the normalized pattern: netapp provides this source code under the gpl v2 license the gpl v2 license is available at https://opensource org/licenses/gpl-license php this software is provided by the copyright holders and contributors as is and any express or implied warranties including but not limited to the implied warranties of merchantability and fitness for a particular purpose are disclaimed in no event shall the copyright owner or contributors be liable for any direct indirect incidental special exemplary or consequential damages (including but not limited to procurement of substitute goods or services loss of use data or profits or business interruption) however caused and on any theory of liability whether in contract strict liability or tort (including negligence or otherwise) arising in any way out of the use of this software even if advised of the possibility of such damage extracted by the scancode license scanner the SPDX license identifier GPL-2.0-only has been chosen to replace the boilerplate/reference. Reviewed-by: Allison Randal Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/sunrpc/bc_xprt.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index f07c334c599f..db30a159f9d5 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** (c) 2008 NetApp. All Rights Reserved. -NetApp provides this source code under the GPL v2 License. -The GPL v2 license is available at -https://opensource.org/licenses/gpl-license.php. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ @@ -83,4 +69,3 @@ static inline void xprt_free_bc_request(struct rpc_rqst *req) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ #endif /* _LINUX_SUNRPC_BC_XPRT_H */ - -- cgit v1.2.3 From 298b95f111be85f2d5a18dc0177eb9a64130f9b4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jun 2022 16:11:21 +0200 Subject: treewide: Replace GPLv2 boilerplate/reference with SPDX - gpl-2.0_152.RULE Based on the normalized pattern: this software is distributed under the terms of the gnu general public license ( gpl ) version 2 as published by the free software foundation this software is provided by the copyright holders and contributors as is and any express or implied warranties including but not limited to the implied warranties of merchantability and fitness for a particular purpose are disclaimed in no event shall the copyright owner or contributors be liable for any direct indirect incidental special exemplary or consequential damages (including but not limited to procurement of substitute goods or services loss of use data or profits or business interruption) however caused and on any theory of liability whether in contract strict liability or tort (including negligence or otherwise) arising in any way out of the use of this software even if advised of the possibility of such damage extracted by the scancode license scanner the SPDX license identifier GPL-2.0-only has been chosen to replace the boilerplate/reference. Reviewed-by: Allison Randal Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/usb-omap.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h index 5e70d667031c..580978e468f8 100644 --- a/include/linux/platform_data/usb-omap.h +++ b/include/linux/platform_data/usb-omap.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * usb-omap.h - Platform data for the various OMAP USB IPs * * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com - * - * This software is distributed under the terms of the GNU General Public - * License ("GPL") version 2, as published by the Free Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. */ #define OMAP3_HS_USB_PORTS 3 -- cgit v1.2.3 From abd462747539bbc630e50e8be59e3aebca63441d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jun 2022 16:11:31 +0200 Subject: treewide: Replace GPLv2 boilerplate/reference with SPDX - gpl-2.0_319.RULE Based on the normalized pattern: this program is free software you can redistribute it and/or modify it under the terms of the gnu general public license version 2 as published by the free software foundation this program is distributed as is without any warranty of any kind whether expressed or implied without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license version 2 for more details extracted by the scancode license scanner the SPDX license identifier GPL-2.0-only has been chosen to replace the boilerplate/reference. Reviewed-by: Allison Randal Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/tps65086.h | 10 +--------- include/linux/mfd/tps65218.h | 10 +--------- include/linux/mfd/tps65912.h | 10 +--------- 3 files changed, 3 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65086.h b/include/linux/mfd/tps65086.h index e0a417e53766..16f87cccc003 100644 --- a/include/linux/mfd/tps65086.h +++ b/include/linux/mfd/tps65086.h @@ -1,16 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Andrew F. Davis * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether expressed or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License version 2 for more details. - * * Based on the TPS65912 driver */ diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h index 122e24ddbd4b..2946be2f15f3 100644 --- a/include/linux/mfd/tps65218.h +++ b/include/linux/mfd/tps65218.h @@ -1,18 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/mfd/tps65218.h * * Functions to access TPS65218 power management chip. * * Copyright (C) 2014 Texas Instruments Incorporated - https://www.ti.com/ - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether expressed or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License version 2 for more details. */ #ifndef __LINUX_MFD_TPS65218_H diff --git a/include/linux/mfd/tps65912.h b/include/linux/mfd/tps65912.h index 8a61386cb8c1..860ec0a16c96 100644 --- a/include/linux/mfd/tps65912.h +++ b/include/linux/mfd/tps65912.h @@ -1,16 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Andrew F. Davis * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether expressed or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License version 2 for more details. - * * Based on the TPS65218 driver and the previous TPS65912 driver by * Margarita Olaya Cabrera */ -- cgit v1.2.3 From 5a729246e57eac410e4a13f5aba66ae2dc552632 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jun 2022 16:11:32 +0200 Subject: treewide: Replace GPLv2 boilerplate/reference with SPDX - gpl-2.0_320.RULE Based on the normalized pattern: this program is free software you can redistribute it and/or modify it under the terms of the gnu general public license version 2 as published by the free software foundation this program is distributed as is without any warranty of any kind whether express or implied without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license for more details extracted by the scancode license scanner the SPDX license identifier GPL-2.0-only has been chosen to replace the boilerplate/reference. Reviewed-by: Allison Randal Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/clk/ti.h | 10 +--------- include/linux/pm_wakeirq.h | 14 ++------------ include/linux/soc/ti/ti-msgmgr.h | 10 +--------- 3 files changed, 4 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 3486f20a3753..cbfcbf186ce3 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -1,16 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * TI clock drivers support * * Copyright (C) 2013 Texas Instruments, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef __LINUX_CLK_TI_H__ #define __LINUX_CLK_TI_H__ diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h index e63a63aa47a3..dd42d16945d0 100644 --- a/include/linux/pm_wakeirq.h +++ b/include/linux/pm_wakeirq.h @@ -1,15 +1,5 @@ -/* - * pm_wakeirq.h - Device wakeirq helper functions - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* pm_wakeirq.h - Device wakeirq helper functions */ #ifndef _LINUX_PM_WAKEIRQ_H #define _LINUX_PM_WAKEIRQ_H diff --git a/include/linux/soc/ti/ti-msgmgr.h b/include/linux/soc/ti/ti-msgmgr.h index 69a8d7682c4b..543da257a5f2 100644 --- a/include/linux/soc/ti/ti-msgmgr.h +++ b/include/linux/soc/ti/ti-msgmgr.h @@ -1,17 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Texas Instruments' Message Manager * * Copyright (C) 2015-2022 Texas Instruments Incorporated - https://www.ti.com/ * Nishanth Menon - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef TI_MSGMGR_H -- cgit v1.2.3 From 35ba63b8f6d07d353159505423cfca5a4378a11c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Jun 2022 10:41:05 +0200 Subject: vme: move back to staging The VME subsystem graduated from staging into a top-level subsystem in 2012, with commit db3b9e990e75 ("Staging: VME: move VME drivers out of staging") stating: The VME device drivers have not moved out yet due to some API questions they are still working through, that should happen soon, hopefully. However, this never happened: maintenance of drivers/vme effectively stopped in 2017, with all subsequent changes being treewide cleanups. No hardware driver remains in staging, only the limited user-level access, and I just removed one of the two bridge drivers and the only remaining board. drivers/staging/vme/devices/ was recently moved to drivers/staging/vme_user/, but as the vme_user driver is the only one remaining for this subsystem, it is easier to just move the remaining three source files into this directory rather than keeping the original hierarchy. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20220606084109.4108188-3-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/vme.h | 190 ---------------------------------------------------- 1 file changed, 190 deletions(-) delete mode 100644 include/linux/vme.h (limited to 'include/linux') diff --git a/include/linux/vme.h b/include/linux/vme.h deleted file mode 100644 index b204a9b4be1b..000000000000 --- a/include/linux/vme.h +++ /dev/null @@ -1,190 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _VME_H_ -#define _VME_H_ - -/* Resource Type */ -enum vme_resource_type { - VME_MASTER, - VME_SLAVE, - VME_DMA, - VME_LM -}; - -/* VME Address Spaces */ -#define VME_A16 0x1 -#define VME_A24 0x2 -#define VME_A32 0x4 -#define VME_A64 0x8 -#define VME_CRCSR 0x10 -#define VME_USER1 0x20 -#define VME_USER2 0x40 -#define VME_USER3 0x80 -#define VME_USER4 0x100 - -#define VME_A16_MAX 0x10000ULL -#define VME_A24_MAX 0x1000000ULL -#define VME_A32_MAX 0x100000000ULL -#define VME_A64_MAX 0x10000000000000000ULL -#define VME_CRCSR_MAX 0x1000000ULL - - -/* VME Cycle Types */ -#define VME_SCT 0x1 -#define VME_BLT 0x2 -#define VME_MBLT 0x4 -#define VME_2eVME 0x8 -#define VME_2eSST 0x10 -#define VME_2eSSTB 0x20 - -#define VME_2eSST160 0x100 -#define VME_2eSST267 0x200 -#define VME_2eSST320 0x400 - -#define VME_SUPER 0x1000 -#define VME_USER 0x2000 -#define VME_PROG 0x4000 -#define VME_DATA 0x8000 - -/* VME Data Widths */ -#define VME_D8 0x1 -#define VME_D16 0x2 -#define VME_D32 0x4 -#define VME_D64 0x8 - -/* Arbitration Scheduling Modes */ -#define VME_R_ROBIN_MODE 0x1 -#define VME_PRIORITY_MODE 0x2 - -#define VME_DMA_PATTERN (1<<0) -#define VME_DMA_PCI (1<<1) -#define VME_DMA_VME (1<<2) - -#define VME_DMA_PATTERN_BYTE (1<<0) -#define VME_DMA_PATTERN_WORD (1<<1) -#define VME_DMA_PATTERN_INCREMENT (1<<2) - -#define VME_DMA_VME_TO_MEM (1<<0) -#define VME_DMA_MEM_TO_VME (1<<1) -#define VME_DMA_VME_TO_VME (1<<2) -#define VME_DMA_MEM_TO_MEM (1<<3) -#define VME_DMA_PATTERN_TO_VME (1<<4) -#define VME_DMA_PATTERN_TO_MEM (1<<5) - -struct vme_dma_attr { - u32 type; - void *private; -}; - -struct vme_resource { - enum vme_resource_type type; - struct list_head *entry; -}; - -extern struct bus_type vme_bus_type; - -/* Number of VME interrupt vectors */ -#define VME_NUM_STATUSID 256 - -/* VME_MAX_BRIDGES comes from the type of vme_bus_numbers */ -#define VME_MAX_BRIDGES (sizeof(unsigned int)*8) -#define VME_MAX_SLOTS 32 - -#define VME_SLOT_CURRENT -1 -#define VME_SLOT_ALL -2 - -/** - * struct vme_dev - Structure representing a VME device - * @num: The device number - * @bridge: Pointer to the bridge device this device is on - * @dev: Internal device structure - * @drv_list: List of devices (per driver) - * @bridge_list: List of devices (per bridge) - */ -struct vme_dev { - int num; - struct vme_bridge *bridge; - struct device dev; - struct list_head drv_list; - struct list_head bridge_list; -}; - -/** - * struct vme_driver - Structure representing a VME driver - * @name: Driver name, should be unique among VME drivers and usually the same - * as the module name. - * @match: Callback used to determine whether probe should be run. - * @probe: Callback for device binding, called when new device is detected. - * @remove: Callback, called on device removal. - * @driver: Underlying generic device driver structure. - * @devices: List of VME devices (struct vme_dev) associated with this driver. - */ -struct vme_driver { - const char *name; - int (*match)(struct vme_dev *); - int (*probe)(struct vme_dev *); - void (*remove)(struct vme_dev *); - struct device_driver driver; - struct list_head devices; -}; - -void *vme_alloc_consistent(struct vme_resource *, size_t, dma_addr_t *); -void vme_free_consistent(struct vme_resource *, size_t, void *, - dma_addr_t); - -size_t vme_get_size(struct vme_resource *); -int vme_check_window(u32 aspace, unsigned long long vme_base, - unsigned long long size); - -struct vme_resource *vme_slave_request(struct vme_dev *, u32, u32); -int vme_slave_set(struct vme_resource *, int, unsigned long long, - unsigned long long, dma_addr_t, u32, u32); -int vme_slave_get(struct vme_resource *, int *, unsigned long long *, - unsigned long long *, dma_addr_t *, u32 *, u32 *); -void vme_slave_free(struct vme_resource *); - -struct vme_resource *vme_master_request(struct vme_dev *, u32, u32, u32); -int vme_master_set(struct vme_resource *, int, unsigned long long, - unsigned long long, u32, u32, u32); -int vme_master_get(struct vme_resource *, int *, unsigned long long *, - unsigned long long *, u32 *, u32 *, u32 *); -ssize_t vme_master_read(struct vme_resource *, void *, size_t, loff_t); -ssize_t vme_master_write(struct vme_resource *, void *, size_t, loff_t); -unsigned int vme_master_rmw(struct vme_resource *, unsigned int, unsigned int, - unsigned int, loff_t); -int vme_master_mmap(struct vme_resource *resource, struct vm_area_struct *vma); -void vme_master_free(struct vme_resource *); - -struct vme_resource *vme_dma_request(struct vme_dev *, u32); -struct vme_dma_list *vme_new_dma_list(struct vme_resource *); -struct vme_dma_attr *vme_dma_pattern_attribute(u32, u32); -struct vme_dma_attr *vme_dma_pci_attribute(dma_addr_t); -struct vme_dma_attr *vme_dma_vme_attribute(unsigned long long, u32, u32, u32); -void vme_dma_free_attribute(struct vme_dma_attr *); -int vme_dma_list_add(struct vme_dma_list *, struct vme_dma_attr *, - struct vme_dma_attr *, size_t); -int vme_dma_list_exec(struct vme_dma_list *); -int vme_dma_list_free(struct vme_dma_list *); -int vme_dma_free(struct vme_resource *); - -int vme_irq_request(struct vme_dev *, int, int, - void (*callback)(int, int, void *), void *); -void vme_irq_free(struct vme_dev *, int, int); -int vme_irq_generate(struct vme_dev *, int, int); - -struct vme_resource *vme_lm_request(struct vme_dev *); -int vme_lm_count(struct vme_resource *); -int vme_lm_set(struct vme_resource *, unsigned long long, u32, u32); -int vme_lm_get(struct vme_resource *, unsigned long long *, u32 *, u32 *); -int vme_lm_attach(struct vme_resource *, int, void (*callback)(void *), void *); -int vme_lm_detach(struct vme_resource *, int); -void vme_lm_free(struct vme_resource *); - -int vme_slot_num(struct vme_dev *); -int vme_bus_num(struct vme_dev *); - -int vme_register_driver(struct vme_driver *, unsigned int); -void vme_unregister_driver(struct vme_driver *); - - -#endif /* _VME_H_ */ - -- cgit v1.2.3 From 2f8c3ae8288e4a4018330ed5c4e758b878d9c555 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 1 Jun 2022 00:07:00 -0700 Subject: driver core: Add wait_for_init_devices_probe helper function Some devices might need to be probed and bound successfully before the kernel boot sequence can finish and move on to init/userspace. For example, a network interface might need to be bound to be able to mount a NFS rootfs. With fw_devlink=on by default, some of these devices might be blocked from probing because they are waiting on a optional supplier that doesn't have a driver. While fw_devlink will eventually identify such devices and unblock the probing automatically, it might be too late by the time it unblocks the probing of devices. For example, the IP4 autoconfig might timeout before fw_devlink unblocks probing of the network interface. This function is available to temporarily try and probe all devices that have a driver even if some of their suppliers haven't been added or don't have drivers. The drivers can then decide which of the suppliers are optional vs mandatory and probe the device if possible. By the time this function returns, all such "best effort" probes are guaranteed to be completed. If a device successfully probes in this mode, we delete all fw_devlink discovered dependencies of that device where the supplier hasn't yet probed successfully because they have to be optional dependencies. This also means that some devices that aren't needed for init and could have waited for their optional supplier to probe (when the supplier's module is loaded later on) would end up probing prematurely with limited functionality. So call this function only when boot would fail without it. Tested-by: Geert Uytterhoeven Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20220601070707.3946847-5-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 700453017e1c..2114d65b862f 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -129,6 +129,7 @@ extern struct device_driver *driver_find(const char *name, struct bus_type *bus); extern int driver_probe_done(void); extern void wait_for_device_probe(void); +void __init wait_for_init_devices_probe(void); /* sysfs interface for exporting driver attributes */ -- cgit v1.2.3 From 9cbffc7a59561be950ecc675d19a3d2b45202b2b Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 1 Jun 2022 00:07:05 -0700 Subject: driver core: Delete driver_deferred_probe_check_state() The function is no longer used. So delete it. Tested-by: Geert Uytterhoeven Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20220601070707.3946847-10-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 2114d65b862f..7acaabde5396 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -242,7 +242,6 @@ driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev) extern int driver_deferred_probe_timeout; void driver_deferred_probe_add(struct device *dev); -int driver_deferred_probe_check_state(struct device *dev); void driver_init(void); /** -- cgit v1.2.3 From 82b070beae1ef55b0049768c8dc91d87565bb191 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 10 Jun 2022 15:02:18 +0300 Subject: driver core: Introduce device_find_any_child() helper There are several places in the kernel where this kind of functionality is being used. Provide a generic helper for such cases. Reviewed-by: Rafael J. Wysocki Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220610120219.18988-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index dc941997795c..424b55df0272 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -905,6 +905,8 @@ struct device *device_find_child(struct device *dev, void *data, int (*match)(struct device *dev, void *data)); struct device *device_find_child_by_name(struct device *parent, const char *name); +struct device *device_find_any_child(struct device *parent); + int device_rename(struct device *dev, const char *new_name); int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); -- cgit v1.2.3 From 73b4b53276a1d6290cd4f47dbbc885b6e6e59ac6 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 19 May 2022 09:47:28 -0400 Subject: Revert "workqueue: remove unused cancel_work()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 6417250d3f894e66a68ba1cd93676143f2376a6f. amdpgu need this function in order to prematurly stop pending reset works when another reset work already in progress. Acked-by: Tejun Heo Signed-off-by: Andrey Grodzovsky Reviewed-by: Lai Jiangshan Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/linux/workqueue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 7fee9b6cfede..9e41e1226193 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -453,6 +453,7 @@ extern int schedule_on_each_cpu(work_func_t func); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); +extern bool cancel_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); -- cgit v1.2.3 From 36518b6b4da7e8d4387bc19ad21e772f1060e9d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Jun 2022 16:04:03 -0400 Subject: teach iomap_dio_rw() to suppress dsync New flag, equivalent to removal of IOCB_DSYNC from iocb flags. This mimics what btrfs is doing (and that's what btrfs will switch to). However, I'm not at all sure that we want to suppress REQ_FUA for those - all btrfs hack really cares about is suppression of generic_write_sync(). For now let's keep the existing behaviour, but I really want to hear more detailed arguments pro or contra. [folded brain fix from willy] Suggested-by: Christoph Hellwig Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- include/linux/iomap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e552097c67e0..c8622d8f064e 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -353,6 +353,12 @@ struct iomap_dio_ops { */ #define IOMAP_DIO_PARTIAL (1 << 2) +/* + * The caller will sync the write if needed; do not sync it within + * iomap_dio_rw. Overrides IOMAP_DIO_FORCE_WAIT. + */ +#define IOMAP_DIO_NOSYNC (1 << 3) + ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags, void *private, size_t done_before); -- cgit v1.2.3 From e87f2c26c8085dac59978dee1beeb05cef31a9dd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 May 2022 09:28:12 -0400 Subject: struct file: use anonymous union member for rcuhead and llist Once upon a time we couldn't afford anon unions; these days minimal gcc version had been raised enough to take care of that. Reviewed-by: Jan Kara Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae..6a2a4906041f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -924,9 +924,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) struct file { union { - struct llist_node fu_llist; - struct rcu_head fu_rcuhead; - } f_u; + struct llist_node f_llist; + struct rcu_head f_rcuhead; + }; struct path f_path; struct inode *f_inode; /* cached value */ const struct file_operations *f_op; -- cgit v1.2.3 From 91b94c5d6ae55d1161633047ffeea644b110b35f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 May 2022 09:39:27 -0400 Subject: iocb: delay evaluation of IS_SYNC(...) until we want to check IOCB_DSYNC New helper to be used instead of direct checks for IOCB_DSYNC: iocb_is_dsync(iocb). Checks converted, which allows to avoid the IS_SYNC(iocb->ki_filp->f_mapping->host) part (4 cache lines) from iocb_flags() - it's checked in iocb_is_dsync() instead Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- include/linux/fs.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6a2a4906041f..380a1292f4f9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2720,6 +2720,12 @@ extern int vfs_fsync(struct file *file, int datasync); extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, unsigned int flags); +static inline bool iocb_is_dsync(const struct kiocb *iocb) +{ + return (iocb->ki_flags & IOCB_DSYNC) || + IS_SYNC(iocb->ki_filp->f_mapping->host); +} + /* * Sync the bytes written if this was a synchronous write. Expect ki_pos * to already be updated for the write, and will return either the amount @@ -2727,7 +2733,7 @@ extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, */ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) { - if (iocb->ki_flags & IOCB_DSYNC) { + if (iocb_is_dsync(iocb)) { int ret = vfs_fsync_range(iocb->ki_filp, iocb->ki_pos - count, iocb->ki_pos - 1, (iocb->ki_flags & IOCB_SYNC) ? 0 : 1); @@ -3262,7 +3268,7 @@ static inline int iocb_flags(struct file *file) res |= IOCB_APPEND; if (file->f_flags & O_DIRECT) res |= IOCB_DIRECT; - if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host)) + if (file->f_flags & O_DSYNC) res |= IOCB_DSYNC; if (file->f_flags & __O_SYNC) res |= IOCB_SYNC; -- cgit v1.2.3 From 164f4064ca81eefcea29f7f5dcf394f92be1d0c0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 May 2022 11:38:11 -0400 Subject: keep iocb_flags() result cached in struct file * calculate at the time we set FMODE_OPENED (do_dentry_open() for normal opens, alloc_file() for pipe()/socket()/etc.) * update when handling F_SETFL * keep in a new field - file->f_iocb_flags; since that thing is needed only before the refcount reaches zero, we can put it into the same anon union where ->f_rcuhead and ->f_llist live - those are used only after refcount reaches zero. Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- include/linux/fs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 380a1292f4f9..c82b9d442f56 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -926,6 +926,7 @@ struct file { union { struct llist_node f_llist; struct rcu_head f_rcuhead; + unsigned int f_iocb_flags; }; struct path f_path; struct inode *f_inode; /* cached value */ @@ -2199,13 +2200,11 @@ static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns, !gid_valid(i_gid_into_mnt(mnt_userns, inode)); } -static inline int iocb_flags(struct file *file); - static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, - .ki_flags = iocb_flags(filp), + .ki_flags = filp->f_iocb_flags, .ki_ioprio = get_current_ioprio(), }; } -- cgit v1.2.3 From 7cbb6681d7e5b88688234ad370e027a9346ff7a9 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Wed, 27 Apr 2022 12:08:04 -0700 Subject: iio: common: cros_ec_sensors: Add label attribute When sensor location is known, populate iio sysfs "label" attribute: * "accel-base" : the sensor is in the base of the convertible (2-1) device. * "accel-display" : the sensor is in the lid/display plane of the device. * "accel-camera" : the sensor is in the swivel camera subassembly. The non-standard |location| attribute is removed, the field |loc| in cros_ec_sensors_core_state is removed. It apply to standalone accelerometer as well as IMU (accelerometer + gyroscope) and sensors where the location is known (light). Signed-off-by: Gwendal Grignou Link: https://lore.kernel.org/r/20220427190804.961697-3-gwendal@chromium.org Signed-off-by: Jonathan Cameron --- include/linux/iio/common/cros_ec_sensors_core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h index c582e1a14232..a8259c8822f5 100644 --- a/include/linux/iio/common/cros_ec_sensors_core.h +++ b/include/linux/iio/common/cros_ec_sensors_core.h @@ -41,7 +41,6 @@ typedef irqreturn_t (*cros_ec_sensors_capture_t)(int irq, void *p); * @param: motion sensor parameters structure * @resp: motion sensor response structure * @type: type of motion sensor - * @loc: location where the motion sensor is placed * @range_updated: True if the range of the sensor has been * updated. * @curr_range: If updated, the current range value. @@ -67,7 +66,6 @@ struct cros_ec_sensors_core_state { struct ec_response_motion_sense *resp; enum motionsensor_type type; - enum motionsensor_location loc; bool range_updated; int curr_range; -- cgit v1.2.3 From ccd8a9351f7b44bc1c0c8e4d39c0d6593b106fc4 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Fri, 10 Jun 2022 23:30:08 +0900 Subject: can: skb: move can_dropped_invalid_skb() and can_skb_headroom_valid() to skb.c The functions can_dropped_invalid_skb() and can_skb_headroom_valid() grew a lot over the years to a point which it does not make much sense to have them defined as static inline in header files. Move those two functions to the .c counterpart of skb.h. can_skb_headroom_valid()'s only caller being can_dropped_invalid_skb(), the declaration is removed from the header. Only can_dropped_invalid_skb() gets its symbol exported. While doing so, do a small cleanup: add brackets around the else block in can_dropped_invalid_skb(). Link: https://lore.kernel.org/all/20220610143009.323579-7-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Reported-by: kernel test robot Acked-by: Max Staudt Tested-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 59 +------------------------------------------------ 1 file changed, 1 insertion(+), 58 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index fdb22b00674a..182749e858b3 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -31,6 +31,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, struct canfd_frame **cfd); struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf); +bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb); /* * The struct can_skb_priv is used to transport additional information along @@ -96,64 +97,6 @@ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) return nskb; } -/* Check for outgoing skbs that have not been created by the CAN subsystem */ -static inline bool can_skb_headroom_valid(struct net_device *dev, - struct sk_buff *skb) -{ - /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */ - if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv))) - return false; - - /* af_packet does not apply CAN skb specific settings */ - if (skb->ip_summed == CHECKSUM_NONE) { - /* init headroom */ - can_skb_prv(skb)->ifindex = dev->ifindex; - can_skb_prv(skb)->skbcnt = 0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - - /* perform proper loopback on capable devices */ - if (dev->flags & IFF_ECHO) - skb->pkt_type = PACKET_LOOPBACK; - else - skb->pkt_type = PACKET_HOST; - - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - } - - return true; -} - -/* Drop a given socketbuffer if it does not contain a valid CAN frame. */ -static inline bool can_dropped_invalid_skb(struct net_device *dev, - struct sk_buff *skb) -{ - const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - - if (skb->protocol == htons(ETH_P_CAN)) { - if (unlikely(skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN)) - goto inval_skb; - } else if (skb->protocol == htons(ETH_P_CANFD)) { - if (unlikely(skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN)) - goto inval_skb; - } else - goto inval_skb; - - if (!can_skb_headroom_valid(dev, skb)) - goto inval_skb; - - return false; - -inval_skb: - kfree_skb(skb); - dev->stats.tx_dropped++; - return true; -} - static inline bool can_is_canfd_skb(const struct sk_buff *skb) { /* the CAN specific type of skb is identified by its data length */ -- cgit v1.2.3 From 662a60102c122e44fdaf5c826f7f415eb57d48ad Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 2 May 2022 16:20:56 +0300 Subject: usb: typec: Separate USB Power Delivery from USB Type-C Introducing a small device class for USB Power Delivery. The idea with it is that we do not mix any more USB Power Delivery information into the USB Type-C connectors only. This separation will make it possible to register USB Power Delivery devices also from other places, for example from USB Type-C Bridges (see USB Type-C Bridge Specification). The device class will not always deal with only the messages and objects that were negotiated with the partner, but instead messages and objects that can be used in the negotiation. That allows the USB PD devices to be shared and reconfigured. The ports can decide which objects are to be advertised to the partner before the contract is negotiated. It is also possible to allow the user space to make that decision if needed. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20220502132058.86236-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd.h | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/usb/typec.h | 10 ++++++++++ 2 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index 96b7ff66f074..c59fb79a42e8 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -495,4 +495,42 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_P_SNK_STDBY_MW 2500 /* 2500 mW */ +#if IS_ENABLED(CONFIG_TYPEC) + +struct usb_power_delivery; + +/** + * usb_power_delivery_desc - USB Power Delivery Descriptor + * @revision: USB Power Delivery Specification Revision + * @version: USB Power Delivery Specicication Version - optional + */ +struct usb_power_delivery_desc { + u16 revision; + u16 version; +}; + +/** + * usb_power_delivery_capabilities_desc - Description of USB Power Delivery Capabilities Message + * @pdo: The Power Data Objects in the Capability Message + * @role: Power role of the capabilities + */ +struct usb_power_delivery_capabilities_desc { + u32 pdo[PDO_MAX_OBJECTS]; + enum typec_role role; +}; + +struct usb_power_delivery_capabilities * +usb_power_delivery_register_capabilities(struct usb_power_delivery *pd, + struct usb_power_delivery_capabilities_desc *desc); +void usb_power_delivery_unregister_capabilities(struct usb_power_delivery_capabilities *cap); + +struct usb_power_delivery *usb_power_delivery_register(struct device *parent, + struct usb_power_delivery_desc *desc); +void usb_power_delivery_unregister(struct usb_power_delivery *pd); + +int usb_power_delivery_link_device(struct usb_power_delivery *pd, struct device *dev); +void usb_power_delivery_unlink_device(struct usb_power_delivery *pd, struct device *dev); + +#endif /* CONFIG_TYPEC */ + #endif /* __LINUX_USB_PD_H */ diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index fdf737d48b3b..45e28d14ae56 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -52,6 +52,16 @@ enum typec_role { TYPEC_SOURCE, }; +static inline int is_sink(enum typec_role role) +{ + return role == TYPEC_SINK; +} + +static inline int is_source(enum typec_role role) +{ + return role == TYPEC_SOURCE; +} + enum typec_pwr_opmode { TYPEC_PWR_MODE_USB, TYPEC_PWR_MODE_1_5A, -- cgit v1.2.3 From a7cff92f0635c794e2198a69a7ff4ecfe0decab9 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 2 May 2022 16:20:57 +0300 Subject: usb: typec: USB Power Delivery helpers for ports and partners All the USB Type-C Connector Class devices are protected, so the drivers can not directly access them. This will adds a few helpers that can be used to link the ports and partners to the correct USB Power Delivery objects. For ports a new optional sysfs attribute file is also added that can be used to select the USB Power Delivery capabilities that the port will advertise to the partner. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20220502132058.86236-3-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 45e28d14ae56..7751bedcae5d 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -22,6 +22,8 @@ struct typec_altmode_ops; struct fwnode_handle; struct device; +struct usb_power_delivery; + enum typec_port_type { TYPEC_PORT_SRC, TYPEC_PORT_SNK, @@ -223,6 +225,8 @@ struct typec_partner_desc { * @pr_set: Set Power Role * @vconn_set: Source VCONN * @port_type_set: Set port type + * @pd_get: Get available USB Power Delivery Capabilities. + * @pd_set: Set USB Power Delivery Capabilities. */ struct typec_operations { int (*try_role)(struct typec_port *port, int role); @@ -231,6 +235,8 @@ struct typec_operations { int (*vconn_set)(struct typec_port *port, enum typec_role role); int (*port_type_set)(struct typec_port *port, enum typec_port_type type); + struct usb_power_delivery **(*pd_get)(struct typec_port *port); + int (*pd_set)(struct typec_port *port, struct usb_power_delivery *pd); }; enum usb_pd_svdm_ver { @@ -250,6 +256,7 @@ enum usb_pd_svdm_ver { * @accessory: Supported Accessory Modes * @fwnode: Optional fwnode of the port * @driver_data: Private pointer for driver specific info + * @pd: Optional USB Power Delivery Support * @ops: Port operations vector * * Static capabilities of a single USB Type-C port. @@ -267,6 +274,8 @@ struct typec_capability { struct fwnode_handle *fwnode; void *driver_data; + struct usb_power_delivery *pd; + const struct typec_operations *ops; }; @@ -318,4 +327,8 @@ void typec_partner_set_svdm_version(struct typec_partner *partner, enum usb_pd_svdm_ver svdm_version); int typec_get_negotiated_svdm_version(struct typec_port *port); +int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_delivery *pd); +int typec_partner_set_usb_power_delivery(struct typec_partner *partner, + struct usb_power_delivery *pd); + #endif /* __LINUX_USB_TYPEC_H */ -- cgit v1.2.3 From e146caf303493c4f2458173d7f1598b76a9b1396 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 6 May 2022 19:18:07 +0300 Subject: usb: Avoid extra usb SET_SEL requests when enabling link power management The host needs to tell the device the exit latencies using the SET_SEL request before device initiated link powermanagement can be enabled. The exit latency values do not change after enumeration, it's enough to set them once. So do like Windows 10 and issue the SET_SEL request once just before setting the configuration. This is also the sequence described in USB 3.2 specs "9.1.2 Bus enumeration". SET_SEL is issued once before the Set Configuration request, and won't be cleared by the Set Configuration, Set Interface or ClearFeature (STALL) requests. Only warm reset, hot reset, set Address 0 clears the exit latencies. See USB 3.2 section 9.4.14 Table 9-10 Device parameters and events Add udev->lpm_devinit_allow, and set it if SET_SEL was successful. If not set, then don't try to enable device initiated LPM We used to issue a SET_SEL request every time lpm is enabled for either U1 or U2 link states, meaning a SET_SEL was issued twice after every Set Configuration and Set Interface requests, easily accumulating to over 15 SET_SEL requets during a USB3 webcam enumeration. Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220506161807.3369439-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 60bee864d897..f7a9914fc97f 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -584,6 +584,7 @@ struct usb3_lpm_parameters { * @authenticated: Crypto authentication passed * @wusb: device is Wireless USB * @lpm_capable: device supports LPM + * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM * @usb2_hw_lpm_besl_capable: device can perform USB2 hardware BESL LPM * @usb2_hw_lpm_enabled: USB2 hardware LPM is enabled @@ -666,6 +667,7 @@ struct usb_device { unsigned authenticated:1; unsigned wusb:1; unsigned lpm_capable:1; + unsigned lpm_devinit_allow:1; unsigned usb2_hw_lpm_capable:1; unsigned usb2_hw_lpm_besl_capable:1; unsigned usb2_hw_lpm_enabled:1; -- cgit v1.2.3 From c04245328dd7e915e21ac6395ffd218616e22754 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Fri, 10 Jun 2022 17:10:17 +0800 Subject: net: make __sys_accept4_file() static __sys_accept4_file() isn't used outside of the file, make it static. As the same time, move file_flags and nofile parameters into __sys_accept4_file(). Signed-off-by: Yajun Deng Signed-off-by: David S. Miller --- include/linux/socket.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 17311ad9f9af..414b8c7bb8f7 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -428,10 +428,6 @@ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, extern int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr __user *addr, int addr_len); -extern int __sys_accept4_file(struct file *file, unsigned file_flags, - struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags, - unsigned long nofile); extern struct file *do_accept(struct file *file, unsigned file_flags, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); -- cgit v1.2.3 From 0eb6584068642767baab17c2e4385a6b9c029caa Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 27 May 2022 04:34:36 +0200 Subject: platform/surface: aggregator: Allow is_ssam_device() to be used when CONFIG_SURFACE_AGGREGATOR_BUS is disabled In SSAM subsystem drivers that handle both ACPI and SSAM-native client devices, we may want to check whether we have a SSAM (native) client device. Further, we may want to do this even when instantiation thereof cannot happen due to CONFIG_SURFACE_AGGREGATOR_BUS=n. Currently, doing so causes an error due to an undefined reference error due to ssam_device_type being placed in the bus source unit. Therefore, if CONFIG_SURFACE_AGGREGATOR_BUS is not defined, simply let is_ssam_device() return false to prevent this error. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220527023447.2460025-2-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/device.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index cc257097eb05..62b38b4487eb 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -177,6 +177,8 @@ struct ssam_device_driver { void (*remove)(struct ssam_device *sdev); }; +#ifdef CONFIG_SURFACE_AGGREGATOR_BUS + extern struct bus_type ssam_bus_type; extern const struct device_type ssam_device_type; @@ -193,6 +195,15 @@ static inline bool is_ssam_device(struct device *d) return d->type == &ssam_device_type; } +#else /* CONFIG_SURFACE_AGGREGATOR_BUS */ + +static inline bool is_ssam_device(struct device *d) +{ + return false; +} + +#endif /* CONFIG_SURFACE_AGGREGATOR_BUS */ + /** * to_ssam_device() - Casts the given device to a SSAM client device. * @d: The device to cast. -- cgit v1.2.3 From dc0393c76f378f68961587fd4f32de29fb8f0c79 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 27 May 2022 04:34:37 +0200 Subject: platform/surface: aggregator: Allow devices to be marked as hot-removed Some SSAM devices, notably the keyboard cover (keyboard and touchpad) on the Surface Pro 8, can be hot-removed. When this occurs, communication with the device may fail and time out. This timeout can unnecessarily block and slow down device removal and even cause issues when the devices are detached and re-attached quickly. Thus, communication should generally be avoided once hot-removal is detected. While we already remove a device as soon as we detect its (hot-)removal, the corresponding device driver may still attempt to communicate with the device during teardown. This is especially critical as communication failure may also extend to disabling of events, which is typically done at that stage. Add a flag to allow marking devices as hot-removed. This can then be used during client driver teardown to check if any communication attempts should be avoided. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220527023447.2460025-3-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/device.h | 48 +++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index 62b38b4487eb..6df7c8d4e50e 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -148,17 +148,30 @@ struct ssam_device_uid { #define SSAM_SDEV(cat, tid, iid, fun) \ SSAM_DEVICE(SSAM_DOMAIN_SERIALHUB, SSAM_SSH_TC_##cat, tid, iid, fun) +/* + * enum ssam_device_flags - Flags for SSAM client devices. + * @SSAM_DEVICE_HOT_REMOVED_BIT: + * The device has been hot-removed. Further communication with it may time + * out and should be avoided. + */ +enum ssam_device_flags { + SSAM_DEVICE_HOT_REMOVED_BIT = 0, +}; + /** * struct ssam_device - SSAM client device. - * @dev: Driver model representation of the device. - * @ctrl: SSAM controller managing this device. - * @uid: UID identifying the device. + * @dev: Driver model representation of the device. + * @ctrl: SSAM controller managing this device. + * @uid: UID identifying the device. + * @flags: Device state flags, see &enum ssam_device_flags. */ struct ssam_device { struct device dev; struct ssam_controller *ctrl; struct ssam_device_uid uid; + + unsigned long flags; }; /** @@ -251,6 +264,35 @@ struct ssam_device *ssam_device_alloc(struct ssam_controller *ctrl, int ssam_device_add(struct ssam_device *sdev); void ssam_device_remove(struct ssam_device *sdev); +/** + * ssam_device_mark_hot_removed() - Mark the given device as hot-removed. + * @sdev: The device to mark as hot-removed. + * + * Mark the device as having been hot-removed. This signals drivers using the + * device that communication with the device should be avoided and may lead to + * timeouts. + */ +static inline void ssam_device_mark_hot_removed(struct ssam_device *sdev) +{ + dev_dbg(&sdev->dev, "marking device as hot-removed\n"); + set_bit(SSAM_DEVICE_HOT_REMOVED_BIT, &sdev->flags); +} + +/** + * ssam_device_is_hot_removed() - Check if the given device has been + * hot-removed. + * @sdev: The device to check. + * + * Checks if the given device has been marked as hot-removed. See + * ssam_device_mark_hot_removed() for more details. + * + * Return: Returns ``true`` if the device has been marked as hot-removed. + */ +static inline bool ssam_device_is_hot_removed(struct ssam_device *sdev) +{ + return test_bit(SSAM_DEVICE_HOT_REMOVED_BIT, &sdev->flags); +} + /** * ssam_device_get() - Increment reference count of SSAM client device. * @sdev: The device to increment the reference count of. -- cgit v1.2.3 From 5c1e88b98c60e4074796e9a05d3c674479ab1919 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 27 May 2022 04:34:38 +0200 Subject: platform/surface: aggregator: Allow notifiers to avoid communication on unregistering When SSAM client devices have been (physically) hot-removed, communication attempts with those devices may fail and time out. This can even extend to event notifiers, due to which timeouts may occur during device removal, slowing down that process. Add a parameter to the notifier unregister function that allows skipping communication with the EC to prevent this. Furthermore, add wrappers for registering and unregistering notifiers belonging to SSAM client devices that automatically check if the device has been marked as hot-removed and communication should be avoided. Note that non-SSAM client devices can generally not be hot-removed, so also add a convenience wrapper for those, defaulting to allow communication. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220527023447.2460025-4-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/controller.h | 24 +++++++++- include/linux/surface_aggregator/device.h | 66 +++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h index 74bfdffaf7b0..50a2b4926c06 100644 --- a/include/linux/surface_aggregator/controller.h +++ b/include/linux/surface_aggregator/controller.h @@ -835,8 +835,28 @@ struct ssam_event_notifier { int ssam_notifier_register(struct ssam_controller *ctrl, struct ssam_event_notifier *n); -int ssam_notifier_unregister(struct ssam_controller *ctrl, - struct ssam_event_notifier *n); +int __ssam_notifier_unregister(struct ssam_controller *ctrl, + struct ssam_event_notifier *n, bool disable); + +/** + * ssam_notifier_unregister() - Unregister an event notifier. + * @ctrl: The controller the notifier has been registered on. + * @n: The event notifier to unregister. + * + * Unregister an event notifier. Decrement the usage counter of the associated + * SAM event if the notifier is not marked as an observer. If the usage counter + * reaches zero, the event will be disabled. + * + * Return: Returns zero on success, %-ENOENT if the given notifier block has + * not been registered on the controller. If the given notifier block was the + * last one associated with its specific event, returns the status of the + * event-disable EC-command. + */ +static inline int ssam_notifier_unregister(struct ssam_controller *ctrl, + struct ssam_event_notifier *n) +{ + return __ssam_notifier_unregister(ctrl, n, true); +} int ssam_controller_event_enable(struct ssam_controller *ctrl, struct ssam_event_registry reg, diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index 6df7c8d4e50e..c418f7f2732d 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -483,4 +483,70 @@ static inline void ssam_remove_clients(struct device *dev) {} sdev->uid.instance, ret); \ } + +/* -- Helpers for client-device notifiers. ---------------------------------- */ + +/** + * ssam_device_notifier_register() - Register an event notifier for the + * specified client device. + * @sdev: The device the notifier should be registered on. + * @n: The event notifier to register. + * + * Register an event notifier. Increment the usage counter of the associated + * SAM event if the notifier is not marked as an observer. If the event is not + * marked as an observer and is currently not enabled, it will be enabled + * during this call. If the notifier is marked as an observer, no attempt will + * be made at enabling any event and no reference count will be modified. + * + * Notifiers marked as observers do not need to be associated with one specific + * event, i.e. as long as no event matching is performed, only the event target + * category needs to be set. + * + * Return: Returns zero on success, %-ENOSPC if there have already been + * %INT_MAX notifiers for the event ID/type associated with the notifier block + * registered, %-ENOMEM if the corresponding event entry could not be + * allocated, %-ENODEV if the device is marked as hot-removed. If this is the + * first time that a notifier block is registered for the specific associated + * event, returns the status of the event-enable EC-command. + */ +static inline int ssam_device_notifier_register(struct ssam_device *sdev, + struct ssam_event_notifier *n) +{ + /* + * Note that this check does not provide any guarantees whatsoever as + * hot-removal could happen at any point and we can't protect against + * it. Nevertheless, if we can detect hot-removal, bail early to avoid + * communication timeouts. + */ + if (ssam_device_is_hot_removed(sdev)) + return -ENODEV; + + return ssam_notifier_register(sdev->ctrl, n); +} + +/** + * ssam_device_notifier_unregister() - Unregister an event notifier for the + * specified client device. + * @sdev: The device the notifier has been registered on. + * @n: The event notifier to unregister. + * + * Unregister an event notifier. Decrement the usage counter of the associated + * SAM event if the notifier is not marked as an observer. If the usage counter + * reaches zero, the event will be disabled. + * + * In case the device has been marked as hot-removed, the event will not be + * disabled on the EC, as in those cases any attempt at doing so may time out. + * + * Return: Returns zero on success, %-ENOENT if the given notifier block has + * not been registered on the controller. If the given notifier block was the + * last one associated with its specific event, returns the status of the + * event-disable EC-command. + */ +static inline int ssam_device_notifier_unregister(struct ssam_device *sdev, + struct ssam_event_notifier *n) +{ + return __ssam_notifier_unregister(sdev->ctrl, n, + !ssam_device_is_hot_removed(sdev)); +} + #endif /* _LINUX_SURFACE_AGGREGATOR_DEVICE_H */ -- cgit v1.2.3 From 25e2ca7301bd3ca5a63a6be41d729eb42202bc21 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 27 May 2022 04:34:43 +0200 Subject: platform/surface: aggregator: Add comment for KIP subsystem category The KIP subsystem (full name unknown, abbreviation has been obtained through reverse engineering) handles detachable peripherals such as the keyboard cover on the Surface Pro X and Surface Pro 8. It is currently not entirely clear what this subsystem entails, but at the very least it provides event notifications for when the keyboard cover on the Surface Pro X and Surface Pro 8 have been detached or re-attached, as well as the state that the keyboard cover is currently in (e.g. folded-back, folded laptop-like, closed, etc.). Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220527023447.2460025-9-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/serial_hub.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/serial_hub.h b/include/linux/surface_aggregator/serial_hub.h index c3de43edcffa..26b95ec12733 100644 --- a/include/linux/surface_aggregator/serial_hub.h +++ b/include/linux/surface_aggregator/serial_hub.h @@ -306,7 +306,7 @@ enum ssam_ssh_tc { SSAM_SSH_TC_LPC = 0x0b, SSAM_SSH_TC_TCL = 0x0c, SSAM_SSH_TC_SFL = 0x0d, - SSAM_SSH_TC_KIP = 0x0e, + SSAM_SSH_TC_KIP = 0x0e, /* Manages detachable peripherals (Pro X/8 keyboard cover) */ SSAM_SSH_TC_EXT = 0x0f, SSAM_SSH_TC_BLD = 0x10, SSAM_SSH_TC_BAS = 0x11, /* Detachment system (Surface Book 2/3). */ -- cgit v1.2.3 From 546093206ba16623c18e344630dbfdd71a4327e0 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Sat, 11 Jun 2022 17:23:04 +0800 Subject: audit: make is_audit_feature_set() static Currently nobody use is_audit_feature_set() outside this file, so make it static. Signed-off-by: Xiu Jianfeng Signed-off-by: Paul Moore --- include/linux/audit.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index cece70231138..00f7a80f1a3e 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -119,8 +119,6 @@ enum audit_nfcfgop { AUDIT_NFT_OP_INVALID, }; -extern int is_audit_feature_set(int which); - extern int __init audit_register_class(int class, unsigned *list); extern int audit_classify_syscall(int abi, unsigned syscall); extern int audit_classify_arch(int arch); -- cgit v1.2.3 From 795e10b450a88b2943a241a5eaa6e86ae4f47694 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 7 Jun 2022 15:47:43 +0300 Subject: net/mlx5: Introduce header-modify-pattern ICM properties Added new fields for device memory capabilities, in order to support creation of ICM memory for modify header patterns. Signed-off-by: Erez Shitrit Signed-off-by: Yevgeny Kliteynik Signed-off-by: Leon Romanovsky Acked-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fd7d083a34d3..789d64400744 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1086,11 +1086,14 @@ struct mlx5_ifc_device_mem_cap_bits { u8 log_sw_icm_alloc_granularity[0x6]; u8 log_steering_sw_icm_size[0x8]; - u8 reserved_at_120[0x20]; + u8 reserved_at_120[0x18]; + u8 log_header_modify_pattern_sw_icm_size[0x8]; u8 header_modify_sw_icm_start_address[0x40]; - u8 reserved_at_180[0x80]; + u8 reserved_at_180[0x40]; + + u8 header_modify_pattern_sw_icm_start_address[0x40]; u8 memic_operations[0x20]; -- cgit v1.2.3 From 667658364b2056f344a2769280b939a5e45610be Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 7 Jun 2022 15:47:44 +0300 Subject: net/mlx5: Manage ICM of type modify-header pattern Added support for managing new type of ICM for devices that support sw_owner_v2. Signed-off-by: Erez Shitrit Signed-off-by: Yevgeny Kliteynik Signed-off-by: Leon Romanovsky Acked-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5040cd774c5a..76d7661e3e63 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -676,6 +676,7 @@ struct mlx5e_resources { enum mlx5_sw_icm_type { MLX5_SW_ICM_TYPE_STEERING, MLX5_SW_ICM_TYPE_HEADER_MODIFY, + MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN, }; #define MLX5_MAX_RESERVED_GIDS 8 -- cgit v1.2.3 From f5d23ee137e51b4e5cd5d263b144d5e6719f6e52 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Wed, 8 Jun 2022 13:04:47 -0700 Subject: net/mlx5: Add IFC bits and enums for flow meter Add/extend structure layouts and defines for flow meter. Signed-off-by: Jianbo Liu Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 1 + include/linux/mlx5/mlx5_ifc.h | 114 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 111 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 604b85dd770a..15ac02eeed4f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -455,6 +455,7 @@ enum { MLX5_OPCODE_UMR = 0x25, + MLX5_OPCODE_ACCESS_ASO = 0x2d, }; enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 789d64400744..91872afb2bfe 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -442,7 +442,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 max_modify_header_actions[0x8]; u8 max_ft_level[0x8]; - u8 reserved_at_40[0x20]; + u8 reserved_at_40[0x6]; + u8 execute_aso[0x1]; + u8 reserved_at_47[0x19]; u8 reserved_at_60[0x2]; u8 reformat_insert[0x1]; @@ -940,7 +942,17 @@ struct mlx5_ifc_qos_cap_bits { u8 max_tsar_bw_share[0x20]; - u8 reserved_at_100[0x700]; + u8 reserved_at_100[0x20]; + + u8 reserved_at_120[0x3]; + u8 log_meter_aso_granularity[0x5]; + u8 reserved_at_128[0x3]; + u8 log_meter_aso_max_alloc[0x5]; + u8 reserved_at_130[0x3]; + u8 log_max_num_meter_aso[0x5]; + u8 reserved_at_138[0x8]; + + u8 reserved_at_140[0x6c0]; }; struct mlx5_ifc_debug_cap_bits { @@ -3280,6 +3292,7 @@ enum { MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800, MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT = 0x1000, MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT = 0x2000, + MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO = 0x4000, }; enum { @@ -3295,6 +3308,38 @@ struct mlx5_ifc_vlan_bits { u8 vid[0xc]; }; +enum { + MLX5_FLOW_METER_COLOR_RED = 0x0, + MLX5_FLOW_METER_COLOR_YELLOW = 0x1, + MLX5_FLOW_METER_COLOR_GREEN = 0x2, + MLX5_FLOW_METER_COLOR_UNDEFINED = 0x3, +}; + +enum { + MLX5_EXE_ASO_FLOW_METER = 0x2, +}; + +struct mlx5_ifc_exe_aso_ctrl_flow_meter_bits { + u8 return_reg_id[0x4]; + u8 aso_type[0x4]; + u8 reserved_at_8[0x14]; + u8 action[0x1]; + u8 init_color[0x2]; + u8 meter_id[0x1]; +}; + +union mlx5_ifc_exe_aso_ctrl { + struct mlx5_ifc_exe_aso_ctrl_flow_meter_bits exe_aso_ctrl_flow_meter; +}; + +struct mlx5_ifc_execute_aso_bits { + u8 valid[0x1]; + u8 reserved_at_1[0x7]; + u8 aso_object_id[0x18]; + + union mlx5_ifc_exe_aso_ctrl exe_aso_ctrl; +}; + struct mlx5_ifc_flow_context_bits { struct mlx5_ifc_vlan_bits push_vlan; @@ -3326,7 +3371,9 @@ struct mlx5_ifc_flow_context_bits { struct mlx5_ifc_fte_match_param_bits match_value; - u8 reserved_at_1200[0x600]; + struct mlx5_ifc_execute_aso_bits execute_aso[4]; + + u8 reserved_at_1300[0x500]; union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[]; }; @@ -5973,7 +6020,9 @@ struct mlx5_ifc_general_obj_in_cmd_hdr_bits { u8 obj_id[0x20]; - u8 reserved_at_60[0x20]; + u8 reserved_at_60[0x3]; + u8 log_obj_range[0x5]; + u8 reserved_at_68[0x18]; }; struct mlx5_ifc_general_obj_out_cmd_hdr_bits { @@ -11373,12 +11422,14 @@ enum { MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT_ULL(0xc), MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT_ULL(0x13), MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT_ULL(0x20), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = BIT_ULL(0x24), }; enum { MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc, MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13, MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20, + MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, }; enum { @@ -11451,6 +11502,61 @@ struct mlx5_ifc_create_encryption_key_in_bits { struct mlx5_ifc_encryption_key_obj_bits encryption_key_object; }; +enum { + MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH = 0x0, + MLX5_FLOW_METER_MODE_BYTES_CALC_WITH_L2 = 0x1, + MLX5_FLOW_METER_MODE_BYTES_CALC_WITH_L2_IPG = 0x2, + MLX5_FLOW_METER_MODE_NUM_PACKETS = 0x3, +}; + +struct mlx5_ifc_flow_meter_parameters_bits { + u8 valid[0x1]; + u8 bucket_overflow[0x1]; + u8 start_color[0x2]; + u8 both_buckets_on_green[0x1]; + u8 reserved_at_5[0x1]; + u8 meter_mode[0x2]; + u8 reserved_at_8[0x18]; + + u8 reserved_at_20[0x20]; + + u8 reserved_at_40[0x3]; + u8 cbs_exponent[0x5]; + u8 cbs_mantissa[0x8]; + u8 reserved_at_50[0x3]; + u8 cir_exponent[0x5]; + u8 cir_mantissa[0x8]; + + u8 reserved_at_60[0x20]; + + u8 reserved_at_80[0x3]; + u8 ebs_exponent[0x5]; + u8 ebs_mantissa[0x8]; + u8 reserved_at_90[0x3]; + u8 eir_exponent[0x5]; + u8 eir_mantissa[0x8]; + + u8 reserved_at_a0[0x60]; +}; + +struct mlx5_ifc_flow_meter_aso_obj_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x40]; + + u8 reserved_at_80[0x8]; + u8 meter_aso_access_pd[0x18]; + + u8 reserved_at_a0[0x160]; + + struct mlx5_ifc_flow_meter_parameters_bits flow_meter_parameters[2]; +}; + +struct mlx5_ifc_create_flow_meter_aso_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_flow_meter_aso_obj_bits flow_meter_aso_obj; +}; + struct mlx5_ifc_sampler_obj_bits { u8 modify_field_select[0x40]; -- cgit v1.2.3 From 3e94e61bd44d90070dcda53b647fdc826097ef26 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 8 Jun 2022 13:04:48 -0700 Subject: net/mlx5: Add HW definitions of vport debug counters total_q_under_processor_handle - number of queues in error state due to an async error or errored command. send_queue_priority_update_flow - number of QP/SQ priority/SL update events. cq_overrun - number of times CQ entered an error state due to an overflow. async_eq_overrun -number of time an EQ mapped to async events was overrun. comp_eq_overrun - number of time an EQ mapped to completion events was overrun. quota_exceeded_command - number of commands issued and failed due to quota exceeded. invalid_command - number of commands issued and failed dues to any reason other than quota exceeded. Signed-off-by: Saeed Mahameed Signed-off-by: Michael Guralnik Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 91872afb2bfe..f678ad88a7d5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1441,7 +1441,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_120[0xa]; u8 log_max_ra_req_dc[0x6]; - u8 reserved_at_130[0xa]; + u8 reserved_at_130[0x9]; + u8 vnic_env_cq_overrun[0x1]; u8 log_max_ra_res_dc[0x6]; u8 reserved_at_140[0x5]; @@ -1636,7 +1637,11 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 nic_receive_steering_discard[0x1]; u8 receive_discard_vport_down[0x1]; u8 transmit_discard_vport_down[0x1]; - u8 reserved_at_343[0x5]; + u8 eq_overrun_count[0x1]; + u8 reserved_at_344[0x1]; + u8 invalid_command_count[0x1]; + u8 quota_exceeded_count[0x1]; + u8 reserved_at_347[0x1]; u8 log_max_flow_counter_bulk[0x8]; u8 max_flow_counter_15_0[0x10]; @@ -3441,11 +3446,21 @@ struct mlx5_ifc_vnic_diagnostic_statistics_bits { u8 transmit_discard_vport_down[0x40]; - u8 reserved_at_140[0xa0]; + u8 async_eq_overrun[0x20]; + + u8 comp_eq_overrun[0x20]; + + u8 reserved_at_180[0x20]; + + u8 invalid_command[0x20]; + + u8 quota_exceeded_command[0x20]; u8 internal_rq_out_of_buffer[0x20]; - u8 reserved_at_200[0xe00]; + u8 cq_overrun[0x20]; + + u8 reserved_at_220[0xde0]; }; struct mlx5_ifc_traffic_counter_bits { -- cgit v1.2.3 From 91707779a481aab9c7f1d7a5ea3033ce87dc4fd6 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Wed, 8 Jun 2022 13:04:49 -0700 Subject: net/mlx5: Add support EXECUTE_ASO action for flow entry Attach flow meter to FTE with object id and index. Use metadata register C5 to store the packet color meter result. Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 8135713b0d2d..ece3e35622d7 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -212,6 +212,19 @@ struct mlx5_flow_group * mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); +struct mlx5_exe_aso { + u32 object_id; + u8 type; + u8 return_reg_id; + union { + u32 ctrl_data; + struct { + u8 meter_idx; + u8 init_color; + } flow_meter; + }; +}; + struct mlx5_fs_vlan { u16 ethtype; u16 vid; @@ -237,6 +250,7 @@ struct mlx5_flow_act { struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; struct ib_counters *counters; struct mlx5_flow_group *fg; + struct mlx5_exe_aso exe_aso; }; #define MLX5_DECLARE_FLOW_ACT(name) \ -- cgit v1.2.3 From d107ba1f7c067b08eb4b8ca7c51187fb7c4d97f2 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 8 Jun 2022 13:04:51 -0700 Subject: net/mlx5: Remove not used MLX5_CAP_BITS_RW_MASK Remove not used MLX5_CAP_BITS_RW_MASK. While at it, remove CAP_MASK, MLX5_CAP_OFF_CMDIF_CSUM and MLX5_DEV_CAP_FLAG_*, since MLX5_CAP_BITS_RW_MASK was their only user. Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 15ac02eeed4f..95a4fa0fd40a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -386,21 +386,6 @@ enum { MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG = 9, }; -enum { - MLX5_DEV_CAP_FLAG_XRC = 1LL << 3, - MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, - MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, - MLX5_DEV_CAP_FLAG_APM = 1LL << 17, - MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18, - MLX5_DEV_CAP_FLAG_BLOCK_MCAST = 1LL << 23, - MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24, - MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29, - MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30, - MLX5_DEV_CAP_FLAG_DCT = 1LL << 37, - MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, - MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, -}; - enum { MLX5_ROCE_VERSION_1 = 0, MLX5_ROCE_VERSION_2 = 2, @@ -496,10 +481,6 @@ enum { MLX5_MAX_PAGE_SHIFT = 31 }; -enum { - MLX5_CAP_OFF_CMDIF_CSUM = 46, -}; - enum { /* * Max wqe size for rdma read is 512 bytes, so this -- cgit v1.2.3 From cdcdce948d64139aea1c6dfea4b04f5c8ad2784e Mon Sep 17 00:00:00 2001 From: Ofer Levi Date: Wed, 8 Jun 2022 13:04:52 -0700 Subject: net/mlx5: Add bits and fields to support enhanced CQE compression Expose ifc bits and add needed structure fields and methods to support enhanced CQE compression feature. The enhanced CQE compression feature improves cpu utiliziation with better packet latency from nic to host. Signed-off-by: Ofer Levi Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 16 +++++++++++++++- include/linux/mlx5/mlx5_ifc.h | 7 +++++-- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 95a4fa0fd40a..b5f58fd37a0f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -822,7 +822,10 @@ struct mlx5_cqe64 { __be32 timestamp_l; __be32 sop_drop_qpn; __be16 wqe_counter; - u8 signature; + union { + u8 signature; + u8 validity_iteration_count; + }; u8 op_own; }; @@ -854,6 +857,11 @@ enum { MLX5_CQE_FORMAT_CSUM_STRIDX = 0x3, }; +enum { + MLX5_CQE_COMPRESS_LAYOUT_BASIC = 0, + MLX5_CQE_COMPRESS_LAYOUT_ENHANCED = 1, +}; + #define MLX5_MINI_CQE_ARRAY_SIZE 8 static inline u8 mlx5_get_cqe_format(struct mlx5_cqe64 *cqe) @@ -866,6 +874,12 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe) return cqe->op_own >> 4; } +static inline u8 get_cqe_enhanced_num_mini_cqes(struct mlx5_cqe64 *cqe) +{ + /* num_of_mini_cqes is zero based */ + return get_cqe_opcode(cqe) + 1; +} + static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro.tcppsh_abort_dupack >> 6) & 1; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f678ad88a7d5..8e87eb47f9dc 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1739,7 +1739,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_dci_errored_streams[0x5]; u8 reserved_at_598[0x8]; - u8 reserved_at_5a0[0x13]; + u8 reserved_at_5a0[0x10]; + u8 enhanced_cqe_compression[0x1]; + u8 reserved_at_5b1[0x2]; u8 log_max_dek[0x5]; u8 reserved_at_5b8[0x4]; u8 mini_cqe_resp_stride_index[0x1]; @@ -4139,7 +4141,8 @@ struct mlx5_ifc_cqc_bits { u8 cqe_comp_en[0x1]; u8 mini_cqe_res_format[0x2]; u8 st[0x4]; - u8 reserved_at_18[0x8]; + u8 reserved_at_18[0x6]; + u8 cqe_compression_layout[0x2]; u8 reserved_at_20[0x20]; -- cgit v1.2.3 From 9822bb87cee12a9d1e3321b652ba537af1f174aa Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 20 Feb 2022 16:33:27 +0000 Subject: iio: core: drop iio_get_time_res() This function was introduced with the ability to pick a clock. There are no upstream users so presumably it isn't as obviously useful as it seemed at the time. Hence drop it. Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20220220163327.424696-1-jic23@kernel.org --- include/linux/iio/iio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 233d2e6b7721..f6ea2ed99457 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -313,7 +313,6 @@ static inline bool iio_channel_has_available(const struct iio_chan_spec *chan, } s64 iio_get_time_ns(const struct iio_dev *indio_dev); -unsigned int iio_get_time_res(const struct iio_dev *indio_dev); /* * Device operating modes -- cgit v1.2.3 From 12c4efe3509b8018e76ea3ebda8227cb53bf5887 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 8 May 2022 18:55:41 +0100 Subject: iio: core: Fix IIO_ALIGN and rename as it was not sufficiently large MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Discussion of the series: https://lore.kernel.org/all/20220405135758.774016-1-catalin.marinas@arm.com/ mm, arm64: Reduce ARCH_KMALLOC_MINALIGN brought to my attention that our current IIO usage of L1CACHE_ALIGN is insufficient as their are Arm platforms out their with non coherent DMA and larger cache lines at at higher levels of their cache hierarchy. Rename the define to make it's purpose more explicit. It will be used much more widely going forwards (to replace incorrect ____cacheline_aligned markings. Note this patch will greatly reduce the padding on some architectures that have smaller requirements for DMA safe buffers. The history of changing values of ARCH_KMALLOC_MINALIGN via ARCH_DMA_MINALIGN on arm64 is rather complex. I'm not tagging this as fixing a particular patch from that route as it's not clear what to tag. Most recently a change to bring them back inline was reverted because of some Qualcomm Kryo cores with an L2 cache with 128-byte lines sitting above the point of coherency. c1132702c71f Revert "arm64: cache: Lower ARCH_DMA_MINALIGN to 64 (L1_CACHE_BYTES)" That reverts: 65688d2a05de arm64: cache: Lower ARCH_DMA_MINALIGN to 64 (L1_CACHE_BYTES) which refers to the change originally being motivated by Thunder x1 performance rather than correctness. Fixes: 6f7c8ee585e9d ("staging:iio: Add ability to allocate private data space to iio_allocate_device") Signed-off-by: Jonathan Cameron Acked-by: Nuno Sá Link: https://lore.kernel.org/r/20220508175712.647246-2-jic23@kernel.org --- include/linux/iio/iio.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index f6ea2ed99457..4e21a82b3756 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -9,6 +9,7 @@ #include #include +#include #include #include /* IIO TODO LIST */ @@ -708,8 +709,13 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) return dev_get_drvdata(&indio_dev->dev); } -/* Can we make this smaller? */ -#define IIO_ALIGN L1_CACHE_BYTES +/* + * Used to ensure the iio_priv() structure is aligned to allow that structure + * to in turn include IIO_DMA_MINALIGN'd elements such as buffers which + * must not share cachelines with the rest of the structure, thus making + * them safe for use with non-coherent DMA. + */ +#define IIO_DMA_MINALIGN ARCH_KMALLOC_MINALIGN struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv); /* The information at the returned address is guaranteed to be cacheline aligned */ -- cgit v1.2.3 From 6dbdc9f35360d4ef4704462c265f63b32fcb5354 Mon Sep 17 00:00:00 2001 From: Hongyi Lu Date: Mon, 13 Jun 2022 21:16:33 +0000 Subject: bpf: Fix spelling in bpf_verifier.h Minor spelling fix spotted in bpf_verifier.h. Spelling is no big deal, but it is still an improvement when reading through the code. Signed-off-by: Hongyi Lu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220613211633.58647-1-jwnhy0@gmail.com --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e8439f6cbe57..3930c963fa67 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -299,7 +299,7 @@ struct bpf_verifier_state { * If is_state_visited() sees a state with branches > 0 it means * there is a loop. If such state is exactly equal to the current state * it's an infinite loop. Note states_equal() checks for states - * equvalency, so two states being 'states_equal' does not mean + * equivalency, so two states being 'states_equal' does not mean * infinite loop. The exact comparison is provided by * states_maybe_looping() function. It's a stronger pre-check and * much faster than states_equal(). -- cgit v1.2.3 From 064520e8aeaa2569f6504a50a37ac801b73656bc Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 15 Jun 2022 16:43:48 +0800 Subject: ASoC: SOF: Intel: Add support for MeteorLake (MTL) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add platform abstraction for the Meteor Lake platform. This platform has significant differences compared to the TGL/ADL generation: it relies on new hardware using the code name 'ACE' and only supports the INTEL_IPC4 protocol and firmware architecture based on the Zephyr RTOS Co-developed-by: Ranjani Sridharan Signed-off-by: Ranjani Sridharan Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Rander Wang Link: https://lore.kernel.org/r/20220615084348.3489-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw_intel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 67e0d3e750b5..b5b489ea1aef 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -9,6 +9,8 @@ #define SDW_SHIM_BASE 0x2C000 #define SDW_ALH_BASE 0x2C800 +#define SDW_SHIM_BASE_ACE 0x38000 +#define SDW_ALH_BASE_ACE 0x24000 #define SDW_LINK_BASE 0x30000 #define SDW_LINK_SIZE 0x10000 -- cgit v1.2.3 From 6365a1935c5151455812e96d8de434c551dc0d98 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Tue, 14 Jun 2022 17:21:52 +0800 Subject: efi: Make code to find mirrored memory ranges generic Commit b05b9f5f9dcf ("x86, mirror: x86 enabling - find mirrored memory ranges") introduce the efi_find_mirror() function on x86. In order to reuse the API we make it public. Arm64 can support mirrored memory too, so function efi_find_mirror() is added to efi_init() to this support for arm64. Since efi_init() is shared by ARM, arm64 and riscv, this patch will bring mirror memory support for these architectures, but this support is only tested in arm64. Signed-off-by: Ma Wupeng Link: https://lore.kernel.org/r/20220614092156.1972846-2-mawupeng1@huawei.com [ardb: fix subject to better reflect the payload] Acked-by: Mike Rapoport Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 7d9b0bb47eb3..53f64c14a525 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -872,6 +872,7 @@ static inline bool efi_rt_services_supported(unsigned int mask) { return (efi.runtime_supported_mask & mask) == mask; } +extern void efi_find_mirror(void); #else static inline bool efi_enabled(int feature) { @@ -889,6 +890,8 @@ static inline bool efi_rt_services_supported(unsigned int mask) { return false; } + +static inline void efi_find_mirror(void) {} #endif extern int efi_status_to_err(efi_status_t status); -- cgit v1.2.3 From f67be8b7ee90c292948c3ec6395673963cccaee6 Mon Sep 17 00:00:00 2001 From: Li Chen Date: Sun, 22 May 2022 20:26:58 -0700 Subject: regmap: provide regmap_field helpers for simple bit operations We have set/clear/test operations for regmap, but not for regmap_field yet. So let's introduce regmap_field helpers too. In many instances regmap_field_update_bits() is used for simple bit setting and clearing. In these cases the last argument is redundant and we can hide it with a static inline function. This adds three new helpers for simple bit operations: set_bits, clear_bits and test_bits (the last one defined as a regular function). Signed-off-by: Li Chen Link: https://lore.kernel.org/r/180eef422c3.deae9cd960729.8518395646822099769@zohomail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8952fa3d0d59..d5b08f4f0dc0 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1336,6 +1336,22 @@ static inline int regmap_field_update_bits(struct regmap_field *field, NULL, false, false); } +static inline int regmap_field_set_bits(struct regmap_field *field, + unsigned int bits) +{ + return regmap_field_update_bits_base(field, bits, bits, NULL, false, + false); +} + +static inline int regmap_field_clear_bits(struct regmap_field *field, + unsigned int bits) +{ + return regmap_field_update_bits_base(field, bits, 0, NULL, false, + false); +} + +int regmap_field_test_bits(struct regmap_field *field, unsigned int bits); + static inline int regmap_field_force_update_bits(struct regmap_field *field, unsigned int mask, unsigned int val) @@ -1769,6 +1785,27 @@ regmap_field_force_update_bits(struct regmap_field *field, return -EINVAL; } +static inline int regmap_field_set_bits(struct regmap_field *field, + unsigned int bits) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_field_clear_bits(struct regmap_field *field, + unsigned int bits) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_field_test_bits(struct regmap_field *field, + unsigned int bits) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_fields_write(struct regmap_field *field, unsigned int id, unsigned int val) { -- cgit v1.2.3 From 003cbe046171596809c2f37dc07e69df1b4d9f95 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Wed, 1 Jun 2022 20:58:55 +0530 Subject: pinctrl: Add pingroup and define PINCTRL_PINGROUP Add 'struct pingroup' to represent pingroup and 'PINCTRL_PINGROUP' macro for inline use. Both are used to manage and represent larger number of pingroups. Signed-off-by: Basavaraj Natikar Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220601152900.1012813-2-Basavaraj.Natikar@amd.com Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 70b45d28e7a9..487117ccb1bc 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -26,6 +26,26 @@ struct pin_config_item; struct gpio_chip; struct device_node; +/** + * struct pingroup - provides information on pingroup + * @name: a name for pingroup + * @pins: an array of pins in the pingroup + * @npins: number of pins in the pingroup + */ +struct pingroup { + const char *name; + const unsigned int *pins; + size_t npins; +}; + +/* Convenience macro to define a single named or anonymous pingroup */ +#define PINCTRL_PINGROUP(_name, _pins, _npins) \ +(struct pingroup){ \ + .name = _name, \ + .pins = _pins, \ + .npins = _npins, \ +} + /** * struct pinctrl_pin_desc - boards/machines provide information on their * pins, pads or other muxable units in this struct -- cgit v1.2.3 From 10f09307199da274584b8170a41228ca6dfed6d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Fri, 10 Jun 2022 10:45:30 +0200 Subject: iio: core: drop of.h from iio.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no reason to include OF as we only need to forward declare 'of_phandle_args'. Previously, some drivers were actually relying on this for some headers (those were already fixed). Signed-off-by: Nuno Sá Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220610084545.547700-20-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 4e21a82b3756..d9b4a9ca9a0f 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -11,13 +11,14 @@ #include #include #include -#include /* IIO TODO LIST */ /* * Provide means of adjusting timer accuracy. * Currently assumes nano seconds. */ +struct of_phandle_args; + enum iio_shared_by { IIO_SEPARATE, IIO_SHARED_BY_TYPE, -- cgit v1.2.3 From af89cd45603483135bdd238fcb3fa871155a0ae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 20 May 2022 09:57:34 +0200 Subject: clk: Improve documentation for devm_clk_get() and its optional variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make use of "Context:" and "Return:". Mention that the clk is not to be expected to be prepared, previously only not being enabled was mentioned which probably dates from the times when the concept of clk preparation wasn't invented yet. Also describe devm_clk_get_optional() fully instead of just referencing devm_clk_get(). Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20220520075737.758761-2-u.kleine-koenig@pengutronix.de Reviewed-by: Russell King (Oracle) Signed-off-by: Stephen Boyd --- include/linux/clk.h | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 39faa54efe88..c8fc398d2ad7 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -443,15 +443,16 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, * @dev: device for clock "consumer" * @id: clock consumer ID * - * Returns a struct clk corresponding to the clock producer, or + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or * valid IS_ERR() condition containing errno. The implementation * uses @dev and @id to determine the clock consumer, and thereby * the clock producer. (IOW, @id may be identical strings, but * clk_get may return different clock producers depending on @dev.) * - * Drivers must assume that the clock source is not enabled. - * - * devm_clk_get should not be called from within interrupt context. + * Drivers must assume that the clock source is neither prepared nor + * enabled. * * The clock will automatically be freed when the device is unbound * from the bus. @@ -464,8 +465,20 @@ struct clk *devm_clk_get(struct device *dev, const char *id); * @dev: device for clock "consumer" * @id: clock consumer ID * - * Behaves the same as devm_clk_get() except where there is no clock producer. - * In this case, instead of returning -ENOENT, the function returns NULL. + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. If no such clk is found, it returns NULL + * which serves as a dummy clk. That's the only difference compared + * to devm_clk_get(). + * + * Drivers must assume that the clock source is neither prepared nor + * enabled. + * + * The clock will automatically be freed when the device is unbound + * from the bus. */ struct clk *devm_clk_get_optional(struct device *dev, const char *id); -- cgit v1.2.3 From 7ef9651e9792b08eb310c6beb202cbc947f43cab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 20 May 2022 09:57:36 +0200 Subject: clk: Provide new devm_clk helpers for prepared and enabled clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a driver keeps a clock prepared (or enabled) during the whole lifetime of the driver, these helpers allow to simplify the drivers. Reviewed-by: Jonathan Cameron Reviewed-by: Alexandru Ardelean Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20220520075737.758761-4-u.kleine-koenig@pengutronix.de Signed-off-by: Stephen Boyd --- include/linux/clk.h | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index c8fc398d2ad7..c13061cabdfc 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -459,6 +459,47 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, */ struct clk *devm_clk_get(struct device *dev, const char *id); +/** + * devm_clk_get_prepared - devm_clk_get() + clk_prepare() + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. (IOW, @id may be identical strings, but + * clk_get may return different clock producers depending on @dev.) + * + * The returned clk (if valid) is prepared. Drivers must however assume + * that the clock is not enabled. + * + * The clock will automatically be unprepared and freed when the device + * is unbound from the bus. + */ +struct clk *devm_clk_get_prepared(struct device *dev, const char *id); + +/** + * devm_clk_get_enabled - devm_clk_get() + clk_prepare_enable() + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. (IOW, @id may be identical strings, but + * clk_get may return different clock producers depending on @dev.) + * + * The returned clk (if valid) is prepared and enabled. + * + * The clock will automatically be disabled, unprepared and freed + * when the device is unbound from the bus. + */ +struct clk *devm_clk_get_enabled(struct device *dev, const char *id); + /** * devm_clk_get_optional - lookup and obtain a managed reference to an optional * clock producer. @@ -482,6 +523,50 @@ struct clk *devm_clk_get(struct device *dev, const char *id); */ struct clk *devm_clk_get_optional(struct device *dev, const char *id); +/** + * devm_clk_get_optional_prepared - devm_clk_get_optional() + clk_prepare() + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. If no such clk is found, it returns NULL + * which serves as a dummy clk. That's the only difference compared + * to devm_clk_get_prepared(). + * + * The returned clk (if valid) is prepared. Drivers must however + * assume that the clock is not enabled. + * + * The clock will automatically be unprepared and freed when the + * device is unbound from the bus. + */ +struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id); + +/** + * devm_clk_get_optional_enabled - devm_clk_get_optional() + + * clk_prepare_enable() + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. If no such clk is found, it returns NULL + * which serves as a dummy clk. That's the only difference compared + * to devm_clk_get_enabled(). + * + * The returned clk (if valid) is prepared and enabled. + * + * The clock will automatically be disabled, unprepared and freed + * when the device is unbound from the bus. + */ +struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id); + /** * devm_get_clk_from_child - lookup and obtain a managed reference to a * clock producer from child node. @@ -826,12 +911,36 @@ static inline struct clk *devm_clk_get(struct device *dev, const char *id) return NULL; } +static inline struct clk *devm_clk_get_prepared(struct device *dev, + const char *id) +{ + return NULL; +} + +static inline struct clk *devm_clk_get_enabled(struct device *dev, + const char *id) +{ + return NULL; +} + static inline struct clk *devm_clk_get_optional(struct device *dev, const char *id) { return NULL; } +static inline struct clk *devm_clk_get_optional_prepared(struct device *dev, + const char *id) +{ + return NULL; +} + +static inline struct clk *devm_clk_get_optional_enabled(struct device *dev, + const char *id) +{ + return NULL; +} + static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks) { -- cgit v1.2.3 From 5a0e4529d9aee8ce348f628ad476c9ddb6cf457d Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 24 May 2022 10:21:52 -0500 Subject: dmaengine: dw-edma: Remove unused irq field in struct dw_edma_chip The "irq" field of struct dw_edma_chip was never used. Remove it. Link: https://lore.kernel.org/r/20220524152159.2370739-2-Frank.Li@nxp.com Tested-by: Serge Semin Tested-by: Manivannan Sadhasivam Signed-off-by: Frank Li Signed-off-by: Bjorn Helgaas Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Acked-By: Vinod Koul --- include/linux/dma/edma.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index cab6e18773da..d4333e721588 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -18,13 +18,11 @@ struct dw_edma; * struct dw_edma_chip - representation of DesignWare eDMA controller hardware * @dev: struct device of the eDMA controller * @id: instance ID - * @irq: irq line * @dw: struct dw_edma that is filed by dw_edma_probe() */ struct dw_edma_chip { struct device *dev; int id; - int irq; struct dw_edma *dw; }; -- cgit v1.2.3 From 07fd5b6cdf3cc30bfde8fe0f644771688be04447 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 13 Jun 2022 12:19:50 -1000 Subject: cgroup: Use separate src/dst nodes when preloading css_sets for migration Each cset (css_set) is pinned by its tasks. When we're moving tasks around across csets for a migration, we need to hold the source and destination csets to ensure that they don't go away while we're moving tasks about. This is done by linking cset->mg_preload_node on either the mgctx->preloaded_src_csets or mgctx->preloaded_dst_csets list. Using the same cset->mg_preload_node for both the src and dst lists was deemed okay as a cset can't be both the source and destination at the same time. Unfortunately, this overloading becomes problematic when multiple tasks are involved in a migration and some of them are identity noop migrations while others are actually moving across cgroups. For example, this can happen with the following sequence on cgroup1: #1> mkdir -p /sys/fs/cgroup/misc/a/b #2> echo $$ > /sys/fs/cgroup/misc/a/cgroup.procs #3> RUN_A_COMMAND_WHICH_CREATES_MULTIPLE_THREADS & #4> PID=$! #5> echo $PID > /sys/fs/cgroup/misc/a/b/tasks #6> echo $PID > /sys/fs/cgroup/misc/a/cgroup.procs the process including the group leader back into a. In this final migration, non-leader threads would be doing identity migration while the group leader is doing an actual one. After #3, let's say the whole process was in cset A, and that after #4, the leader moves to cset B. Then, during #6, the following happens: 1. cgroup_migrate_add_src() is called on B for the leader. 2. cgroup_migrate_add_src() is called on A for the other threads. 3. cgroup_migrate_prepare_dst() is called. It scans the src list. 4. It notices that B wants to migrate to A, so it tries to A to the dst list but realizes that its ->mg_preload_node is already busy. 5. and then it notices A wants to migrate to A as it's an identity migration, it culls it by list_del_init()'ing its ->mg_preload_node and putting references accordingly. 6. The rest of migration takes place with B on the src list but nothing on the dst list. This means that A isn't held while migration is in progress. If all tasks leave A before the migration finishes and the incoming task pins it, the cset will be destroyed leading to use-after-free. This is caused by overloading cset->mg_preload_node for both src and dst preload lists. We wanted to exclude the cset from the src list but ended up inadvertently excluding it from the dst list too. This patch fixes the issue by separating out cset->mg_preload_node into ->mg_src_preload_node and ->mg_dst_preload_node, so that the src and dst preloadings don't interfere with each other. Signed-off-by: Tejun Heo Reported-by: Mukesh Ojha Reported-by: shisiyuan Link: http://lkml.kernel.org/r/1654187688-27411-1-git-send-email-shisiyuan@xiaomi.com Link: https://www.spinics.net/lists/cgroups/msg33313.html Fixes: f817de98513d ("cgroup: prepare migration path for unified hierarchy") Cc: stable@vger.kernel.org # v3.16+ --- include/linux/cgroup-defs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1bfcfb1af352..d4427d0a0e18 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -264,7 +264,8 @@ struct css_set { * List of csets participating in the on-going migration either as * source or destination. Protected by cgroup_mutex. */ - struct list_head mg_preload_node; + struct list_head mg_src_preload_node; + struct list_head mg_dst_preload_node; struct list_head mg_node; /* -- cgit v1.2.3 From e4a8864f74e9e9e4a7eb93952a4cfa35c165c930 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 10 Jun 2022 16:21:30 -0700 Subject: iosys-map: Fix typo in documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's one argument, vaddr_iomem, not 2 (vaddr and _iomem). Signed-off-by: Lucas De Marchi Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20220610232130.2865479-3-lucas.demarchi@intel.com --- include/linux/iosys-map.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index e69a002d5aa4..4b8406ee8bc4 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -23,7 +23,7 @@ * memcpy(vaddr, src, len); * * void *vaddr_iomem = ...; // pointer to I/O memory - * memcpy_toio(vaddr, _iomem, src, len); + * memcpy_toio(vaddr_iomem, src, len); * * The user of such pointer may not have information about the mapping of that * region or may want to have a single code path to handle operations on that -- cgit v1.2.3 From d687f621c518d791b5fffde8add3112d869b0b1b Mon Sep 17 00:00:00 2001 From: Delyan Kratunov Date: Tue, 14 Jun 2022 23:10:42 +0000 Subject: bpf: move bpf_prog to bpf.h In order to add a version of bpf_prog_run_array which accesses the bpf_prog->aux member, bpf_prog needs to be more than a forward declaration inside bpf.h. Given that filter.h already includes bpf.h, this merely reorders the type declarations for filter.h users. bpf.h users now have access to bpf_prog internals. Signed-off-by: Delyan Kratunov Link: https://lore.kernel.org/r/3ed7824e3948f22d84583649ccac0ff0d38b6b58.1655248076.git.delyank@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 36 ++++++++++++++++++++++++++++++++++++ include/linux/filter.h | 34 ---------------------------------- 2 files changed, 36 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8e6092d0ea95..69106ae46464 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -5,6 +5,7 @@ #define _LINUX_BPF_H 1 #include +#include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include #include #include @@ -1084,6 +1086,40 @@ struct bpf_prog_aux { }; }; +struct bpf_prog { + u16 pages; /* Number of allocated pages */ + u16 jited:1, /* Is our filter JIT'ed? */ + jit_requested:1,/* archs need to JIT the prog */ + gpl_compatible:1, /* Is filter GPL compatible? */ + cb_access:1, /* Is control block accessed? */ + dst_needed:1, /* Do we need dst entry? */ + blinding_requested:1, /* needs constant blinding */ + blinded:1, /* Was blinded */ + is_func:1, /* program is a bpf function */ + kprobe_override:1, /* Do we override a kprobe? */ + has_callchain_buf:1, /* callchain buffer allocated? */ + enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ + call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ + call_get_func_ip:1, /* Do we call get_func_ip() */ + tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ + enum bpf_prog_type type; /* Type of BPF program */ + enum bpf_attach_type expected_attach_type; /* For some prog types */ + u32 len; /* Number of filter blocks */ + u32 jited_len; /* Size of jited insns in bytes */ + u8 tag[BPF_TAG_SIZE]; + struct bpf_prog_stats __percpu *stats; + int __percpu *active; + unsigned int (*bpf_func)(const void *ctx, + const struct bpf_insn *insn); + struct bpf_prog_aux *aux; /* Auxiliary fields */ + struct sock_fprog_kern *orig_prog; /* Original BPF program */ + /* Instructions for interpreter */ + union { + DECLARE_FLEX_ARRAY(struct sock_filter, insns); + DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi); + }; +}; + struct bpf_array_aux { /* Programs with direct jumps into programs part of this array. */ struct list_head poke_progs; diff --git a/include/linux/filter.h b/include/linux/filter.h index ed0c0ff42ad5..d0cbb31b1b4d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -559,40 +559,6 @@ struct bpf_prog_stats { struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); -struct bpf_prog { - u16 pages; /* Number of allocated pages */ - u16 jited:1, /* Is our filter JIT'ed? */ - jit_requested:1,/* archs need to JIT the prog */ - gpl_compatible:1, /* Is filter GPL compatible? */ - cb_access:1, /* Is control block accessed? */ - dst_needed:1, /* Do we need dst entry? */ - blinding_requested:1, /* needs constant blinding */ - blinded:1, /* Was blinded */ - is_func:1, /* program is a bpf function */ - kprobe_override:1, /* Do we override a kprobe? */ - has_callchain_buf:1, /* callchain buffer allocated? */ - enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ - call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ - call_get_func_ip:1, /* Do we call get_func_ip() */ - tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ - enum bpf_prog_type type; /* Type of BPF program */ - enum bpf_attach_type expected_attach_type; /* For some prog types */ - u32 len; /* Number of filter blocks */ - u32 jited_len; /* Size of jited insns in bytes */ - u8 tag[BPF_TAG_SIZE]; - struct bpf_prog_stats __percpu *stats; - int __percpu *active; - unsigned int (*bpf_func)(const void *ctx, - const struct bpf_insn *insn); - struct bpf_prog_aux *aux; /* Auxiliary fields */ - struct sock_fprog_kern *orig_prog; /* Original BPF program */ - /* Instructions for interpreter */ - union { - DECLARE_FLEX_ARRAY(struct sock_filter, insns); - DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi); - }; -}; - struct sk_filter { refcount_t refcnt; struct rcu_head rcu; -- cgit v1.2.3 From 8c7dcb84e3b744b2b70baa7a44a9b1881c33a9c9 Mon Sep 17 00:00:00 2001 From: Delyan Kratunov Date: Tue, 14 Jun 2022 23:10:46 +0000 Subject: bpf: implement sleepable uprobes by chaining gps uprobes work by raising a trap, setting a task flag from within the interrupt handler, and processing the actual work for the uprobe on the way back to userspace. As a result, uprobe handlers already execute in a might_fault/_sleep context. The primary obstacle to sleepable bpf uprobe programs is therefore on the bpf side. Namely, the bpf_prog_array attached to the uprobe is protected by normal rcu. In order for uprobe bpf programs to become sleepable, it has to be protected by the tasks_trace rcu flavor instead (and kfree() called after a corresponding grace period). Therefore, the free path for bpf_prog_array now chains a tasks_trace and normal grace periods one after the other. Users who iterate under tasks_trace read section would be safe, as would users who iterate under normal read sections (from non-sleepable locations). The downside is that the tasks_trace latency affects all perf_event-attached bpf programs (and not just uprobe ones). This is deemed safe given the possible attach rates for kprobe/uprobe/tp programs. Separately, non-sleepable programs need access to dynamically sized rcu-protected maps, so bpf_run_prog_array_sleepables now conditionally takes an rcu read section, in addition to the overarching tasks_trace section. Signed-off-by: Delyan Kratunov Link: https://lore.kernel.org/r/ce844d62a2fd0443b08c5ab02e95bc7149f9aeb1.1655248076.git.delyank@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 69106ae46464..f3e88afdaffe 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -26,6 +26,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -1372,6 +1373,8 @@ extern struct bpf_empty_prog_array bpf_empty_prog_array; struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array *progs); +/* Use when traversal over the bpf_prog_array uses tasks_trace rcu */ +void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs); int bpf_prog_array_length(struct bpf_prog_array *progs); bool bpf_prog_array_is_empty(struct bpf_prog_array *array); int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs, @@ -1463,6 +1466,55 @@ bpf_prog_run_array(const struct bpf_prog_array *array, return ret; } +/* Notes on RCU design for bpf_prog_arrays containing sleepable programs: + * + * We use the tasks_trace rcu flavor read section to protect the bpf_prog_array + * overall. As a result, we must use the bpf_prog_array_free_sleepable + * in order to use the tasks_trace rcu grace period. + * + * When a non-sleepable program is inside the array, we take the rcu read + * section and disable preemption for that program alone, so it can access + * rcu-protected dynamically sized maps. + */ +static __always_inline u32 +bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu, + const void *ctx, bpf_prog_run_fn run_prog) +{ + const struct bpf_prog_array_item *item; + const struct bpf_prog *prog; + const struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_trace_run_ctx run_ctx; + u32 ret = 1; + + might_fault(); + + rcu_read_lock_trace(); + migrate_disable(); + + array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held()); + if (unlikely(!array)) + goto out; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + if (!prog->aux->sleepable) + rcu_read_lock(); + + run_ctx.bpf_cookie = item->bpf_cookie; + ret &= run_prog(prog, ctx); + item++; + + if (!prog->aux->sleepable) + rcu_read_unlock(); + } + bpf_reset_run_ctx(old_run_ctx); +out: + migrate_enable(); + rcu_read_unlock_trace(); + return ret; +} + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; -- cgit v1.2.3 From d92725256b4f22d084b813b37ddc394da79aacab Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 30 May 2022 14:34:50 -0400 Subject: mm: avoid unnecessary page fault retires on shared memory types I observed that for each of the shared file-backed page faults, we're very likely to retry one more time for the 1st write fault upon no page. It's because we'll need to release the mmap lock for dirty rate limit purpose with balance_dirty_pages_ratelimited() (in fault_dirty_shared_page()). Then after that throttling we return VM_FAULT_RETRY. We did that probably because VM_FAULT_RETRY is the only way we can return to the fault handler at that time telling it we've released the mmap lock. However that's not ideal because it's very likely the fault does not need to be retried at all since the pgtable was well installed before the throttling, so the next continuous fault (including taking mmap read lock, walk the pgtable, etc.) could be in most cases unnecessary. It's not only slowing down page faults for shared file-backed, but also add more mmap lock contention which is in most cases not needed at all. To observe this, one could try to write to some shmem page and look at "pgfault" value in /proc/vmstat, then we should expect 2 counts for each shmem write simply because we retried, and vm event "pgfault" will capture that. To make it more efficient, add a new VM_FAULT_COMPLETED return code just to show that we've completed the whole fault and released the lock. It's also a hint that we should very possibly not need another fault immediately on this page because we've just completed it. This patch provides a ~12% perf boost on my aarch64 test VM with a simple program sequentially dirtying 400MB shmem file being mmap()ed and these are the time it needs: Before: 650.980 ms (+-1.94%) After: 569.396 ms (+-1.38%) I believe it could help more than that. We need some special care on GUP and the s390 pgfault handler (for gmap code before returning from pgfault), the rest changes in the page fault handlers should be relatively straightforward. Another thing to mention is that mm_account_fault() does take this new fault as a generic fault to be accounted, unlike VM_FAULT_RETRY. I explicitly didn't touch hmm_vma_fault() and break_ksm() because they do not handle VM_FAULT_RETRY even with existing code, so I'm literally keeping them as-is. Link: https://lkml.kernel.org/r/20220530183450.42886-1-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: Geert Uytterhoeven Acked-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Acked-by: Vineet Gupta Acked-by: Guo Ren Acked-by: Max Filippov Acked-by: Christian Borntraeger Acked-by: Michael Ellerman (powerpc) Acked-by: Catalin Marinas Reviewed-by: Alistair Popple Reviewed-by: Ingo Molnar Acked-by: Russell King (Oracle) [arm part] Acked-by: Heiko Carstens Cc: Vasily Gorbik Cc: Stafford Horne Cc: David S. Miller Cc: Johannes Berg Cc: Brian Cain Cc: Richard Henderson Cc: Richard Weinberger Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Janosch Frank Cc: Albert Ou Cc: Anton Ivanov Cc: Dave Hansen Cc: Borislav Petkov Cc: Sven Schnelle Cc: Andrea Arcangeli Cc: James Bottomley Cc: Al Viro Cc: Alexander Gordeev Cc: Jonas Bonn Cc: Will Deacon Cc: Vlastimil Babka Cc: Michal Simek Cc: Matt Turner Cc: Paul Mackerras Cc: David Hildenbrand Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Stefan Kristiansson Cc: Paul Walmsley Cc: Ivan Kokshaysky Cc: Chris Zankel Cc: Hugh Dickins Cc: Dinh Nguyen Cc: Rich Felker Cc: H. Peter Anvin Cc: Andy Lutomirski Cc: Thomas Bogendoerfer Cc: Helge Deller Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c29ab4c0cd5c..6b961a29bf26 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -729,6 +729,7 @@ typedef __bitwise unsigned int vm_fault_t; * @VM_FAULT_NEEDDSYNC: ->fault did not modify page tables and needs * fsync() to complete (for synchronous page faults * in DAX) + * @VM_FAULT_COMPLETED: ->fault completed, meanwhile mmap lock released * @VM_FAULT_HINDEX_MASK: mask HINDEX value * */ @@ -746,6 +747,7 @@ enum vm_fault_reason { VM_FAULT_FALLBACK = (__force vm_fault_t)0x000800, VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000, VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000, + VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000, VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000, }; -- cgit v1.2.3 From bcc728eb4f446073e0160671d7d0059a4e9aa300 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Tue, 31 May 2022 10:04:21 +0800 Subject: mm/damon: remove obsolete comments of kdamond_stop Since commit 0f91d13366a4 ("mm/damon: simplify stop mechanism") delete kdamond_stop and change to use kthread stop mechanism, these obsolete comments should be removed accordingly. Link: https://lkml.kernel.org/r/20220531020421.46849-1-zhouchengming@bytedance.com Signed-off-by: Chengming Zhou Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 7c62da31ce4b..2765c7d99beb 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -397,7 +397,6 @@ struct damon_callback { * detail. * * @kdamond: Kernel thread who does the monitoring. - * @kdamond_stop: Notifies whether kdamond should stop. * @kdamond_lock: Mutex for the synchronizations with @kdamond. * * For each monitoring context, one kernel thread for the monitoring is @@ -406,14 +405,14 @@ struct damon_callback { * Once started, the monitoring thread runs until explicitly required to be * terminated or every monitoring target is invalid. The validity of the * targets is checked via the &damon_operations.target_valid of @ops. The - * termination can also be explicitly requested by writing non-zero to - * @kdamond_stop. The thread sets @kdamond to NULL when it terminates. - * Therefore, users can know whether the monitoring is ongoing or terminated by - * reading @kdamond. Reads and writes to @kdamond and @kdamond_stop from - * outside of the monitoring thread must be protected by @kdamond_lock. - * - * Note that the monitoring thread protects only @kdamond and @kdamond_stop via - * @kdamond_lock. Accesses to other fields must be protected by themselves. + * termination can also be explicitly requested by calling damon_stop(). + * The thread sets @kdamond to NULL when it terminates. Therefore, users can + * know whether the monitoring is ongoing or terminated by reading @kdamond. + * Reads and writes to @kdamond from outside of the monitoring thread must + * be protected by @kdamond_lock. + * + * Note that the monitoring thread protects only @kdamond via @kdamond_lock. + * Accesses to other fields must be protected by themselves. * * @ops: Set of monitoring operations for given use cases. * @callback: Set of callbacks for monitoring events notifications. -- cgit v1.2.3 From 9384d79249d04b03572abb7e551a35d99c9268c0 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Mon, 6 Jun 2022 16:15:33 +0200 Subject: mm/highmem: delete memmove_page() Matthew Wilcox reported that, while he was looking at memmove_page(), he realized that it can't actually work. The reasons are hidden in its implementation, which makes use of memmove() on logical addresses provided by kmap_local_page(). memmove() does the wrong thing when it tests "if (dest <= src)". Therefore, delete memmove_page(). No need to change any other code because we have no call sites of memmove_page() across the whole kernel. Link: https://lkml.kernel.org/r/20220606141533.555-1-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Reported-by: Matthew Wilcox Reviewed-by: Baoquan He Reviewed-by: Ira Weiny Cc: Sebastian Andrzej Siewior Signed-off-by: Andrew Morton --- include/linux/highmem.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 3af34de54330..fee9835e3793 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -336,19 +336,6 @@ static inline void memcpy_page(struct page *dst_page, size_t dst_off, kunmap_local(dst); } -static inline void memmove_page(struct page *dst_page, size_t dst_off, - struct page *src_page, size_t src_off, - size_t len) -{ - char *dst = kmap_local_page(dst_page); - char *src = kmap_local_page(src_page); - - VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE); - memmove(dst + dst_off, src + src_off, len); - kunmap_local(src); - kunmap_local(dst); -} - static inline void memset_page(struct page *page, size_t offset, int val, size_t len) { -- cgit v1.2.3 From c200d90049dbe08fa8b016f74b713fddefca0479 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Sat, 11 Jun 2022 11:55:48 +0800 Subject: mm: kmemleak: remove kmemleak_not_leak_phys() and the min_count argument to kmemleak_alloc_phys() Patch series "mm: kmemleak: store objects allocated with physical address separately and check when scan", v4. The kmemleak_*_phys() interface uses "min_low_pfn" and "max_low_pfn" to check address. But on some architectures, kmemleak_*_phys() is called before those two variables initialized. The following steps will be taken: 1) Add OBJECT_PHYS flag and rbtree for the objects allocated with physical address 2) Store physical address in objects if allocated with OBJECT_PHYS 3) Check the boundary when scan instead of in kmemleak_*_phys() This patch set will solve: https://lore.kernel.org/r/20220527032504.30341-1-yee.lee@mediatek.com https://lore.kernel.org/r/9dd08bb5-f39e-53d8-f88d-bec598a08c93@gmail.com v3: https://lore.kernel.org/r/20220609124950.1694394-1-patrick.wang.shcn@gmail.com v2: https://lore.kernel.org/r/20220603035415.1243913-1-patrick.wang.shcn@gmail.com v1: https://lore.kernel.org/r/20220531150823.1004101-1-patrick.wang.shcn@gmail.com This patch (of 4): Remove the unused kmemleak_not_leak_phys() function. And remove the min_count argument to kmemleak_alloc_phys() function, assume it's 0. Link: https://lkml.kernel.org/r/20220611035551.1823303-1-patrick.wang.shcn@gmail.com Link: https://lkml.kernel.org/r/20220611035551.1823303-2-patrick.wang.shcn@gmail.com Signed-off-by: Patrick Wang Suggested-by: Catalin Marinas Reviewed-by: Catalin Marinas Cc: Yee Lee Signed-off-by: Andrew Morton --- include/linux/kmemleak.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 34684b2026ab..6a3cd1bf4680 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -29,10 +29,9 @@ extern void kmemleak_not_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; -extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, +extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, gfp_t gfp) __ref; extern void kmemleak_free_part_phys(phys_addr_t phys, size_t size) __ref; -extern void kmemleak_not_leak_phys(phys_addr_t phys) __ref; extern void kmemleak_ignore_phys(phys_addr_t phys) __ref; static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, @@ -107,15 +106,12 @@ static inline void kmemleak_no_scan(const void *ptr) { } static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size, - int min_count, gfp_t gfp) + gfp_t gfp) { } static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size) { } -static inline void kmemleak_not_leak_phys(phys_addr_t phys) -{ -} static inline void kmemleak_ignore_phys(phys_addr_t phys) { } -- cgit v1.2.3 From fc4db90fe71e640e3fe88df346f7cf653b75315d Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 10 Jun 2022 11:03:10 -0700 Subject: mm: kmem: make mem_cgroup_from_obj() vmalloc()-safe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently mem_cgroup_from_obj() is not working properly with objects allocated using vmalloc(). It creates problems in some cases, when it's called for static objects belonging to modules or generally allocated using vmalloc(). This patch makes mem_cgroup_from_obj() safe to be called on objects allocated using vmalloc(). It also introduces mem_cgroup_from_slab_obj(), which is a faster version to use in places when we know the object is either a slab object or a generic slab page (e.g. when adding an object to a lru list). Link: https://lkml.kernel.org/r/20220610180310.1725111-1-roman.gushchin@linux.dev Suggested-by: Kefeng Wang Signed-off-by: Roman Gushchin Tested-by: Linux Kernel Functional Testing Acked-by: Shakeel Butt Tested-by: Vasily Averin Acked-by: Michal Hocko Acked-by: Muchun Song Cc: Johannes Weiner Cc: Naresh Kamboju Cc: Qian Cai Cc: Kefeng Wang Cc: David S. Miller Cc: Eric Dumazet Cc: Florian Westphal Cc: Jakub Kicinski Cc: Michal Koutný Cc: Paolo Abeni Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9ecead1042b9..3ce96ce5fe3e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1740,6 +1740,7 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg) } struct mem_cgroup *mem_cgroup_from_obj(void *p); +struct mem_cgroup *mem_cgroup_from_slab_obj(void *p); static inline void count_objcg_event(struct obj_cgroup *objcg, enum vm_event_item idx) @@ -1801,6 +1802,11 @@ static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) return NULL; } +static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p) +{ + return NULL; +} + static inline void count_objcg_event(struct obj_cgroup *objcg, enum vm_event_item idx) { -- cgit v1.2.3 From 1d0403d20f6c281cb3d14c5f1db5317caeec48e9 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 3 Jun 2022 07:19:43 +0300 Subject: net: set proper memcg for net_init hooks allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __register_pernet_operations() executes init hook of registered pernet_operation structure in all existing net namespaces. Typically, these hooks are called by a process associated with the specified net namespace, and all __GFP_ACCOUNT marked allocation are accounted for corresponding container/memcg. However __register_pernet_operations() calls the hooks in the same context, and as a result all marked allocations are accounted to one memcg for all processed net namespaces. This patch adjusts active memcg for each net namespace and helps to account memory allocated inside ops_init() into the proper memcg. Link: https://lkml.kernel.org/r/f9394752-e272-9bf9-645f-a18c56d1c4ec@openvz.org Signed-off-by: Vasily Averin Acked-by: Roman Gushchin Acked-by: Shakeel Butt Cc: Michal Koutný Cc: Vlastimil Babka Cc: Michal Hocko Cc: Florian Westphal Cc: David S. Miller Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Eric Dumazet Cc: Johannes Weiner Cc: Kefeng Wang Cc: Linux Kernel Functional Testing Cc: Muchun Song Cc: Naresh Kamboju Cc: Qian Cai Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 47 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3ce96ce5fe3e..04f2f33607e9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1756,6 +1756,42 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, rcu_read_unlock(); } +/** + * get_mem_cgroup_from_obj - get a memcg associated with passed kernel object. + * @p: pointer to object from which memcg should be extracted. It can be NULL. + * + * Retrieves the memory group into which the memory of the pointed kernel + * object is accounted. If memcg is found, its reference is taken. + * If a passed kernel object is uncharged, or if proper memcg cannot be found, + * as well as if mem_cgroup is disabled, NULL is returned. + * + * Return: valid memcg pointer with taken reference or NULL. + */ +static inline struct mem_cgroup *get_mem_cgroup_from_obj(void *p) +{ + struct mem_cgroup *memcg; + + rcu_read_lock(); + do { + memcg = mem_cgroup_from_obj(p); + } while (memcg && !css_tryget(&memcg->css)); + rcu_read_unlock(); + return memcg; +} + +/** + * mem_cgroup_or_root - always returns a pointer to a valid memory cgroup. + * @memcg: pointer to a valid memory cgroup or NULL. + * + * If passed argument is not NULL, returns it without any additional checks + * and changes. Otherwise, root_mem_cgroup is returned. + * + * NOTE: root_mem_cgroup can be NULL during early boot. + */ +static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg) +{ + return memcg ? memcg : root_mem_cgroup; +} #else static inline bool mem_cgroup_kmem_disabled(void) { @@ -1799,7 +1835,7 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg) static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) { - return NULL; + return NULL; } static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p) @@ -1812,6 +1848,15 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, { } +static inline struct mem_cgroup *get_mem_cgroup_from_obj(void *p) +{ + return NULL; +} + +static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg) +{ + return NULL; +} #endif /* CONFIG_MEMCG_KMEM */ #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) -- cgit v1.2.3 From 4815a36009044ba69a9b8d781943ec6505c451a2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 3 Jun 2022 20:10:12 +0300 Subject: include/linux/rbtree.h: replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Link: https://lkml.kernel.org/r/20220603171012.48880-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/rbtree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 235047d7a1b5..f7edca369eda 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -17,9 +17,9 @@ #ifndef _LINUX_RBTREE_H #define _LINUX_RBTREE_H +#include #include -#include #include #include -- cgit v1.2.3 From dabba87229411a5e9d20ac03ffc36463c53ae672 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 27 May 2022 02:55:34 +0000 Subject: fs/kernel_read_file: allow to read files up-to ssize_t Patch series "Allow to kexec with initramfs larger than 2G", v2. Currently, the largest initramfs that is supported by kexec_file_load() syscall is 2G. This is because kernel_read_file() returns int, and is limited to INT_MAX or 2G. On the other hand, there are kexec based boot loaders (i.e. u-root), that may need to boot netboot images that might be larger than 2G. The first patch changes the return type from int to ssize_t in kernel_read_file* functions. The second patch increases the maximum initramfs file size to 4G. Tested: verified that can kexec_file_load() works with 4G initramfs on x86_64. This patch (of 2): Currently, the maximum file size that is supported is 2G. This may be too small in some cases. For example, kexec_file_load() system call loads initramfs. In some netboot cases initramfs can be rather large. Allow to use up-to ssize_t bytes. The callers still can limit the maximum file size via buf_size. Link: https://lkml.kernel.org/r/20220527025535.3953665-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20220527025535.3953665-2-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Cc: Al Viro Cc: Baoquan He Cc: "Eric W. Biederman" Cc: Greg Thelen Cc: Sasha Levin Signed-off-by: Andrew Morton --- include/linux/kernel_read_file.h | 32 ++++++++++++++++---------------- include/linux/limits.h | 1 + 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_read_file.h b/include/linux/kernel_read_file.h index 575ffa1031d3..90451e2e12bd 100644 --- a/include/linux/kernel_read_file.h +++ b/include/linux/kernel_read_file.h @@ -35,21 +35,21 @@ static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) return kernel_read_file_str[id]; } -int kernel_read_file(struct file *file, loff_t offset, - void **buf, size_t buf_size, - size_t *file_size, - enum kernel_read_file_id id); -int kernel_read_file_from_path(const char *path, loff_t offset, - void **buf, size_t buf_size, - size_t *file_size, - enum kernel_read_file_id id); -int kernel_read_file_from_path_initns(const char *path, loff_t offset, - void **buf, size_t buf_size, - size_t *file_size, - enum kernel_read_file_id id); -int kernel_read_file_from_fd(int fd, loff_t offset, - void **buf, size_t buf_size, - size_t *file_size, - enum kernel_read_file_id id); +ssize_t kernel_read_file(struct file *file, loff_t offset, + void **buf, size_t buf_size, + size_t *file_size, + enum kernel_read_file_id id); +ssize_t kernel_read_file_from_path(const char *path, loff_t offset, + void **buf, size_t buf_size, + size_t *file_size, + enum kernel_read_file_id id); +ssize_t kernel_read_file_from_path_initns(const char *path, loff_t offset, + void **buf, size_t buf_size, + size_t *file_size, + enum kernel_read_file_id id); +ssize_t kernel_read_file_from_fd(int fd, loff_t offset, + void **buf, size_t buf_size, + size_t *file_size, + enum kernel_read_file_id id); #endif /* _LINUX_KERNEL_READ_FILE_H */ diff --git a/include/linux/limits.h b/include/linux/limits.h index b568b9c30bbf..f6bcc9369010 100644 --- a/include/linux/limits.h +++ b/include/linux/limits.h @@ -7,6 +7,7 @@ #include #define SIZE_MAX (~(size_t)0) +#define SSIZE_MAX ((ssize_t)(SIZE_MAX >> 1)) #define PHYS_ADDR_MAX (~(phys_addr_t)0) #define U8_MAX ((u8)~0U) -- cgit v1.2.3 From a793679827a87b01f9d973ea30b923cd5a3ff2c5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 14 Jun 2022 10:46:11 +0200 Subject: linux/phy.h: add phydev_err_probe() wrapper for dev_err_probe() The dev_err_probe() function is quite useful to avoid boilerplate related to -EPROBE_DEFER handling. Add a phydev_err_probe() helper to simplify making use of that from phy drivers which otherwise use the phydev_* helpers. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 508f1149665b..bed9a347481b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1539,6 +1539,9 @@ static inline void phy_device_reset(struct phy_device *phydev, int value) #define phydev_err(_phydev, format, args...) \ dev_err(&_phydev->mdio.dev, format, ##args) +#define phydev_err_probe(_phydev, err, format, args...) \ + dev_err_probe(&_phydev->mdio.dev, err, format, ##args) + #define phydev_info(_phydev, format, args...) \ dev_info(&_phydev->mdio.dev, format, ##args) -- cgit v1.2.3 From 508362ac66b0478affb4e52cb8da98478312d72d Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 15 Jun 2022 16:48:43 +0300 Subject: bpf: Allow helpers to accept pointers with a fixed size Before this commit, the BPF verifier required ARG_PTR_TO_MEM arguments to be followed by ARG_CONST_SIZE holding the size of the memory region. The helpers had to check that size in runtime. There are cases where the size expected by a helper is a compile-time constant. Checking it in runtime is an unnecessary overhead and waste of BPF registers. This commit allows helpers to accept pointers to memory without the corresponding ARG_CONST_SIZE, given that they define the memory region size in struct bpf_func_proto and use ARG_PTR_TO_FIXED_SIZE_MEM type. arg_size is unionized with arg_btf_id to reduce the kernel image size, and it's valid because they are used by different argument types. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20220615134847.3753567-3-maximmi@nvidia.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f3e88afdaffe..a94531971a7a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -401,6 +401,9 @@ enum bpf_type_flag { /* DYNPTR points to a ringbuf record. */ DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), + /* Size is known at compile time. */ + MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -464,6 +467,8 @@ enum bpf_arg_type { * all bytes or clear them in error case. */ ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, + /* Pointer to valid memory of size known at compile time. */ + ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. @@ -529,6 +534,14 @@ struct bpf_func_proto { u32 *arg5_btf_id; }; u32 *arg_btf_id[5]; + struct { + size_t arg1_size; + size_t arg2_size; + size_t arg3_size; + size_t arg4_size; + size_t arg5_size; + }; + size_t arg_size[5]; }; int *ret_btf_id; /* return value btf_id */ bool (*allowed)(const struct bpf_prog *prog); -- cgit v1.2.3 From f0a6d77b351c18c122fc1638ac9e58f5e0346f64 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Tue, 14 Jun 2022 22:51:47 +0300 Subject: ata: make transfer mode masks *unsigned int* The packed transfer mode masks and also the {pio|mwdma|udma}_mask fields of *struct*s ata_device and ata_port_info are declared as *unsigned long* (which is a 64-bit type on 64-bit architectures) but actually the packed masks occupy only 20 bits (7 PIO modes, 5 MWDMA modes, and 8 UDMA modes) and the PIO/MWDMA/UDMA masks easily fit into just 8 bits each, so we can safely use (always 32-bit) *unsigned int* variables instead. This saves 745 bytes of object code in libata-core.o alone, not to mention LLDDs... Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- include/linux/libata.h | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 0f2a59c9c735..a8bc88b4fe07 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -275,7 +275,7 @@ enum { PORT_DISABLED = 2, /* encoding various smaller bitmaps into a single - * unsigned long bitmap + * unsigned int bitmap */ ATA_NR_PIO_MODES = 7, ATA_NR_MWDMA_MODES = 5, @@ -426,12 +426,9 @@ enum { }; enum ata_xfer_mask { - ATA_MASK_PIO = ((1LU << ATA_NR_PIO_MODES) - 1) - << ATA_SHIFT_PIO, - ATA_MASK_MWDMA = ((1LU << ATA_NR_MWDMA_MODES) - 1) - << ATA_SHIFT_MWDMA, - ATA_MASK_UDMA = ((1LU << ATA_NR_UDMA_MODES) - 1) - << ATA_SHIFT_UDMA, + ATA_MASK_PIO = ((1U << ATA_NR_PIO_MODES) - 1) << ATA_SHIFT_PIO, + ATA_MASK_MWDMA = ((1U << ATA_NR_MWDMA_MODES) - 1) << ATA_SHIFT_MWDMA, + ATA_MASK_UDMA = ((1U << ATA_NR_UDMA_MODES) - 1) << ATA_SHIFT_UDMA, }; enum hsm_task_states { @@ -680,9 +677,9 @@ struct ata_device { unsigned int cdb_len; /* per-dev xfer mask */ - unsigned long pio_mask; - unsigned long mwdma_mask; - unsigned long udma_mask; + unsigned int pio_mask; + unsigned int mwdma_mask; + unsigned int udma_mask; /* for CHS addressing */ u16 cylinders; /* Number of cylinders */ @@ -885,7 +882,7 @@ struct ata_port_operations { * Configuration and exception handling */ int (*cable_detect)(struct ata_port *ap); - unsigned long (*mode_filter)(struct ata_device *dev, unsigned long xfer_mask); + unsigned int (*mode_filter)(struct ata_device *dev, unsigned int xfer_mask); void (*set_piomode)(struct ata_port *ap, struct ata_device *dev); void (*set_dmamode)(struct ata_port *ap, struct ata_device *dev); int (*set_mode)(struct ata_link *link, struct ata_device **r_failed_dev); @@ -981,9 +978,9 @@ struct ata_port_operations { struct ata_port_info { unsigned long flags; unsigned long link_flags; - unsigned long pio_mask; - unsigned long mwdma_mask; - unsigned long udma_mask; + unsigned int pio_mask; + unsigned int mwdma_mask; + unsigned int udma_mask; struct ata_port_operations *port_ops; void *private_data; }; @@ -1102,16 +1099,18 @@ extern void ata_msleep(struct ata_port *ap, unsigned int msecs); extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, u32 val, unsigned long interval, unsigned long timeout); extern int atapi_cmd_type(u8 opcode); -extern unsigned long ata_pack_xfermask(unsigned long pio_mask, - unsigned long mwdma_mask, unsigned long udma_mask); -extern void ata_unpack_xfermask(unsigned long xfer_mask, - unsigned long *pio_mask, unsigned long *mwdma_mask, - unsigned long *udma_mask); -extern u8 ata_xfer_mask2mode(unsigned long xfer_mask); -extern unsigned long ata_xfer_mode2mask(u8 xfer_mode); +extern unsigned int ata_pack_xfermask(unsigned int pio_mask, + unsigned int mwdma_mask, + unsigned int udma_mask); +extern void ata_unpack_xfermask(unsigned int xfer_mask, + unsigned int *pio_mask, + unsigned int *mwdma_mask, + unsigned int *udma_mask); +extern u8 ata_xfer_mask2mode(unsigned int xfer_mask); +extern unsigned int ata_xfer_mode2mask(u8 xfer_mode); extern int ata_xfer_mode2shift(u8 xfer_mode); -extern const char *ata_mode_string(unsigned long xfer_mask); -extern unsigned long ata_id_xfermask(const u16 *id); +extern const char *ata_mode_string(unsigned int xfer_mask); +extern unsigned int ata_id_xfermask(const u16 *id); extern int ata_std_qc_defer(struct ata_queued_cmd *qc); extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc); extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, @@ -1283,8 +1282,8 @@ static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) } int ata_acpi_stm(struct ata_port *ap, const struct ata_acpi_gtm *stm); int ata_acpi_gtm(struct ata_port *ap, struct ata_acpi_gtm *stm); -unsigned long ata_acpi_gtm_xfermask(struct ata_device *dev, - const struct ata_acpi_gtm *gtm); +unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, + const struct ata_acpi_gtm *gtm); int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm); #else static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) -- cgit v1.2.3 From d64de9773c18409d2161228242968ff3ebe3707e Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Thu, 9 Jun 2022 20:31:19 +0800 Subject: crypto: hisilicon/qm - modify event irq processing When the driver receives an event interrupt, the driver will enable the event interrupt after handling all completed tasks on the function, tasks on the function are parsed through only one thread. If the task's user callback takes time, other tasks on the function will be blocked. Therefore, the event irq processing is modified as follows: 1. Obtain the ID of the queue that completes the task. 2. Enable event interrupt. 3. Parse the completed tasks in the queue and call the user callback. Enabling event interrupt in advance can quickly report pending event interrupts and process tasks in multiple threads. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 6cabafffd0dd..116e8bd68c99 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -265,6 +265,12 @@ struct hisi_qm_list { void (*unregister_from_crypto)(struct hisi_qm *qm); }; +struct hisi_qm_poll_data { + struct hisi_qm *qm; + struct work_struct work; + u16 *qp_finish_id; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -302,6 +308,7 @@ struct hisi_qm { struct rw_semaphore qps_lock; struct idr qp_idr; struct hisi_qp *qp_array; + struct hisi_qm_poll_data *poll_data; struct mutex mailbox_lock; @@ -312,7 +319,6 @@ struct hisi_qm { u32 error_mask; struct workqueue_struct *wq; - struct work_struct work; struct work_struct rst_work; struct work_struct cmd_process; -- cgit v1.2.3 From fa9c562f9735d24c3253747eb21f3f0c0f6de48e Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Wed, 15 Jun 2022 16:39:04 +0800 Subject: net: make xpcs_do_config to accept advertising for pcs-xpcs and sja1105 xpcs_config() has 'advertising' input that is required for C37 1000BASE-X AN in later patch series. So, we prepare xpcs_do_config() for it. For sja1105, xpcs_do_config() is used for xpcs configuration without depending on advertising input, so set to NULL. Reported-by: kernel test robot Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- include/linux/pcs/pcs-xpcs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 266eb26fb029..37eb97cc2283 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -30,7 +30,7 @@ int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode, phy_interface_t interface, int speed, int duplex); int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, - unsigned int mode); + unsigned int mode, const unsigned long *advertising); void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); -- cgit v1.2.3 From b47aec885bcd672ebca2108a8b7e9ce3e3982775 Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Wed, 15 Jun 2022 16:39:06 +0800 Subject: net: pcs: xpcs: add CL37 1000BASE-X AN support For CL37 1000BASE-X AN, DW xPCS does not support C22 method but offers C45 vendor-specific MII MMD for programming. We also add the ability to disable Autoneg (through ethtool for certain network switch that supports 1000BASE-X (1000Mbps and Full-Duplex) but not Autoneg capability. v4: Fixes to comment from Russell King. Thanks! https://patchwork.kernel.org/comment/24894239/ Make xpcs_modify_changed() as private, change to use mdiodev_modify_changed() for cleaner code. v3: Fixes to issues spotted by Russell King. Thanks! https://patchwork.kernel.org/comment/24890210/ Use phylink_mii_c22_pcs_decode_state(), remove unnecessary interrupt clearing and skip speed & duplex setting if AN is enabled. v2: Fixes to issues spotted by Russell King in v1. Thanks! https://patchwork.kernel.org/comment/24826650/ Use phylink_mii_c22_pcs_encode_advertisement() and implement C45 MII ADV handling since IP only support C45 access. Tested-by: Emilio Riva Signed-off-by: Ong Boon Leong Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/pcs/pcs-xpcs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 37eb97cc2283..d2da1e0b4a92 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -17,6 +17,7 @@ #define DW_AN_C73 1 #define DW_AN_C37_SGMII 2 #define DW_2500BASEX 3 +#define DW_AN_C37_1000BASEX 4 struct xpcs_id; -- cgit v1.2.3 From d0804085c5a7feb557bd3219777b51f5598dfdc6 Mon Sep 17 00:00:00 2001 From: Moudy Ho Date: Fri, 10 Jun 2022 14:34:18 +0800 Subject: soc: mediatek: mutex: add common interface for modules setting In order to allow multiple modules to operate MUTEX hardware through a common interfrace, two flexible indexes "mtk_mutex_mod_index" and "mtk_mutex_sof_index" need to be added to replace original component ID so that like DDP and MDP can add their own MOD table or SOF settings independently. In addition, 2 generic interface "mtk_mutex_write_mod" and "mtk_mutex_write_sof" have been added, which is expected to replace the "mtk_mutex_add_comp" and "mtk_mutex_remove_comp" pair originally dedicated to DDP in the future. Signed-off-by: Moudy Ho Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Rex-BC Chen Reviewed-by: CK Hu Link: https://lore.kernel.org/r/20220610063424.7800-2-moudy.ho@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-mutex.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-mutex.h b/include/linux/soc/mediatek/mtk-mutex.h index 6fe4ffbde290..2ddab9d2b85d 100644 --- a/include/linux/soc/mediatek/mtk-mutex.h +++ b/include/linux/soc/mediatek/mtk-mutex.h @@ -10,6 +10,26 @@ struct regmap; struct device; struct mtk_mutex; +enum mtk_mutex_mod_index { + /* MDP table index */ + MUTEX_MOD_IDX_MDP_RDMA0, + MUTEX_MOD_IDX_MDP_RSZ0, + MUTEX_MOD_IDX_MDP_RSZ1, + MUTEX_MOD_IDX_MDP_TDSHP0, + MUTEX_MOD_IDX_MDP_WROT0, + MUTEX_MOD_IDX_MDP_WDMA, + MUTEX_MOD_IDX_MDP_AAL0, + MUTEX_MOD_IDX_MDP_CCORR0, + + MUTEX_MOD_IDX_MAX /* ALWAYS keep at the end */ +}; + +enum mtk_mutex_sof_index { + MUTEX_SOF_IDX_SINGLE_MODE, + + MUTEX_SOF_IDX_MAX /* ALWAYS keep at the end */ +}; + struct mtk_mutex *mtk_mutex_get(struct device *dev); int mtk_mutex_prepare(struct mtk_mutex *mutex); void mtk_mutex_add_comp(struct mtk_mutex *mutex, @@ -22,5 +42,10 @@ void mtk_mutex_unprepare(struct mtk_mutex *mutex); void mtk_mutex_put(struct mtk_mutex *mutex); void mtk_mutex_acquire(struct mtk_mutex *mutex); void mtk_mutex_release(struct mtk_mutex *mutex); +int mtk_mutex_write_mod(struct mtk_mutex *mutex, + enum mtk_mutex_mod_index idx, + bool clear); +int mtk_mutex_write_sof(struct mtk_mutex *mutex, + enum mtk_mutex_sof_index idx); #endif /* MTK_MUTEX_H */ -- cgit v1.2.3 From e5758850c2ea448dd750a280d128a3590d68b899 Mon Sep 17 00:00:00 2001 From: Moudy Ho Date: Fri, 10 Jun 2022 14:34:23 +0800 Subject: soc: mediatek: mutex: add functions that operate registers by CMDQ Due to HW limitations, MDP3 is necessary to enable MUTEX in each frame for SOF triggering and cooperate with CMDQ control to reduce the amount of interrupts generated(also, reduce frame latency). In response to the above situation, a new interface "mtk_mutex_enable_by_cmdq" has been added to achieve the purpose. Signed-off-by: Moudy Ho Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Rex-BC Chen Reviewed-by: CK Hu Link: https://lore.kernel.org/r/20220610063424.7800-7-moudy.ho@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-mutex.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-mutex.h b/include/linux/soc/mediatek/mtk-mutex.h index 2ddab9d2b85d..a0f4f51a3b45 100644 --- a/include/linux/soc/mediatek/mtk-mutex.h +++ b/include/linux/soc/mediatek/mtk-mutex.h @@ -35,6 +35,8 @@ int mtk_mutex_prepare(struct mtk_mutex *mutex); void mtk_mutex_add_comp(struct mtk_mutex *mutex, enum mtk_ddp_comp_id id); void mtk_mutex_enable(struct mtk_mutex *mutex); +int mtk_mutex_enable_by_cmdq(struct mtk_mutex *mutex, + void *pkt); void mtk_mutex_disable(struct mtk_mutex *mutex); void mtk_mutex_remove_comp(struct mtk_mutex *mutex, enum mtk_ddp_comp_id id); -- cgit v1.2.3 From dc368e1c658e4f478a45e8d1d5b0c8392ca87506 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 16 Jun 2022 15:54:07 -0700 Subject: bpf: Fix non-static bpf_func_proto struct definitions This patch does two things: 1) Marks the dynptr bpf_func_proto structs that were added in [1] as static, as pointed out by the kernel test robot in [2]. 2) There are some bpf_func_proto structs marked as extern which can instead be statically defined. [1] https://lore.kernel.org/bpf/20220523210712.3641569-1-joannelkoong@gmail.com/ [2] https://lore.kernel.org/bpf/62ab89f2.Pko7sI08RAKdF8R6%25lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: Joanne Koong Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220616225407.1878436-1-joannelkoong@gmail.com --- include/linux/bpf.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a94531971a7a..0edd7d2c0064 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2363,12 +2363,9 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; -extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; -extern const struct bpf_func_proto bpf_strncmp_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; -extern const struct bpf_func_proto bpf_kptr_xchg_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); -- cgit v1.2.3 From 9a2139c2912ea64e288749c21452930dc752d4fd Mon Sep 17 00:00:00 2001 From: Caleb Connolly Date: Fri, 29 Apr 2022 23:08:56 +0100 Subject: spmi: add a helper to look up an SPMI device from a device node The helper function spmi_device_from_of() takes a device node and returns the SPMI device associated with it. This is like of_find_device_by_node but for SPMI devices. Signed-off-by: Caleb Connolly Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/20220429220904.137297-2-caleb.connolly@linaro.org Signed-off-by: Jonathan Cameron --- include/linux/spmi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spmi.h b/include/linux/spmi.h index 729bcbf9f5ad..eac1956a8727 100644 --- a/include/linux/spmi.h +++ b/include/linux/spmi.h @@ -164,6 +164,9 @@ static inline void spmi_driver_unregister(struct spmi_driver *sdrv) module_driver(__spmi_driver, spmi_driver_register, \ spmi_driver_unregister) +struct device_node; + +struct spmi_device *spmi_device_from_of(struct device_node *np); int spmi_register_read(struct spmi_device *sdev, u8 addr, u8 *buf); int spmi_ext_register_read(struct spmi_device *sdev, u8 addr, u8 *buf, size_t len); -- cgit v1.2.3 From 4a08069461ac9822855116d24fbf11d5fb223cd1 Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Tue, 7 Jun 2022 18:39:18 +0000 Subject: iio: trigger: warn about non-registered iio trigger getting attempt As a part of patch series about wrong trigger register() and get() calls order in the some IIO drivers trigger initialization path: https://lore.kernel.org/all/20220524181150.9240-1-ddrokosov@sberdevices.ru/ runtime WARN_ONCE() is added to alarm IIO driver authors who make such a mistake. When an IIO driver allocates a new IIO trigger, it should register it before calling the get() operation. In other words, each IIO driver must abide by IIO trigger alloc()/register()/get() calls order. Signed-off-by: Dmitry Rokosov Link: https://lore.kernel.org/r/20220607183907.20017-1-ddrokosov@sberdevices.ru Signed-off-by: Jonathan Cameron --- include/linux/iio/trigger.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 4c69b144677b..03b1d6863436 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -93,6 +93,11 @@ static inline void iio_trigger_put(struct iio_trigger *trig) static inline struct iio_trigger *iio_trigger_get(struct iio_trigger *trig) { get_device(&trig->dev); + + WARN_ONCE(list_empty(&trig->list), + "Getting non-registered iio trigger %s is prohibited\n", + trig->name); + __module_get(trig->owner); return trig; -- cgit v1.2.3 From bdb6cfe7512f7a214815a3092f0be50963dcacbc Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 18 Jun 2022 11:28:32 +0100 Subject: net: mii: add mii_bmcr_encode_fixed() Add a function to encode a fixed speed/duplex to a BMCR value. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/mii.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 5ee13083cec7..d5a959ce4877 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -545,4 +545,39 @@ static inline u8 mii_resolve_flowctrl_fdx(u16 lcladv, u16 rmtadv) return cap; } +/** + * mii_bmcr_encode_fixed - encode fixed speed/duplex settings to a BMCR value + * @speed: a SPEED_* value + * @duplex: a DUPLEX_* value + * + * Encode the speed and duplex to a BMCR value. 2500, 1000, 100 and 10 Mbps are + * supported. 2500Mbps is encoded to 1000Mbps. Other speeds are encoded as 10 + * Mbps. Unknown duplex values are encoded to half-duplex. + */ +static inline u16 mii_bmcr_encode_fixed(int speed, int duplex) +{ + u16 bmcr; + + switch (speed) { + case SPEED_2500: + case SPEED_1000: + bmcr = BMCR_SPEED1000; + break; + + case SPEED_100: + bmcr = BMCR_SPEED100; + break; + + case SPEED_10: + default: + bmcr = BMCR_SPEED10; + break; + } + + if (duplex == DUPLEX_FULL) + bmcr |= BMCR_FULLDPLX; + + return bmcr; +} + #endif /* __LINUX_MII_H__ */ -- cgit v1.2.3 From 2e0aee8f0a22c60a1ae0876f7233e70ad9d026b8 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Thu, 16 Jun 2022 23:51:49 +0300 Subject: ata: make ata_port::fastdrain_cnt *unsigned int* *unsigned long* ata_port::fastdrain_cnt (64-bit value in a 64-bit kernel) is always assigned from the 32-bit *unsigned int* variables, thus could also be made just *unsigned int*... Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index a8bc88b4fe07..0269ff114f5a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -847,7 +847,7 @@ struct ata_port { enum ata_lpm_policy target_lpm_policy; struct timer_list fastdrain_timer; - unsigned long fastdrain_cnt; + unsigned int fastdrain_cnt; async_cookie_t cookie; -- cgit v1.2.3 From 8e1c69149f27189cff93a0cfe9402e576d89ce29 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 29 Apr 2022 01:04:10 +0000 Subject: KVM: Avoid pfn_to_page() and vice versa when releasing pages Invert the order of KVM's page/pfn release helpers so that the "inner" helper operates on a page instead of a pfn. As pointed out by Linus[*], converting between struct page and a pfn isn't necessarily cheap, and that's not even counting the overhead of is_error_noslot_pfn() and kvm_is_reserved_pfn(). Even if the checks were dirt cheap, there's no reason to convert from a page to a pfn and back to a page, just to mark the page dirty/accessed or to put a reference to the page. Opportunistically drop a stale declaration of kvm_set_page_accessed() from kvm_host.h (there was no implementation). No functional change intended. [*] https://lore.kernel.org/all/CAHk-=wifQimj2d6npq-wCi5onYPjzQg4vyO4tFcPJJZr268cRw@mail.gmail.com Signed-off-by: Sean Christopherson Message-Id: <20220429010416.2788472-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c20f2d55840c..af4b5c0bf04e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1139,7 +1139,6 @@ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, bool *writable); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); -void kvm_set_page_accessed(struct page *page); kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, -- cgit v1.2.3 From b1624f99aa8fedafcabf1b92fa51ed88dde14acb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 29 Apr 2022 01:04:13 +0000 Subject: KVM: Remove kvm_vcpu_gfn_to_page() and kvm_vcpu_gpa_to_page() Drop helpers to convert a gfn/gpa to a 'struct page' in the context of a vCPU. KVM doesn't require that guests be backed by 'struct page' memory, thus any use of helpers that assume 'struct page' is bound to be flawed, as was the case for the recently removed last user in x86's nested VMX. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20220429010416.2788472-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index af4b5c0bf04e..4334789409c0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1230,7 +1230,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map); -struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty); unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); @@ -1718,12 +1717,6 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn) return (hpa_t)pfn << PAGE_SHIFT; } -static inline struct page *kvm_vcpu_gpa_to_page(struct kvm_vcpu *vcpu, - gpa_t gpa) -{ - return kvm_vcpu_gfn_to_page(vcpu, gpa_to_gfn(gpa)); -} - static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) { unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); -- cgit v1.2.3 From 284dc49307738d2a897fd375431f741213cd0f27 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 29 Apr 2022 01:04:14 +0000 Subject: KVM: Take a 'struct page', not a pfn in kvm_is_zone_device_page() Operate on a 'struct page' instead of a pfn when checking if a page is a ZONE_DEVICE page, and rename the helper accordingly. Generally speaking, KVM doesn't actually care about ZONE_DEVICE memory, i.e. shouldn't do anything special for ZONE_DEVICE memory. Rather, KVM wants to treat ZONE_DEVICE memory like regular memory, and the need to identify ZONE_DEVICE memory only arises as an exception to PG_reserved pages. In other words, KVM should only ever check for ZONE_DEVICE memory after KVM has already verified that there is a struct page associated with the pfn. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20220429010416.2788472-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4334789409c0..6fb433ac2ba1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1571,7 +1571,7 @@ void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); bool kvm_is_reserved_pfn(kvm_pfn_t pfn); -bool kvm_is_zone_device_pfn(kvm_pfn_t pfn); +bool kvm_is_zone_device_page(struct page *page); struct kvm_irq_ack_notifier { struct hlist_node link; -- cgit v1.2.3 From b14b2690c50e02145bb867dfcde8845eb17aa8a4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 29 Apr 2022 01:04:15 +0000 Subject: KVM: Rename/refactor kvm_is_reserved_pfn() to kvm_pfn_to_refcounted_page() Rename and refactor kvm_is_reserved_pfn() to kvm_pfn_to_refcounted_page() to better reflect what KVM is actually checking, and to eliminate extra pfn_to_page() lookups. The kvm_release_pfn_*() an kvm_try_get_pfn() helpers in particular benefit from "refouncted" nomenclature, as it's not all that obvious why KVM needs to get/put refcounts for some PG_reserved pages (ZERO_PAGE and ZONE_DEVICE). Add a comment to call out that the list of exceptions to PG_reserved is all but guaranteed to be incomplete. The list has mostly been compiled by people throwing noodles at KVM and finding out they stick a little too well, e.g. the ZERO_PAGE's refcount overflowed and ZONE_DEVICE pages didn't get freed. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20220429010416.2788472-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6fb433ac2ba1..a2bbdf3ab086 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1570,7 +1570,7 @@ void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); -bool kvm_is_reserved_pfn(kvm_pfn_t pfn); +struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn); bool kvm_is_zone_device_page(struct page *page); struct kvm_irq_ack_notifier { -- cgit v1.2.3 From 7b0a0e3c3a88260b6fcb017e49f198463aa62ed1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Apr 2022 16:50:57 +0200 Subject: wifi: cfg80211: do some rework towards MLO link APIs In order to support multi-link operation with multiple links, start adding some APIs. The notable addition here is to have the link ID in a new nl80211 attribute, that will be used to differentiate the links in many nl80211 operations. So far, this patch adds the netlink NL80211_ATTR_MLO_LINK_ID attribute (as well as the NL80211_ATTR_MLO_LINKS attribute) and plugs it through the system in some places, checking the validity etc. along with other infrastructure needed for it. For now, I've decided to include only the over-the-air link ID in the API. I know we discussed that we eventually need to have to have other ways of identifying a link, but for local AP mode and auth/assoc commands as well as set_key etc. we'll use the OTA ID. Also included in this patch is some refactoring of the data structures in struct wireless_dev, splitting for the first time the data into type dependent pieces, to make reasoning about these things easier. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 5c65ae6b8154..e15771965916 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4376,4 +4376,7 @@ enum ieee80211_range_params_max_total_ltf { IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_UNSPECIFIED, }; +/* multi-link device */ +#define IEEE80211_MLD_MAX_NUM_LINKS 15 + #endif /* LINUX_IEEE80211_H */ -- cgit v1.2.3 From 0f48b8b88aa9ed7b65d7cb55dbc57ec914ddada1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 31 May 2022 14:03:38 +0200 Subject: wifi: ieee80211: add definitions for multi-link element Add the definitions necessary to build and parse some of the multi-link element, the per-STA profile isn't fully included. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 222 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e15771965916..870f659087d6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4379,4 +4379,226 @@ enum ieee80211_range_params_max_total_ltf { /* multi-link device */ #define IEEE80211_MLD_MAX_NUM_LINKS 15 +#define IEEE80211_ML_CONTROL_TYPE 0x0007 +#define IEEE80211_ML_CONTROL_TYPE_BASIC 0 +#define IEEE80211_ML_CONTROL_TYPE_PREQ 1 +#define IEEE80211_ML_CONTROL_TYPE_RECONF 2 +#define IEEE80211_ML_CONTROL_TYPE_TDLS 3 +#define IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS 4 +#define IEEE80211_ML_CONTROL_PRESENCE_MASK 0xfff0 + +struct ieee80211_multi_link_elem { + __le16 control; + u8 variable[]; +} __packed; + +#define IEEE80211_MLC_BASIC_PRES_LINK_ID 0x0010 +#define IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT 0x0020 +#define IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY 0x0040 +#define IEEE80211_MLC_BASIC_PRES_EML_CAPA 0x0080 +#define IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP 0x0100 +#define IEEE80211_MLC_BASIC_PRES_MLD_ID 0x0200 + +#define IEEE80211_MED_SYNC_DELAY_DURATION 0x00ff +#define IEEE80211_MED_SYNC_DELAY_SYNC_OFDM_ED_THRESH 0x0f00 +#define IEEE80211_MED_SYNC_DELAY_SYNC_MAX_NUM_TXOPS 0xf000 + +#define IEEE80211_EML_CAP_EMLSR_SUPP 0x0001 +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY 0x000e +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_0US 0 +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_32US 1 +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_64US 2 +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_128US 3 +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_256US 4 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY 0x0070 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_0US 0 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_16US 1 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_32US 2 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_64US 3 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_128US 4 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_256US 5 +#define IEEE80211_EML_CAP_EMLMR_SUPPORT 0x0080 +#define IEEE80211_EML_CAP_EMLMR_DELAY 0x0700 +#define IEEE80211_EML_CAP_EMLMR_DELAY_0US 0 +#define IEEE80211_EML_CAP_EMLMR_DELAY_32US 1 +#define IEEE80211_EML_CAP_EMLMR_DELAY_64US 2 +#define IEEE80211_EML_CAP_EMLMR_DELAY_128US 3 +#define IEEE80211_EML_CAP_EMLMR_DELAY_256US 4 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT 0x7800 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_0 0 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128US 1 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_256US 2 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_512US 3 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_1TU 4 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_2TU 5 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_4TU 6 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_8TU 7 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_16TU 8 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_32TU 9 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_64TU 10 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128TU 11 + +#define IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS 0x000f +#define IEEE80211_MLD_CAP_OP_SRS_SUPPORT 0x0010 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP 0x0060 +#define IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND 0x0f80 +#define IEEE80211_MLD_CAP_OP_AAR_SUPPORT 0x1000 + +struct ieee80211_mle_basic_common_info { + u8 len; + u8 mld_mac_addr[ETH_ALEN]; + u8 variable[]; +} __packed; + +#define IEEE80211_MLC_PREQ_PRES_MLD_ID 0x0010 + +struct ieee80211_mle_preq_common_info { + u8 len; + u8 variable[]; +} __packed; + +#define IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR 0x0010 + +/* no fixed fields in RECONF */ + +struct ieee80211_mle_tdls_common_info { + u8 len; + u8 ap_mld_mac_addr[ETH_ALEN]; +} __packed; + +#define IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR 0x0010 + +/* no fixed fields in PRIO_ACCESS */ + +/** + * ieee80211_mle_common_size - check multi-link element common size + * @data: multi-link element, must already be checked for size using + * ieee80211_mle_size_ok() + */ +static inline u8 ieee80211_mle_common_size(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + u8 common = 0; + + switch (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE)) { + case IEEE80211_ML_CONTROL_TYPE_BASIC: + common += sizeof(struct ieee80211_mle_basic_common_info); + break; + case IEEE80211_ML_CONTROL_TYPE_PREQ: + common += sizeof(struct ieee80211_mle_preq_common_info); + break; + case IEEE80211_ML_CONTROL_TYPE_RECONF: + if (control & IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR) + common += ETH_ALEN; + return common; + case IEEE80211_ML_CONTROL_TYPE_TDLS: + common += sizeof(struct ieee80211_mle_tdls_common_info); + break; + case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: + if (control & IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR) + common += ETH_ALEN; + return common; + default: + WARN_ON(1); + return 0; + } + + return common + mle->variable[0]; +} + +/** + * ieee80211_mle_size_ok - validate multi-link element size + * @data: pointer to the element data + * @len: length of the containing element + */ +static inline bool ieee80211_mle_size_ok(const u8 *data, u8 len) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u8 fixed = sizeof(*mle); + u8 common = 0; + bool check_common_len = false; + u16 control; + + if (len < fixed) + return false; + + control = le16_to_cpu(mle->control); + + switch (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE)) { + case IEEE80211_ML_CONTROL_TYPE_BASIC: + common += sizeof(struct ieee80211_mle_basic_common_info); + check_common_len = true; + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_ID) + common += 1; + break; + case IEEE80211_ML_CONTROL_TYPE_PREQ: + common += sizeof(struct ieee80211_mle_preq_common_info); + if (control & IEEE80211_MLC_PREQ_PRES_MLD_ID) + common += 1; + check_common_len = true; + break; + case IEEE80211_ML_CONTROL_TYPE_RECONF: + if (control & IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR) + common += ETH_ALEN; + break; + case IEEE80211_ML_CONTROL_TYPE_TDLS: + common += sizeof(struct ieee80211_mle_tdls_common_info); + check_common_len = true; + break; + case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: + if (control & IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR) + common += ETH_ALEN; + break; + default: + /* we don't know this type */ + return true; + } + + if (len < fixed + common) + return false; + + if (!check_common_len) + return true; + + /* if present, common length is the first octet there */ + return mle->variable[0] >= common; +} + +enum ieee80211_mle_subelems { + IEEE80211_MLE_SUBELEM_PER_STA_PROFILE = 0, +}; + +#define IEEE80211_MLE_STA_CONTROL_LINK_ID 0x000f +#define IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE 0x0010 +#define IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT 0x0020 +#define IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT 0x0040 +#define IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT 0x0080 +#define IEEE80211_MLE_STA_CONTROL_DTIM_INFO_PRESENT 0x0100 +#define IEEE80211_MLE_STA_CONTROL_NSTR_LINK_PAIR_PRESENT 0x0200 +#define IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE 0x0400 +#define IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT 0x0800 + +struct ieee80211_mle_per_sta_profile { + __le16 control; + u8 sta_info_len; + u8 variable[]; +} __packed; + +#define for_each_mle_subelement(_elem, _data, _len) \ + if (ieee80211_mle_size_ok(_data, _len)) \ + for_each_element(_elem, \ + _data + ieee80211_mle_common_size(_data),\ + _len - ieee80211_mle_common_size(_data)) + #endif /* LINUX_IEEE80211_H */ -- cgit v1.2.3 From 965b57b469a589d64d81b1688b38dcb537011bb0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 15 Jun 2022 09:20:12 -0700 Subject: net: Introduce a new proto_ops ->read_skb() Currently both splice() and sockmap use ->read_sock() to read skb from receive queue, but for sockmap we only read one entire skb at a time, so ->read_sock() is too conservative to use. Introduce a new proto_ops ->read_skb() which supports this sematic, with this we can finally pass the ownership of skb to recv actors. For non-TCP protocols, all ->read_sock() can be simply converted to ->read_skb(). Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Link: https://lore.kernel.org/bpf/20220615162014.89193-3-xiyou.wangcong@gmail.com --- include/linux/net.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 12093f4db50c..a03485e8cbb2 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -152,6 +152,8 @@ struct module; struct sk_buff; typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, unsigned int, size_t); +typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *); + struct proto_ops { int family; @@ -214,6 +216,8 @@ struct proto_ops { */ int (*read_sock)(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); + /* This is different from read_sock(), it reads an entire skb at a time. */ + int (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor); int (*sendpage_locked)(struct sock *sk, struct page *page, int offset, size_t size, int flags); int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, -- cgit v1.2.3 From 414c12385d4741e35d88670c6cc2f40a77809734 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 13 Apr 2022 15:17:25 -0700 Subject: rcu: Provide a get_completed_synchronize_rcu() function It is currently up to the caller to handle stale return values from get_state_synchronize_rcu(). If poll_state_synchronize_rcu() returned true once, a grace period has elapsed, regardless of the fact that counter wrap might cause some future poll_state_synchronize_rcu() invocation to return false. For example, the caller might store a separate flag that indicates whether some previous call to poll_state_synchronize_rcu() determined that the relevant grace period had already ended. This approach works, but it requires extra storage and is easy to get wrong. This commit therefore introduces a get_completed_synchronize_rcu() that returns a cookie that causes poll_state_synchronize_rcu() to always return true. This already-completed cookie can be stored in place of the cookie that previously caused poll_state_synchronize_rcu() to return true. It can also be used to flag a given structure as not having been exposed to readers, and thus not requiring a grace period to elapse. This commit is in preparation for polled expedited grace periods. Link: https://lore.kernel.org/all/20220121142454.1994916-1-bfoster@redhat.com/ Link: https://docs.google.com/document/d/1RNKWW9jQyfjxw2E8dsXVTdvZYh0HnYeSHDKog9jhdN8/edit?usp=sharing Cc: Brian Foster Cc: Dave Chinner Cc: Al Viro Cc: Ian Kent Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 1a32036c918c..7f12daa4708b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -41,6 +41,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); +unsigned long get_completed_synchronize_rcu(void); #ifdef CONFIG_PREEMPT_RCU -- cgit v1.2.3 From 3847b64570b1753e9863a4106aec03f4d68e7b17 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 23 May 2022 20:50:11 -0700 Subject: rcu-tasks: Merge state into .b.need_qs and atomically update This commit gets rid of the task_struct structure's ->trc_reader_checked field, making it instead be a bit within the task_struct structure's existing ->trc_reader_special.b.need_qs field. This commit also atomically loads, stores, and checks the resulting combination of the reader-checked and need-quiescent state flags. This will in turn allow significant simplification of the rcu_tasks_trace_postgp() function as well as elimination of the trc_n_readers_need_end counter in later commits. These changes will in turn simplify later elimination of the RCU Tasks Trace scan of the task list, which will make RCU Tasks Trace grace periods less CPU-intensive. [ paulmck: Apply kernel test robot feedback. ] Signed-off-by: Paul E. McKenney Cc: Neeraj Upadhyay Cc: Eric Dumazet Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Martin KaFai Lau Cc: KP Singh --- include/linux/rcupdate.h | 18 +++++++++++------- include/linux/sched.h | 1 - 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 1a32036c918c..1e728d544fc1 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -169,13 +169,17 @@ void synchronize_rcu_tasks(void); # endif # ifdef CONFIG_TASKS_TRACE_RCU -# define rcu_tasks_trace_qs(t) \ - do { \ - if (!likely(READ_ONCE((t)->trc_reader_checked)) && \ - !unlikely(READ_ONCE((t)->trc_reader_nesting))) { \ - smp_store_release(&(t)->trc_reader_checked, true); \ - smp_mb(); /* Readers partitioned by store. */ \ - } \ +// Bits for ->trc_reader_special.b.need_qs field. +#define TRC_NEED_QS 0x1 // Task needs a quiescent state. +#define TRC_NEED_QS_CHECKED 0x2 // Task has been checked for needing quiescent state. + +u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new); + +# define rcu_tasks_trace_qs(t) \ + do { \ + if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \ + likely(!READ_ONCE((t)->trc_reader_nesting))) \ + rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \ } while (0) # else # define rcu_tasks_trace_qs(t) do { } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index c46f3a63b758..e6eb5871593e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -843,7 +843,6 @@ struct task_struct { int trc_reader_nesting; int trc_ipi_to_cpu; union rcu_special trc_reader_special; - bool trc_reader_checked; struct list_head trc_holdout_list; #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ -- cgit v1.2.3 From 434c9eefb959c36331a93617ea95df903469b99f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 May 2022 17:56:16 -0700 Subject: rcu-tasks: Add data structures for lightweight grace periods This commit adds fields to task_struct and to rcu_tasks_percpu that will be used to avoid the task-list scan for RCU Tasks Trace grace periods, and also initializes these fields. Signed-off-by: Paul E. McKenney Cc: Neeraj Upadhyay Cc: Eric Dumazet Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Martin KaFai Lau Cc: KP Singh --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index e6eb5871593e..b88caf54e168 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -844,6 +844,8 @@ struct task_struct { int trc_ipi_to_cpu; union rcu_special trc_reader_special; struct list_head trc_holdout_list; + struct list_head trc_blkd_node; + int trc_blkd_cpu; #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ struct sched_info sched_info; -- cgit v1.2.3 From 0356d4e66214569de674ab2684f2e0b440a466ab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 17 May 2022 11:30:32 -0700 Subject: rcu-tasks: Track blocked RCU Tasks Trace readers This commit places any task that has ever blocked within its current RCU Tasks Trace read-side critical section on a per-CPU list within the rcu_tasks_percpu structure. Tasks are removed from this list when they exit by the exit_tasks_rcu_finish_trace() function. The purpose of this commit is to provide the information needed to eliminate the current scan of the full task list. This commit offsets the INT_MIN value for ->trc_reader_nesting with the new nesting level in order to avoid queueing tasks that are exiting their read-side critical sections. [ paulmck: Apply kernel test robot feedback. ] [ paulmck: Apply feedback from syzbot+9bb26e7c5e8e4fa7e641@syzkaller.appspotmail.com ] Signed-off-by: Paul E. McKenney Tested-by: syzbot Tested-by: "Zhang, Qiang1" Cc: Peter Zijlstra Cc: Neeraj Upadhyay Cc: Eric Dumazet Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Martin KaFai Lau Cc: KP Singh --- include/linux/rcupdate.h | 11 +++++++++-- include/linux/rcupdate_trace.h | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 1e728d544fc1..ebdfeead44e5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -174,12 +174,19 @@ void synchronize_rcu_tasks(void); #define TRC_NEED_QS_CHECKED 0x2 // Task has been checked for needing quiescent state. u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new); +void rcu_tasks_trace_qs_blkd(struct task_struct *t); # define rcu_tasks_trace_qs(t) \ do { \ + int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ + \ if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \ - likely(!READ_ONCE((t)->trc_reader_nesting))) \ + likely(!___rttq_nesting)) { \ rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \ + } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ + !READ_ONCE((t)->trc_reader_special.b.blocked)) { \ + rcu_tasks_trace_qs_blkd(t); \ + } \ } while (0) # else # define rcu_tasks_trace_qs(t) do { } while (0) @@ -188,7 +195,7 @@ u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new); #define rcu_tasks_qs(t, preempt) \ do { \ rcu_tasks_classic_qs((t), (preempt)); \ - rcu_tasks_trace_qs((t)); \ + rcu_tasks_trace_qs(t); \ } while (0) # ifdef CONFIG_TASKS_RUDE_RCU diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index 6f9c35817398..9bc8cbb33340 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -75,7 +75,7 @@ static inline void rcu_read_unlock_trace(void) nesting = READ_ONCE(t->trc_reader_nesting) - 1; barrier(); // Critical section before disabling. // Disable IPI-based setting of .need_qs. - WRITE_ONCE(t->trc_reader_nesting, INT_MIN); + WRITE_ONCE(t->trc_reader_nesting, INT_MIN + nesting); if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) { WRITE_ONCE(t->trc_reader_nesting, nesting); return; // We assume shallow reader nesting. -- cgit v1.2.3 From 5f8a62af527ad8a615d68889d816b72c3f5f227f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:18 +0200 Subject: context_tracking: Remove unused context_tracking_in_user() This function is not used and CT_WARN_ON() coupled with ct_state() is the preferred way to assert context tracking state values. Reported-by: Nicolas Saenz Julienne Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/context_tracking_state.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index ae1e63e26947..edc7b46376a6 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -41,12 +41,7 @@ static inline bool context_tracking_enabled_this_cpu(void) return context_tracking_enabled() && __this_cpu_read(context_tracking.active); } -static __always_inline bool context_tracking_in_user(void) -{ - return __this_cpu_read(context_tracking.state) == CONTEXT_USER; -} #else -static __always_inline bool context_tracking_in_user(void) { return false; } static __always_inline bool context_tracking_enabled(void) { return false; } static __always_inline bool context_tracking_enabled_cpu(int cpu) { return false; } static __always_inline bool context_tracking_enabled_this_cpu(void) { return false; } -- cgit v1.2.3 From 1ade23711971b0eececf0d7fedc29d3c1d2fce01 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Jun 2022 02:53:42 +0300 Subject: bpf: Inline calls to bpf_loop when callback is known Calls to `bpf_loop` are replaced with direct loops to avoid indirection. E.g. the following: bpf_loop(10, foo, NULL, 0); Is replaced by equivalent of the following: for (int i = 0; i < 10; ++i) foo(i, NULL); This transformation could be applied when: - callback is known and does not change during program execution; - flags passed to `bpf_loop` are always zero. Inlining logic works as follows: - During execution simulation function `update_loop_inline_state` tracks the following information for each `bpf_loop` call instruction: - is callback known and constant? - are flags constant and zero? - Function `optimize_bpf_loop` increases stack depth for functions where `bpf_loop` calls can be inlined and invokes `inline_bpf_loop` to apply the inlining. The additional stack space is used to spill registers R6, R7 and R8. These registers are used as loop counter, loop maximal bound and callback context parameter; Measurements using `benchs/run_bench_bpf_loop.sh` inside QEMU / KVM on i7-4710HQ CPU show a drop in latency from 14 ns/op to 2 ns/op. Signed-off-by: Eduard Zingerman Acked-by: Song Liu Link: https://lore.kernel.org/r/20220620235344.569325-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ include/linux/bpf_verifier.h | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0edd7d2c0064..d05e1495a06e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1286,6 +1286,9 @@ struct bpf_array { #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 33 +/* Maximum number of loops for bpf_loop */ +#define BPF_MAX_LOOPS BIT(23) + #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ BPF_F_RDONLY_PROG | \ BPF_F_WRONLY | \ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3930c963fa67..81b19669efba 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -344,6 +344,14 @@ struct bpf_verifier_state_list { int miss_cnt, hit_cnt; }; +struct bpf_loop_inline_state { + int initialized:1; /* set to true upon first entry */ + int fit_for_inline:1; /* true if callback function is the same + * at each call and flags are always zero + */ + u32 callback_subprogno; /* valid when fit_for_inline is true */ +}; + /* Possible states for alu_state member. */ #define BPF_ALU_SANITIZE_SRC (1U << 0) #define BPF_ALU_SANITIZE_DST (1U << 1) @@ -373,6 +381,10 @@ struct bpf_insn_aux_data { u32 mem_size; /* mem_size for non-struct typed var */ }; } btf_var; + /* if instruction is a call to bpf_loop this field tracks + * the state of the relevant registers to make decision about inlining + */ + struct bpf_loop_inline_state loop_inline_state; }; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ -- cgit v1.2.3 From f9aefd6b2aa38b9787d2705f0f1161dfd23cdb8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Jun 2022 02:30:17 -0700 Subject: net: warn if mac header was not set Make sure skb_mac_header(), skb_mac_offset() and skb_mac_header_len() uses are not fooled if the mac header has not been set. These checks are enabled if CONFIG_DEBUG_NET=y This commit will likely expose existing bugs in linux networking stacks. Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20220620093017.3366713-1-eric.dumazet@gmail.com Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 82edf0359ab3..cd4a8268894a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2763,8 +2763,14 @@ static inline void skb_set_network_header(struct sk_buff *skb, const int offset) skb->network_header += offset; } +static inline int skb_mac_header_was_set(const struct sk_buff *skb) +{ + return skb->mac_header != (typeof(skb->mac_header))~0U; +} + static inline unsigned char *skb_mac_header(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); return skb->head + skb->mac_header; } @@ -2775,14 +2781,10 @@ static inline int skb_mac_offset(const struct sk_buff *skb) static inline u32 skb_mac_header_len(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); return skb->network_header - skb->mac_header; } -static inline int skb_mac_header_was_set(const struct sk_buff *skb) -{ - return skb->mac_header != (typeof(skb->mac_header))~0U; -} - static inline void skb_unset_mac_header(struct sk_buff *skb) { skb->mac_header = (typeof(skb->mac_header))~0U; -- cgit v1.2.3 From a37599ebfb656c2af4ca119de556eba29b6926d6 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Wed, 15 Jun 2022 17:20:18 +0000 Subject: usb: typec: mux: Add CONFIG guards for functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some drivers that can use the Type C mux API, but don't have to. Introduce CONFIG guards for the mux functions so that drivers can include the header file and not run into compilation errors on systems which don't have CONFIG_TYPEC enabled. When CONFIG_TYPEC is not enabled, the Type C mux functions will be stub versions of the original calls. Reported-by: kernel test robot Reviewed-by: Nícolas F. R. A. Prado Reviewed-by: Heikki Krogerus Reviewed-by: AngeloGioacchino Del Regno Tested-by: Nícolas F. R. A. Prado Signed-off-by: Prashant Malani Link: https://lore.kernel.org/r/20220615172129.1314056-3-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_mux.h | 44 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h index ee57781dcf28..9292f0e07846 100644 --- a/include/linux/usb/typec_mux.h +++ b/include/linux/usb/typec_mux.h @@ -58,17 +58,13 @@ struct typec_mux_desc { void *drvdata; }; +#if IS_ENABLED(CONFIG_TYPEC) + struct typec_mux *fwnode_typec_mux_get(struct fwnode_handle *fwnode, const struct typec_altmode_desc *desc); void typec_mux_put(struct typec_mux *mux); int typec_mux_set(struct typec_mux *mux, struct typec_mux_state *state); -static inline struct typec_mux * -typec_mux_get(struct device *dev, const struct typec_altmode_desc *desc) -{ - return fwnode_typec_mux_get(dev_fwnode(dev), desc); -} - struct typec_mux_dev * typec_mux_register(struct device *parent, const struct typec_mux_desc *desc); void typec_mux_unregister(struct typec_mux_dev *mux); @@ -76,4 +72,40 @@ void typec_mux_unregister(struct typec_mux_dev *mux); void typec_mux_set_drvdata(struct typec_mux_dev *mux, void *data); void *typec_mux_get_drvdata(struct typec_mux_dev *mux); +#else + +static inline struct typec_mux *fwnode_typec_mux_get(struct fwnode_handle *fwnode, + const struct typec_altmode_desc *desc) +{ + return NULL; +} + +static inline void typec_mux_put(struct typec_mux *mux) {} + +static inline int typec_mux_set(struct typec_mux *mux, struct typec_mux_state *state) +{ + return 0; +} + +static inline struct typec_mux_dev * +typec_mux_register(struct device *parent, const struct typec_mux_desc *desc) +{ + return ERR_PTR(-EOPNOTSUPP); +} +static inline void typec_mux_unregister(struct typec_mux_dev *mux) {} + +static inline void typec_mux_set_drvdata(struct typec_mux_dev *mux, void *data) {} +static inline void *typec_mux_get_drvdata(struct typec_mux_dev *mux) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +#endif /* CONFIG_TYPEC */ + +static inline struct typec_mux * +typec_mux_get(struct device *dev, const struct typec_altmode_desc *desc) +{ + return fwnode_typec_mux_get(dev_fwnode(dev), desc); +} + #endif /* __USB_TYPEC_MUX */ -- cgit v1.2.3 From 95acd8817e66d031d2e6ee7def3f1e1874819317 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Fri, 17 Jun 2022 12:57:34 +0200 Subject: bpf, x64: Add predicate for bpf2bpf with tailcalls support in JIT The BPF core/verifier is hard-coded to permit mixing bpf2bpf and tail calls for only x86-64. Change the logic to instead rely on a new weak function 'bool bpf_jit_supports_subprog_tailcalls(void)', which a capable JIT backend can override. Update the x86-64 eBPF JIT to reflect this. Signed-off-by: Tony Ambardar [jakub: drop MIPS bits and tweak patch subject] Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220617105735.733938-2-jakub@cloudflare.com --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index d0cbb31b1b4d..4c1a8b247545 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -914,6 +914,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); +bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_helper_changes_pkt_data(void *func); -- cgit v1.2.3 From 77515ebaf01920e2db49e04672ef669a7c2907f2 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 7 Jun 2022 11:26:25 +0800 Subject: devcoredump: remove the useless gfp_t parameter in dev_coredumpv and dev_coredumpm The dev_coredumpv() and dev_coredumpm() could not be used in atomic context, because they call kvasprintf_const() and kstrdup() with GFP_KERNEL parameter. The process is shown below: dev_coredumpv(.., gfp_t gfp) dev_coredumpm(.., gfp_t gfp) dev_set_name kobject_set_name_vargs kvasprintf_const(GFP_KERNEL, ...); //may sleep kstrdup(s, GFP_KERNEL); //may sleep This patch removes gfp_t parameter of dev_coredumpv() and dev_coredumpm() and changes the gfp_t parameter of kzalloc() in dev_coredumpm() to GFP_KERNEL in order to show they could not be used in atomic context. Fixes: 833c95456a70 ("device coredump: add new device coredump class") Reviewed-by: Brian Norris Reviewed-by: Johannes Berg Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/df72af3b1862bac7d8e793d1f3931857d3779dfd.1654569290.git.duoming@zju.edu.cn Signed-off-by: Greg Kroah-Hartman --- include/linux/devcoredump.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h index c008169ed2c6..c7d840d824c3 100644 --- a/include/linux/devcoredump.h +++ b/include/linux/devcoredump.h @@ -52,27 +52,26 @@ static inline void _devcd_free_sgtable(struct scatterlist *table) #ifdef CONFIG_DEV_COREDUMP -void dev_coredumpv(struct device *dev, void *data, size_t datalen, - gfp_t gfp); +void dev_coredumpv(struct device *dev, void *data, size_t datalen); void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, + void *data, size_t datalen, ssize_t (*read)(char *buffer, loff_t offset, size_t count, void *data, size_t datalen), void (*free)(void *data)); void dev_coredumpsg(struct device *dev, struct scatterlist *table, - size_t datalen, gfp_t gfp); + size_t datalen); #else static inline void dev_coredumpv(struct device *dev, void *data, - size_t datalen, gfp_t gfp) + size_t datalen) { vfree(data); } static inline void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, + void *data, size_t datalen, ssize_t (*read)(char *buffer, loff_t offset, size_t count, void *data, size_t datalen), void (*free)(void *data)) @@ -81,7 +80,7 @@ dev_coredumpm(struct device *dev, struct module *owner, } static inline void dev_coredumpsg(struct device *dev, struct scatterlist *table, - size_t datalen, gfp_t gfp) + size_t datalen) { _devcd_free_sgtable(table); } -- cgit v1.2.3 From e386b6725798eec07facedf4d4bb710c079fd25c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 Jun 2022 17:30:01 -0700 Subject: rcu-tasks: Eliminate RCU Tasks Trace IPIs to online CPUs Currently, the RCU Tasks Trace grace-period kthread IPIs each online CPU using smp_call_function_single() in order to track any tasks currently in RCU Tasks Trace read-side critical sections during which the corresponding task has neither blocked nor been preempted. These IPIs are annoying and are also not strictly necessary because any task that blocks or is preempted within its current RCU Tasks Trace read-side critical section will be tracked on one of the per-CPU rcu_tasks_percpu structure's ->rtp_blkd_tasks list. So the only time that this is a problem is if one of the CPUs runs through a long-duration RCU Tasks Trace read-side critical section without a context switch. Note that the task_call_func() function cannot help here because there is no safe way to identify the target task. Of course, the task_call_func() function will be very useful later, when processing the list of tasks, but it needs to know the task. This commit therefore creates a cpu_curr_snapshot() function that returns a pointer the task_struct structure of some task that happened to be running on the specified CPU more or less during the time that the cpu_curr_snapshot() function was executing. If there was no context switch during this time, this function will return a pointer to the task_struct structure of the task that was running throughout. If there was a context switch, then the outgoing task will be taken care of by RCU's context-switch hook, and the incoming task was either already taken care during some previous context switch, or it is not currently within an RCU Tasks Trace read-side critical section. And in this latter case, the grace period already started, so there is no need to wait on this task. This new cpu_curr_snapshot() function is invoked on each CPU early in the RCU Tasks Trace grace-period processing, and the resulting tasks are queued for later quiescent-state inspection. Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Neeraj Upadhyay Cc: Eric Dumazet Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Martin KaFai Lau Cc: KP Singh --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b88caf54e168..72242bc73d85 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2224,6 +2224,7 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern bool sched_task_on_rq(struct task_struct *p); extern unsigned long get_wchan(struct task_struct *p); +extern struct task_struct *cpu_curr_snapshot(int cpu); /* * In order to reduce various lock holder preemption latencies provide an -- cgit v1.2.3 From 0ffc781a19ed3030c792ad1a0e44e6e047bb9adc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:20 +0200 Subject: context_tracking: Rename __context_tracking_enter/exit() to __ct_user_enter/exit() The context tracking namespace is going to expand and some new functions will require even longer names. Start shrinking the context_tracking prefix to "ct" as is already the case for some existing macros, this will make the introduction of new functions easier. Acked-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 7a14807c9d1a..773035124bad 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -14,8 +14,8 @@ extern void context_tracking_cpu_set(int cpu); /* Called with interrupts disabled. */ -extern void __context_tracking_enter(enum ctx_state state); -extern void __context_tracking_exit(enum ctx_state state); +extern void __ct_user_enter(enum ctx_state state); +extern void __ct_user_exit(enum ctx_state state); extern void context_tracking_enter(enum ctx_state state); extern void context_tracking_exit(enum ctx_state state); @@ -38,13 +38,13 @@ static inline void user_exit(void) static __always_inline void user_enter_irqoff(void) { if (context_tracking_enabled()) - __context_tracking_enter(CONTEXT_USER); + __ct_user_enter(CONTEXT_USER); } static __always_inline void user_exit_irqoff(void) { if (context_tracking_enabled()) - __context_tracking_exit(CONTEXT_USER); + __ct_user_exit(CONTEXT_USER); } static inline enum ctx_state exception_enter(void) @@ -74,7 +74,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) static __always_inline bool context_tracking_guest_enter(void) { if (context_tracking_enabled()) - __context_tracking_enter(CONTEXT_GUEST); + __ct_user_enter(CONTEXT_GUEST); return context_tracking_enabled_this_cpu(); } @@ -82,7 +82,7 @@ static __always_inline bool context_tracking_guest_enter(void) static __always_inline void context_tracking_guest_exit(void) { if (context_tracking_enabled()) - __context_tracking_exit(CONTEXT_GUEST); + __ct_user_exit(CONTEXT_GUEST); } /** -- cgit v1.2.3 From e244a46a529a9a4c43ae3a2b4bf613e260ec8f81 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Tue, 14 Jun 2022 21:41:17 +0200 Subject: platform/surface: aggregator: Reserve more event- and target-categories With the introduction of the Surface Laptop Studio, more event- and target categories have been added. Therefore, increase the number of reserved events and extend the enum of know target categories to accommodate this. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220614194117.4118897-1-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/serial_hub.h | 75 ++++++++++++++------------- 1 file changed, 40 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/serial_hub.h b/include/linux/surface_aggregator/serial_hub.h index 26b95ec12733..45501b6e54e8 100644 --- a/include/linux/surface_aggregator/serial_hub.h +++ b/include/linux/surface_aggregator/serial_hub.h @@ -201,7 +201,7 @@ static inline u16 ssh_crc(const u8 *buf, size_t len) * exception of zero, which is not an event ID. Thus, this is also the * absolute maximum number of event handlers that can be registered. */ -#define SSH_NUM_EVENTS 34 +#define SSH_NUM_EVENTS 38 /* * SSH_NUM_TARGETS - The number of communication targets used in the protocol. @@ -292,40 +292,45 @@ struct ssam_span { * Windows driver. */ enum ssam_ssh_tc { - /* Category 0x00 is invalid for EC use. */ - SSAM_SSH_TC_SAM = 0x01, /* Generic system functionality, real-time clock. */ - SSAM_SSH_TC_BAT = 0x02, /* Battery/power subsystem. */ - SSAM_SSH_TC_TMP = 0x03, /* Thermal subsystem. */ - SSAM_SSH_TC_PMC = 0x04, - SSAM_SSH_TC_FAN = 0x05, - SSAM_SSH_TC_PoM = 0x06, - SSAM_SSH_TC_DBG = 0x07, - SSAM_SSH_TC_KBD = 0x08, /* Legacy keyboard (Laptop 1/2). */ - SSAM_SSH_TC_FWU = 0x09, - SSAM_SSH_TC_UNI = 0x0a, - SSAM_SSH_TC_LPC = 0x0b, - SSAM_SSH_TC_TCL = 0x0c, - SSAM_SSH_TC_SFL = 0x0d, - SSAM_SSH_TC_KIP = 0x0e, /* Manages detachable peripherals (Pro X/8 keyboard cover) */ - SSAM_SSH_TC_EXT = 0x0f, - SSAM_SSH_TC_BLD = 0x10, - SSAM_SSH_TC_BAS = 0x11, /* Detachment system (Surface Book 2/3). */ - SSAM_SSH_TC_SEN = 0x12, - SSAM_SSH_TC_SRQ = 0x13, - SSAM_SSH_TC_MCU = 0x14, - SSAM_SSH_TC_HID = 0x15, /* Generic HID input subsystem. */ - SSAM_SSH_TC_TCH = 0x16, - SSAM_SSH_TC_BKL = 0x17, - SSAM_SSH_TC_TAM = 0x18, - SSAM_SSH_TC_ACC = 0x19, - SSAM_SSH_TC_UFI = 0x1a, - SSAM_SSH_TC_USC = 0x1b, - SSAM_SSH_TC_PEN = 0x1c, - SSAM_SSH_TC_VID = 0x1d, - SSAM_SSH_TC_AUD = 0x1e, - SSAM_SSH_TC_SMC = 0x1f, - SSAM_SSH_TC_KPD = 0x20, - SSAM_SSH_TC_REG = 0x21, /* Extended event registry. */ + /* Category 0x00 is invalid for EC use. */ + SSAM_SSH_TC_SAM = 0x01, /* Generic system functionality, real-time clock. */ + SSAM_SSH_TC_BAT = 0x02, /* Battery/power subsystem. */ + SSAM_SSH_TC_TMP = 0x03, /* Thermal subsystem. */ + SSAM_SSH_TC_PMC = 0x04, + SSAM_SSH_TC_FAN = 0x05, + SSAM_SSH_TC_PoM = 0x06, + SSAM_SSH_TC_DBG = 0x07, + SSAM_SSH_TC_KBD = 0x08, /* Legacy keyboard (Laptop 1/2). */ + SSAM_SSH_TC_FWU = 0x09, + SSAM_SSH_TC_UNI = 0x0a, + SSAM_SSH_TC_LPC = 0x0b, + SSAM_SSH_TC_TCL = 0x0c, + SSAM_SSH_TC_SFL = 0x0d, + SSAM_SSH_TC_KIP = 0x0e, /* Manages detachable peripherals (Pro X/8 keyboard cover) */ + SSAM_SSH_TC_EXT = 0x0f, + SSAM_SSH_TC_BLD = 0x10, + SSAM_SSH_TC_BAS = 0x11, /* Detachment system (Surface Book 2/3). */ + SSAM_SSH_TC_SEN = 0x12, + SSAM_SSH_TC_SRQ = 0x13, + SSAM_SSH_TC_MCU = 0x14, + SSAM_SSH_TC_HID = 0x15, /* Generic HID input subsystem. */ + SSAM_SSH_TC_TCH = 0x16, + SSAM_SSH_TC_BKL = 0x17, + SSAM_SSH_TC_TAM = 0x18, + SSAM_SSH_TC_ACC0 = 0x19, + SSAM_SSH_TC_UFI = 0x1a, + SSAM_SSH_TC_USC = 0x1b, + SSAM_SSH_TC_PEN = 0x1c, + SSAM_SSH_TC_VID = 0x1d, + SSAM_SSH_TC_AUD = 0x1e, + SSAM_SSH_TC_SMC = 0x1f, + SSAM_SSH_TC_KPD = 0x20, + SSAM_SSH_TC_REG = 0x21, /* Extended event registry. */ + SSAM_SSH_TC_SPT = 0x22, + SSAM_SSH_TC_SYS = 0x23, + SSAM_SSH_TC_ACC1 = 0x24, + SSAM_SSH_TC_SHB = 0x25, + SSAM_SSH_TC_POS = 0x26, /* For obtaining Laptop Studio screen position. */ }; -- cgit v1.2.3 From 1a3c7d0841ae24d28a15bbf87ee7d08614cec957 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Sat, 11 Jun 2022 01:25:11 -0700 Subject: swiotlb: remove the unused swiotlb_force declaration The 'swiotlb_force' is removed since commit c6af2aa9ffc9 ("swiotlb: make the swiotlb_init interface more useful"). Signed-off-by: Dongli Zhang Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 7ed35dd3de6e..bdc58a0e20b1 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -60,7 +60,6 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs); #ifdef CONFIG_SWIOTLB -extern enum swiotlb_force swiotlb_force; /** * struct io_tlb_mem - IO TLB Memory Pool Descriptor -- cgit v1.2.3 From 0829c35dc5346e90f428de61896362b51ab58296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 21 May 2022 13:02:03 +0200 Subject: pwm: Drop support for legacy drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no drivers left providing the legacy callbacks. So drop support for these. If this commit breaks your out-of-tree pwm driver, look at e.g. commit ec00cd5e63f0 ("pwm: renesas-tpu: Implement .apply() callback") for an example of the needed conversion for your driver. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 9771a0761a40..76463b71ad4e 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -261,10 +261,6 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, * called once per PWM device when the PWM chip is * registered. * @owner: helps prevent removal of modules exporting active PWMs - * @config: configure duty cycles and period length for this PWM - * @set_polarity: configure the polarity of this PWM - * @enable: enable PWM output toggling - * @disable: disable PWM output toggling */ struct pwm_ops { int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); @@ -276,14 +272,6 @@ struct pwm_ops { void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); struct module *owner; - - /* Only used by legacy drivers */ - int (*config)(struct pwm_chip *chip, struct pwm_device *pwm, - int duty_ns, int period_ns); - int (*set_polarity)(struct pwm_chip *chip, struct pwm_device *pwm, - enum pwm_polarity polarity); - int (*enable)(struct pwm_chip *chip, struct pwm_device *pwm); - void (*disable)(struct pwm_chip *chip, struct pwm_device *pwm); }; /** -- cgit v1.2.3 From ef2e35d90890932ee4546231c775326ffd7a3909 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 23 May 2022 19:45:00 +0200 Subject: pwm: Reorder header file to get rid of struct pwm_capture forward declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no cyclic dependency, so by reordering the forward declaration can be dropped. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 76463b71ad4e..6d6946dec771 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -6,7 +6,6 @@ #include #include -struct pwm_capture; struct seq_file; struct pwm_chip; @@ -251,6 +250,16 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, return 0; } +/** + * struct pwm_capture - PWM capture data + * @period: period of the PWM signal (in nanoseconds) + * @duty_cycle: duty cycle of the PWM signal (in nanoseconds) + */ +struct pwm_capture { + unsigned int period; + unsigned int duty_cycle; +}; + /** * struct pwm_ops - PWM controller operations * @request: optional hook for requesting a PWM @@ -300,16 +309,6 @@ struct pwm_chip { struct pwm_device *pwms; }; -/** - * struct pwm_capture - PWM capture data - * @period: period of the PWM signal (in nanoseconds) - * @duty_cycle: duty cycle of the PWM signal (in nanoseconds) - */ -struct pwm_capture { - unsigned int period; - unsigned int duty_cycle; -}; - #if IS_ENABLED(CONFIG_PWM) /* PWM user APIs */ struct pwm_device *pwm_request(int pwm_id, const char *label); -- cgit v1.2.3 From 5c8dca97404bf23f6531456ae4d14c862f6871a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 23 May 2022 19:45:01 +0200 Subject: pwm: Drop unused forward declaration from pwm.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The declaration was necessary until commit cc2d22477779 ("pwm: Drop per-chip dbg_show callback"). Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 6d6946dec771..9429930c5566 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -6,8 +6,6 @@ #include #include -struct seq_file; - struct pwm_chip; /** -- cgit v1.2.3 From 62c0aff64c8d6594357b6217db019552ea664b90 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Jun 2022 20:11:47 +0300 Subject: clk: Remove never used devm_clk_*unregister() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the entire history of the devm_clk_*unregister() existence they were used only once (*) in 2015. Remove them. *) The commit 264e3b75de4e ("clk: s2mps11: Simplify s2mps11_clk_probe unwind paths") exactly supports the point of the change proposed here. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220622171147.85603-1-andriy.shevchenko@linux.intel.com Acked-by: Uwe Kleine-König Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index c10dc4c659e2..72d937c03a3e 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1176,10 +1176,8 @@ int __must_check devm_clk_hw_register(struct device *dev, struct clk_hw *hw); int __must_check of_clk_hw_register(struct device_node *node, struct clk_hw *hw); void clk_unregister(struct clk *clk); -void devm_clk_unregister(struct device *dev, struct clk *clk); void clk_hw_unregister(struct clk_hw *hw); -void devm_clk_hw_unregister(struct device *dev, struct clk_hw *hw); /* helper functions */ const char *__clk_get_name(const struct clk *clk); -- cgit v1.2.3 From 203184571388a988283543f0fd7da1a0da7c3f91 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 24 May 2022 10:21:53 -0500 Subject: dmaengine: dw-edma: Detach the private data and chip info structures "struct dw_edma_chip" contains an internal structure "struct dw_edma" that is used by the eDMA core internally and should not be touched by the eDMA controller drivers themselves. But currently, the eDMA controller drivers like "dw-edma-pci" allocate and populate this internal structure before passing it on to the eDMA core. The eDMA core further populates the structure and uses it. This is wrong! Hence, move all the "struct dw_edma" specifics from controller drivers to the eDMA core. Link: https://lore.kernel.org/r/20220524152159.2370739-3-Frank.Li@nxp.com Tested-by: Serge Semin Tested-by: Manivannan Sadhasivam Signed-off-by: Frank Li Signed-off-by: Bjorn Helgaas Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Acked-By: Vinod Koul --- include/linux/dma/edma.h | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index d4333e721588..6f64e90d5c38 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -12,17 +12,63 @@ #include #include +#define EDMA_MAX_WR_CH 8 +#define EDMA_MAX_RD_CH 8 + struct dw_edma; +struct dw_edma_region { + phys_addr_t paddr; + void __iomem *vaddr; + size_t sz; +}; + +struct dw_edma_core_ops { + int (*irq_vector)(struct device *dev, unsigned int nr); +}; + +enum dw_edma_map_format { + EDMA_MF_EDMA_LEGACY = 0x0, + EDMA_MF_EDMA_UNROLL = 0x1, + EDMA_MF_HDMA_COMPAT = 0x5 +}; + /** * struct dw_edma_chip - representation of DesignWare eDMA controller hardware * @dev: struct device of the eDMA controller * @id: instance ID - * @dw: struct dw_edma that is filed by dw_edma_probe() + * @nr_irqs: total number of DMA IRQs + * @ops DMA channel to IRQ number mapping + * @wr_ch_cnt DMA write channel number + * @rd_ch_cnt DMA read channel number + * @rg_region DMA register region + * @ll_region_wr DMA descriptor link list memory for write channel + * @ll_region_rd DMA descriptor link list memory for read channel + * @dt_region_wr DMA data memory for write channel + * @dt_region_rd DMA data memory for read channel + * @mf DMA register map format + * @dw: struct dw_edma that is filled by dw_edma_probe() */ struct dw_edma_chip { struct device *dev; int id; + int nr_irqs; + const struct dw_edma_core_ops *ops; + + struct dw_edma_region rg_region; + + u16 wr_ch_cnt; + u16 rd_ch_cnt; + /* link list address */ + struct dw_edma_region ll_region_wr[EDMA_MAX_WR_CH]; + struct dw_edma_region ll_region_rd[EDMA_MAX_RD_CH]; + + /* data region */ + struct dw_edma_region dt_region_wr[EDMA_MAX_WR_CH]; + struct dw_edma_region dt_region_rd[EDMA_MAX_RD_CH]; + + enum dw_edma_map_format mf; + struct dw_edma *dw; }; -- cgit v1.2.3 From e51b3048116a6e10b96bd5298cbcb209b6d729cd Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 24 May 2022 10:21:54 -0500 Subject: dmaengine: dw-edma: Change rg_region to reg_base in struct dw_edma_chip struct dw_edma_region rg_region included virtual address, physical address and size information. But only the virtual address is used by EDMA driver. Change it to void __iomem *reg_base to clean up code. Link: https://lore.kernel.org/r/20220524152159.2370739-4-Frank.Li@nxp.com Tested-by: Serge Semin Tested-by: Manivannan Sadhasivam Signed-off-by: Frank Li Signed-off-by: Bjorn Helgaas Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Acked-By: Vinod Koul --- include/linux/dma/edma.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index 6f64e90d5c38..df9ba3ecb437 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -39,6 +39,7 @@ enum dw_edma_map_format { * @id: instance ID * @nr_irqs: total number of DMA IRQs * @ops DMA channel to IRQ number mapping + * @reg_base DMA register base address * @wr_ch_cnt DMA write channel number * @rd_ch_cnt DMA read channel number * @rg_region DMA register region @@ -55,7 +56,7 @@ struct dw_edma_chip { int nr_irqs; const struct dw_edma_core_ops *ops; - struct dw_edma_region rg_region; + void __iomem *reg_base; u16 wr_ch_cnt; u16 rd_ch_cnt; -- cgit v1.2.3 From 6951ee96c649f6e963b98c11b2b1a92697d3c45c Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 24 May 2022 10:21:55 -0500 Subject: dmaengine: dw-edma: Rename wr(rd)_ch_cnt to ll_wr(rd)_cnt in struct dw_edma_chip The struct dw_edma contains wr(rd)_ch_cnt fields. The EDMA driver gets write(read) channel number from register, then saves these into dw_edma. The wr(rd)_ch_cnt in dw_edma_chip actually means how many link list memory are available in ll_region_wr(rd)[EDMA_MAX_WR_CH]. Rename it to ll_wr(rd)_cnt to indicate actual usage. Link: https://lore.kernel.org/r/20220524152159.2370739-5-Frank.Li@nxp.com Tested-by: Serge Semin Tested-by: Manivannan Sadhasivam Signed-off-by: Frank Li Signed-off-by: Bjorn Helgaas Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Acked-By: Vinod Koul --- include/linux/dma/edma.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index df9ba3ecb437..fdbbeda170a9 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -40,8 +40,8 @@ enum dw_edma_map_format { * @nr_irqs: total number of DMA IRQs * @ops DMA channel to IRQ number mapping * @reg_base DMA register base address - * @wr_ch_cnt DMA write channel number - * @rd_ch_cnt DMA read channel number + * @ll_wr_cnt DMA write link list count + * @ll_rd_cnt DMA read link list count * @rg_region DMA register region * @ll_region_wr DMA descriptor link list memory for write channel * @ll_region_rd DMA descriptor link list memory for read channel @@ -58,8 +58,8 @@ struct dw_edma_chip { void __iomem *reg_base; - u16 wr_ch_cnt; - u16 rd_ch_cnt; + u16 ll_wr_cnt; + u16 ll_rd_cnt; /* link list address */ struct dw_edma_region ll_region_wr[EDMA_MAX_WR_CH]; struct dw_edma_region ll_region_rd[EDMA_MAX_RD_CH]; -- cgit v1.2.3 From d6b03171f9fc8127b3a7adfd4e74ee5d4dae5d14 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 24 May 2022 10:21:58 -0500 Subject: dmaengine: dw-edma: Add support for chip-specific flags Add a "flags" field to the "struct dw_edma_chip" so that the controller drivers can pass flags that are relevant to the platform. DW_EDMA_CHIP_LOCAL - Used by the controller drivers accessing eDMA locally. Local eDMA access doesn't require generating MSIs to the remote. Link: https://lore.kernel.org/r/20220524152159.2370739-8-Frank.Li@nxp.com Tested-by: Serge Semin Tested-by: Manivannan Sadhasivam Signed-off-by: Frank Li Signed-off-by: Bjorn Helgaas Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Acked-By: Vinod Koul --- include/linux/dma/edma.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index fdbbeda170a9..7d8062e9c544 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -33,12 +33,21 @@ enum dw_edma_map_format { EDMA_MF_HDMA_COMPAT = 0x5 }; +/** + * enum dw_edma_chip_flags - Flags specific to an eDMA chip + * @DW_EDMA_CHIP_LOCAL: eDMA is used locally by an endpoint + */ +enum dw_edma_chip_flags { + DW_EDMA_CHIP_LOCAL = BIT(0), +}; + /** * struct dw_edma_chip - representation of DesignWare eDMA controller hardware * @dev: struct device of the eDMA controller * @id: instance ID * @nr_irqs: total number of DMA IRQs * @ops DMA channel to IRQ number mapping + * @flags dw_edma_chip_flags * @reg_base DMA register base address * @ll_wr_cnt DMA write link list count * @ll_rd_cnt DMA read link list count @@ -55,6 +64,7 @@ struct dw_edma_chip { int id; int nr_irqs; const struct dw_edma_core_ops *ops; + u32 flags; void __iomem *reg_base; -- cgit v1.2.3 From 3cc624beba6310a8a534fb00841f22445a200d54 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Thu, 23 Jun 2022 19:32:44 +0300 Subject: fpga: fpga-mgr: support bitstream offset in image buffer At the moment FPGA manager core loads to the device entire image provided to fpga_mgr_load(). But it is not always whole FPGA image buffer meant to be written to the device. In particular, .dat formatted image for Microchip MPF contains meta info in the header that is not meant to be written to the device. This is issue for those low level drivers that loads data to the device with write() fpga_manager_ops callback, since write() can be called in iterator over scatter-gather table, not only linear image buffer. On the other hand, write_sg() callback is provided with whole image in scatter-gather form and can decide itself which part should be sent to the device. Add header_size and data_size to the fpga_image_info struct, add skip_header to the fpga_manager_ops struct and adjust fpga_mgr_write() callers with respect to them. * info->header_size indicates part at the beginning of image buffer that contains some meta info. It is optional and can be 0, initialized with mops->initial_header_size. * mops->skip_header tells fpga-mgr core whether write should start from the beginning of image buffer or at the offset of header_size. * info->data_size is the size of bitstream data that is meant to be written to the device. It is also optional and can be 0, which means bitstream data is up to the end of image buffer. Also add parse_header() callback to fpga_manager_ops, which purpose is to set info->header_size and info->data_size. At least initial_header_size bytes of image buffer will be passed into parse_header() first time. If it is not enough, parse_header() should set desired size into info->header_size and return -EAGAIN, then it will be called again with greater part of image buffer on the input. Suggested-by: Xu Yilun Signed-off-by: Ivan Bornyakov Acked-by: Xu Yilun Link: https://lore.kernel.org/r/20220623163248.3672-2-i.bornyakov@metrotek.ru Signed-off-by: Xu Yilun --- include/linux/fpga/fpga-mgr.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 0f9468771bb9..54f63459efd6 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -22,6 +22,8 @@ struct sg_table; * @FPGA_MGR_STATE_RESET: FPGA in reset state * @FPGA_MGR_STATE_FIRMWARE_REQ: firmware request in progress * @FPGA_MGR_STATE_FIRMWARE_REQ_ERR: firmware request failed + * @FPGA_MGR_STATE_PARSE_HEADER: parse FPGA image header + * @FPGA_MGR_STATE_PARSE_HEADER_ERR: Error during PARSE_HEADER stage * @FPGA_MGR_STATE_WRITE_INIT: preparing FPGA for programming * @FPGA_MGR_STATE_WRITE_INIT_ERR: Error during WRITE_INIT stage * @FPGA_MGR_STATE_WRITE: writing image to FPGA @@ -41,7 +43,9 @@ enum fpga_mgr_states { FPGA_MGR_STATE_FIRMWARE_REQ, FPGA_MGR_STATE_FIRMWARE_REQ_ERR, - /* write sequence: init, write, complete */ + /* write sequence: parse header, init, write, complete */ + FPGA_MGR_STATE_PARSE_HEADER, + FPGA_MGR_STATE_PARSE_HEADER_ERR, FPGA_MGR_STATE_WRITE_INIT, FPGA_MGR_STATE_WRITE_INIT_ERR, FPGA_MGR_STATE_WRITE, @@ -85,6 +89,9 @@ enum fpga_mgr_states { * @sgt: scatter/gather table containing FPGA image * @buf: contiguous buffer containing FPGA image * @count: size of buf + * @header_size: size of image header. + * @data_size: size of image data to be sent to the device. If not specified, + * whole image will be used. Header may be skipped in either case. * @region_id: id of target region * @dev: device that owns this * @overlay: Device Tree overlay @@ -98,6 +105,8 @@ struct fpga_image_info { struct sg_table *sgt; const char *buf; size_t count; + size_t header_size; + size_t data_size; int region_id; struct device *dev; #ifdef CONFIG_OF @@ -137,9 +146,16 @@ struct fpga_manager_info { /** * struct fpga_manager_ops - ops for low level fpga manager drivers - * @initial_header_size: Maximum number of bytes that should be passed into write_init + * @initial_header_size: minimum number of bytes that should be passed into + * parse_header and write_init. + * @skip_header: bool flag to tell fpga-mgr core whether it should skip + * info->header_size part at the beginning of the image when invoking + * write callback. * @state: returns an enum value of the FPGA's state * @status: returns status of the FPGA, including reconfiguration error code + * @parse_header: parse FPGA image header to set info->header_size and + * info->data_size. In case the input buffer is not large enough, set + * required size to info->header_size and return -EAGAIN. * @write_init: prepare the FPGA to receive configuration data * @write: write count bytes of configuration data to the FPGA * @write_sg: write the scatter list of configuration data to the FPGA @@ -153,8 +169,12 @@ struct fpga_manager_info { */ struct fpga_manager_ops { size_t initial_header_size; + bool skip_header; enum fpga_mgr_states (*state)(struct fpga_manager *mgr); u64 (*status)(struct fpga_manager *mgr); + int (*parse_header)(struct fpga_manager *mgr, + struct fpga_image_info *info, + const char *buf, size_t count); int (*write_init)(struct fpga_manager *mgr, struct fpga_image_info *info, const char *buf, size_t count); -- cgit v1.2.3 From fdfd42892f311e2b3695852036e5be23661dc590 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 15 Jun 2022 17:41:41 +0200 Subject: jump_label: mips: move module NOP patching into arch code MIPS is the only remaining architecture that needs to patch jump label NOP encodings to initialize them at load time. So let's move the module patching part of that from generic code into arch/mips, and drop it from the others. Signed-off-by: Ard Biesheuvel Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220615154142.1574619-3-ardb@kernel.org --- include/linux/jump_label.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index bf1eef337a07..2003a0935478 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -230,12 +230,12 @@ extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); extern void static_key_slow_inc_cpuslocked(struct static_key *key); extern void static_key_slow_dec_cpuslocked(struct static_key *key); -extern void jump_label_apply_nops(struct module *mod); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); extern void static_key_disable(struct static_key *key); extern void static_key_enable_cpuslocked(struct static_key *key); extern void static_key_disable_cpuslocked(struct static_key *key); +extern enum jump_label_type jump_label_init_type(struct jump_entry *entry); /* * We should be using ATOMIC_INIT() for initializing .enabled, but @@ -303,11 +303,6 @@ static inline int jump_label_text_reserved(void *start, void *end) static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} -static inline int jump_label_apply_nops(struct module *mod) -{ - return 0; -} - static inline void static_key_enable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); -- cgit v1.2.3 From 7e6b9db27de9f69a705c1a046d45882c768e16c3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 15 Jun 2022 17:41:42 +0200 Subject: jump_label: make initial NOP patching the special case Instead of defaulting to patching NOP opcodes at init time, and leaving it to the architectures to override this if this is not needed, switch to a model where doing nothing is the default. This is the common case by far, as only MIPS requires NOP patching at init time. On all other architectures, the correct encodings are emitted by the compiler and so no initial patching is needed. Signed-off-by: Ard Biesheuvel Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220615154142.1574619-4-ardb@kernel.org --- include/linux/jump_label.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 2003a0935478..570831ca9951 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -220,8 +220,6 @@ extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); -extern void arch_jump_label_transform_static(struct jump_entry *entry, - enum jump_label_type type); extern bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_apply(void); -- cgit v1.2.3 From eae6d58d67d9739be5f7ae2dbead1d0ef6528243 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Jun 2022 15:26:06 +0200 Subject: locking/lockdep: Fix lockdep_init_map_*() confusion Commit dfd5e3f5fe27 ("locking/lockdep: Mark local_lock_t") added yet another lockdep_init_map_*() variant, but forgot to update all the existing users of the most complicated version. This could lead to a loss of lock_type and hence an incorrect report. Given the relative rarity of both local_lock and these annotations, this is unlikely to happen in practise, still, best fix things. Fixes: dfd5e3f5fe27 ("locking/lockdep: Mark local_lock_t") Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YqyEDtoan20K0CVD@worktop.programming.kicks-ass.net --- include/linux/lockdep.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b6829b970093..1f1099dac3f0 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -188,7 +188,7 @@ static inline void lockdep_init_map_waits(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass, u8 inner, u8 outer) { - lockdep_init_map_type(lock, name, key, subclass, inner, LD_WAIT_INV, LD_LOCK_NORMAL); + lockdep_init_map_type(lock, name, key, subclass, inner, outer, LD_LOCK_NORMAL); } static inline void @@ -211,24 +211,28 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name, * or they are too narrow (they suffer from a false class-split): */ #define lockdep_set_class(lock, key) \ - lockdep_init_map_waits(&(lock)->dep_map, #key, key, 0, \ - (lock)->dep_map.wait_type_inner, \ - (lock)->dep_map.wait_type_outer) + lockdep_init_map_type(&(lock)->dep_map, #key, key, 0, \ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer, \ + (lock)->dep_map.lock_type) #define lockdep_set_class_and_name(lock, key, name) \ - lockdep_init_map_waits(&(lock)->dep_map, name, key, 0, \ - (lock)->dep_map.wait_type_inner, \ - (lock)->dep_map.wait_type_outer) + lockdep_init_map_type(&(lock)->dep_map, name, key, 0, \ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer, \ + (lock)->dep_map.lock_type) #define lockdep_set_class_and_subclass(lock, key, sub) \ - lockdep_init_map_waits(&(lock)->dep_map, #key, key, sub,\ - (lock)->dep_map.wait_type_inner, \ - (lock)->dep_map.wait_type_outer) + lockdep_init_map_type(&(lock)->dep_map, #key, key, sub, \ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer, \ + (lock)->dep_map.lock_type) #define lockdep_set_subclass(lock, sub) \ - lockdep_init_map_waits(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ - (lock)->dep_map.wait_type_inner, \ - (lock)->dep_map.wait_type_outer) + lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer, \ + (lock)->dep_map.lock_type) #define lockdep_set_novalidate_class(lock) \ lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock) -- cgit v1.2.3 From 837f66c71207542283831d0762c5dca3db5b397a Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 22 Jun 2022 15:27:08 -0400 Subject: KVM: Allow for different capacities in kvm_mmu_memory_cache structs Allow the capacity of the kvm_mmu_memory_cache struct to be chosen at declaration time rather than being fixed for all declarations. This will be used in a follow-up commit to declare an cache in x86 with a capacity of 512+ objects without having to increase the capacity of all caches in KVM. This change requires each cache now specify its capacity at runtime, since the cache struct itself no longer has a fixed capacity known at compile time. To protect against someone accidentally defining a kvm_mmu_memory_cache struct directly (without the extra storage), this commit includes a WARN_ON() in kvm_mmu_topup_memory_cache(). In order to support different capacities, this commit changes the objects pointer array to be dynamically allocated the first time the cache is topped-up. While here, opportunistically clean up the stack-allocated kvm_mmu_memory_cache structs in riscv and arm64 to use designated initializers. No functional change intended. Reviewed-by: Marc Zyngier Signed-off-by: David Matlack Message-Id: <20220516232138.1783324-22-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + include/linux/kvm_types.h | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a2bbdf3ab086..3554e48406e4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1356,6 +1356,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); +int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min); int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc); void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc); void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index f328a01db4fe..4d933518060f 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -85,12 +85,16 @@ struct gfn_to_pfn_cache { * MMU flows is problematic, as is triggering reclaim, I/O, etc... while * holding MMU locks. Note, these caches act more like prefetch buffers than * classical caches, i.e. objects are not returned to the cache on being freed. + * + * The @capacity field and @objects array are lazily initialized when the cache + * is topped up (__kvm_mmu_topup_memory_cache()). */ struct kvm_mmu_memory_cache { int nobjs; gfp_t gfp_zero; struct kmem_cache *kmem_cache; - void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE]; + int capacity; + void **objects; }; #endif -- cgit v1.2.3 From ebc3197963fc42841b183d0280bd3f9f85f13c30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:32 +0000 Subject: ipmr: add rcu protection over (struct vif_device)->dev We will soon use RCU instead of rwlock in ipmr & ip6mr This preliminary patch adds proper rcu verbs to read/write (struct vif_device)->dev Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index e05ee9f001ff..10d1e4fb4e9f 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -26,7 +26,7 @@ * @remote: Remote address for tunnels */ struct vif_device { - struct net_device *dev; + struct net_device __rcu *dev; netdevice_tracker dev_tracker; unsigned long bytes_in, bytes_out; unsigned long pkt_in, pkt_out; @@ -52,6 +52,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, unsigned short vif_index, u32 tb_id, struct netlink_ext_ack *extack) { @@ -60,7 +61,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, .family = family, .extack = extack, }, - .dev = vif->dev, + .dev = vif_dev, .vif_index = vif_index, .vif_flags = vif->flags, .tb_id = tb_id, @@ -73,6 +74,7 @@ static inline int mr_call_vif_notifiers(struct net *net, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, unsigned short vif_index, u32 tb_id, unsigned int *ipmr_seq) { @@ -80,7 +82,7 @@ static inline int mr_call_vif_notifiers(struct net *net, .info = { .family = family, }, - .dev = vif->dev, + .dev = vif_dev, .vif_index = vif_index, .vif_flags = vif->flags, .tb_id = tb_id, @@ -98,7 +100,8 @@ static inline int mr_call_vif_notifiers(struct net *net, #define MAXVIFS 32 #endif -#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev)) +/* Note: This helper is deprecated. */ +#define VIF_EXISTS(_mrt, _idx) (!!rcu_access_pointer((_mrt)->vif_table[_idx].dev)) /* mfc_flags: * MFC_STATIC - the entry was added statically (not by a routing daemon) -- cgit v1.2.3 From 194366b28b8306b7a24596c57c09635ab2891252 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:46 +0000 Subject: ipmr: adopt rcu_read_lock() in mr_dump() We no longer need to acquire mrt_lock() in mr_dump, using rcu_read_lock() is enough. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 10d1e4fb4e9f..9dd4bf157255 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -308,7 +308,7 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock, struct netlink_ext_ack *extack); + struct netlink_ext_ack *extack); #else static inline void vif_device_init(struct vif_device *v, struct net_device *dev, @@ -363,7 +363,7 @@ static inline int mr_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock, struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack) { return -EINVAL; } -- cgit v1.2.3 From ba1afa676d0babf99e99f5415db43fdd7ecef104 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 24 Jun 2022 17:31:47 +0800 Subject: lib: bitmap: fix the duplicated comments on bitmap_to_arr64() Thanks to the recent commit 0a97953fd221 ("lib: add bitmap_{from,to}_arr64") now we can directly convert a U64 value into a bitmap and vice verse. However when checking the header there is duplicated helper for bitmap_to_arr64(), but no bitmap_from_arr64(). Just fix the copy-n-paste error. Signed-off-by: Qu Wenruo Signed-off-by: Yury Norov --- include/linux/bitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 2e6cd5681040..f091a1664bf1 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -71,9 +71,9 @@ struct device; * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst + * bitmap_from_arr64(dst, buf, nbits) Copy nbits from u64[] buf to dst * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst * bitmap_to_arr64(buf, src, nbits) Copy nbits from buf to u64[] dst - * bitmap_to_arr64(buf, src, nbits) Copy nbits from buf to u64[] dst * bitmap_get_value8(map, start) Get 8bit value from map at start * bitmap_set_value8(map, value, start) Set 8bit value to map at start * -- cgit v1.2.3 From e61c451476e61450f6771ce03bbc01210a09be16 Mon Sep 17 00:00:00 2001 From: Mark-PK Tsai Date: Fri, 22 Apr 2022 14:24:35 +0800 Subject: dma-mapping: Add dma_release_coherent_memory to DMA API Add dma_release_coherent_memory to DMA API to allow dma user call it to release dev->dma_mem when the device is removed. Signed-off-by: Mark-PK Tsai Acked-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220422062436.14384-2-mark-pk.tsai@mediatek.com Signed-off-by: Mathieu Poirier --- include/linux/dma-map-ops.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 0d5b06b3a4a6..53db9655efe9 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -166,6 +166,7 @@ static inline void dma_pernuma_cma_reserve(void) { } #ifdef CONFIG_DMA_DECLARE_COHERENT int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size); +void dma_release_coherent_memory(struct device *dev); int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret); int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr); @@ -177,6 +178,8 @@ static inline int dma_declare_coherent_memory(struct device *dev, { return -ENOSYS; } + +#define dma_release_coherent_memory(dev) (0) #define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) #define dma_release_from_dev_coherent(dev, order, vaddr) (0) #define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) -- cgit v1.2.3 From e36de87d34a7f2f26b2e2129c6dc18a0024663eb Mon Sep 17 00:00:00 2001 From: Vineeth Pillai Date: Mon, 23 May 2022 15:03:27 -0400 Subject: KVM: debugfs: expose pid of vcpu threads Add a new debugfs file to expose the pid of each vcpu threads. This is very helpful for userland tools to get the vcpu pids without worrying about thread naming conventions of the VMM. Signed-off-by: Vineeth Pillai (Google) Message-Id: <20220523190327.2658-1-vineeth@bitbyteword.org> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3554e48406e4..3b40f8d68fbb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1435,6 +1435,8 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state); #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry); +#else +static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif int kvm_arch_hardware_enable(void); -- cgit v1.2.3 From 8ca869b24538a7b5501af368e87e4a59b0c04117 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 22 Jun 2022 09:31:31 +0200 Subject: pstore: Add priv field to pstore_record for backend specific use The EFI pstore backend will need to store per-record variable name data when we switch away from the efivars layer. Add a priv field to struct pstore_record, and document it as holding a backend specific pointer that is assumed to be a kmalloc()d buffer, and will be kfree()d when the entire record is freed. Acked-by: Kees Cook Signed-off-by: Ard Biesheuvel --- include/linux/pstore.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index e97a8188f0fd..638507a3c8ff 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -57,6 +57,9 @@ struct pstore_info; * @size: size of @buf * @ecc_notice_size: * ECC information for @buf + * @priv: pointer for backend specific use, will be + * kfree()d by the pstore core if non-NULL + * when the record is freed. * * Valid for PSTORE_TYPE_DMESG @type: * @@ -74,6 +77,7 @@ struct pstore_record { char *buf; ssize_t size; ssize_t ecc_notice_size; + void *priv; int count; enum kmsg_dump_reason reason; -- cgit v1.2.3 From ec3507b2ca51286de6ecd85fdac8e722219cdef8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Jun 2022 17:04:32 +0200 Subject: efi: vars: Don't drop lock in the middle of efivar_init() Even though the efivars_lock lock is documented as protecting the efivars->ops pointer (among other things), efivar_init() happily releases and reacquires the lock for every EFI variable that it enumerates. This used to be needed because the lock was originally a spinlock, which prevented the callback that is invoked for every variable from being able to sleep. However, releasing the lock could potentially invalidate the ops pointer, but more importantly, it might allow a SetVariable() runtime service call to take place concurrently, and the UEFI spec does not define how this affects an enumeration that is running in parallel using the GetNextVariable() runtime service, which is what efivar_init() uses. In the meantime, the lock has been converted into a semaphore, and the only reason we need to drop the lock is because the efivarfs pseudo filesystem driver will otherwise deadlock when it invokes the efivars API from the callback to create the efivar_entry items and insert them into the linked list. (EFI pstore is affected in a similar way) So let's switch to helpers that can be used while the lock is already taken. This way, we can hold on to the lock throughout the enumeration. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 53f64c14a525..56f04b6daeb0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1064,6 +1064,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head); int efivar_entry_add(struct efivar_entry *entry, struct list_head *head); +void __efivar_entry_add(struct efivar_entry *entry, struct list_head *head); int efivar_entry_remove(struct efivar_entry *entry); int __efivar_entry_delete(struct efivar_entry *entry); -- cgit v1.2.3 From 472831d4c4b2d8eac783b256e5c829487d5310df Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Jun 2022 13:17:20 +0200 Subject: efi: vars: Add thin wrapper around EFI get/set variable interface The current efivars layer is a jumble of list iterators, shadow data structures and safe variable manipulation helpers that really belong in the efivarfs pseudo file system once the obsolete sysfs access method to EFI variables is removed. So split off a minimal efivar get/set variable API that reuses the existing efivars_lock semaphore to mediate access to the various runtime services, primarily to ensure that performing a SetVariable() on one CPU while another is calling GetNextVariable() in a loop to enumerate the contents of the EFI variable store does not result in surprises. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 56f04b6daeb0..c828ab6f0e2a 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1099,6 +1099,26 @@ bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, bool efivar_variable_is_removable(efi_guid_t vendor, const char *name, size_t len); +int efivar_lock(void); +int efivar_trylock(void); +void efivar_unlock(void); + +efi_status_t efivar_get_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 *attr, unsigned long *size, void *data); + +efi_status_t efivar_get_next_variable(unsigned long *name_size, + efi_char16_t *name, efi_guid_t *vendor); + +efi_status_t efivar_set_variable_locked(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, + void *data, bool nonblocking); + +efi_status_t efivar_set_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, void *data); + +efi_status_t check_var_size(u32 attributes, unsigned long size); +efi_status_t check_var_size_nonblocking(u32 attributes, unsigned long size); + #if IS_ENABLED(CONFIG_EFI_CAPSULE_LOADER) extern bool efi_capsule_pending(int *reset_type); -- cgit v1.2.3 From 859748255b43460685e93a1f8a40b8cdc3be02f2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Jun 2022 13:21:26 +0200 Subject: efi: pstore: Omit efivars caching EFI varstore access layer Avoid the efivars layer and simply call the newly introduced EFI varstore helpers instead. This simplifies the code substantially, and also allows us to remove some hacks in the shared efivars layer that were added for efi-pstore specifically. In order to be able to delete the EFI variable associated with a record, store the UTF-16 name of the variable in the pstore record's priv field. That way, we don't have to make guesses regarding which variable the record may have been loaded from. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index c828ab6f0e2a..08bc6215e3b4 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1043,8 +1043,6 @@ struct efivar_entry { struct efi_variable var; struct list_head list; struct kobject kobj; - bool scanning; - bool deleting; }; static inline void -- cgit v1.2.3 From 0f5b2c69a4cbe4166ca24b76d5ada98ed2867741 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Jun 2022 13:34:03 +0200 Subject: efi: vars: Remove deprecated 'efivars' sysfs interface Commit 5d9db883761a ("efi: Add support for a UEFI variable filesystem") dated Oct 5, 2012, introduced a new efivarfs pseudo-filesystem to replace the efivars sysfs interface that was used up to that point to expose EFI variables to user space. The main problem with the sysfs interface was that it only supported up to 1024 bytes of payload per file, whereas the underlying variables themselves are only bounded by a platform specific per-variable and global limit that is typically much higher than 1024 bytes. The deprecated sysfs interface is only enabled on x86 and Itanium, other EFI enabled architectures only support the efivarfs pseudo-filesystem. So let's finally rip off the band aid, and drop the old interface entirely. This will make it easier to refactor and clean up the underlying infrastructure that is shared between efivars, efivarfs and efi-pstore, and is long overdue for a makeover. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 93ce85a14a46..10ef0a0d5e9a 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1045,12 +1045,6 @@ struct efivar_entry { struct kobject kobj; }; -static inline void -efivar_unregister(struct efivar_entry *var) -{ - kobject_put(&var->kobj); -} - int efivars_register(struct efivars *efivars, const struct efivar_operations *ops, struct kobject *kobject); @@ -1064,8 +1058,6 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), int efivar_entry_add(struct efivar_entry *entry, struct list_head *head); void __efivar_entry_add(struct efivar_entry *entry, struct list_head *head); void efivar_entry_remove(struct efivar_entry *entry); - -int __efivar_entry_delete(struct efivar_entry *entry); int efivar_entry_delete(struct efivar_entry *entry); int efivar_entry_size(struct efivar_entry *entry, unsigned long *size); @@ -1073,22 +1065,12 @@ int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes, unsigned long *size, void *data); int efivar_entry_get(struct efivar_entry *entry, u32 *attributes, unsigned long *size, void *data); -int efivar_entry_set(struct efivar_entry *entry, u32 attributes, - unsigned long size, void *data, struct list_head *head); int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, unsigned long *size, void *data, bool *set); -int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, - bool block, unsigned long size, void *data); - -int efivar_entry_iter_begin(void); -void efivar_entry_iter_end(void); int efivar_entry_iter(int (*func)(struct efivar_entry *, void *), struct list_head *head, void *data); -struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid, - struct list_head *head, bool remove); - bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, unsigned long data_size); bool efivar_variable_is_removable(efi_guid_t vendor, const char *name, -- cgit v1.2.3 From 3a75f9f2f9ad19bb9a0f566373ae91d8f09db85e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Jun 2022 15:48:29 +0200 Subject: efi: vars: Use locking version to iterate over efivars linked lists Both efivars and efivarfs uses __efivar_entry_iter() to go over the linked list that shadows the list of EFI variables held by the firmware, but fail to call the begin/end helpers that are documented as a prerequisite. So switch to the proper version, which is efivar_entry_iter(). Given that in both cases, efivar_entry_remove() is invoked with the lock held already, don't take the lock there anymore. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 08bc6215e3b4..54ca2d6b6c78 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1063,7 +1063,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), int efivar_entry_add(struct efivar_entry *entry, struct list_head *head); void __efivar_entry_add(struct efivar_entry *entry, struct list_head *head); -int efivar_entry_remove(struct efivar_entry *entry); +void efivar_entry_remove(struct efivar_entry *entry); int __efivar_entry_delete(struct efivar_entry *entry); int efivar_entry_delete(struct efivar_entry *entry); -- cgit v1.2.3 From 5ac941367a6f85777ef34ec15d60e17ea8e446d4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Jun 2022 15:54:53 +0200 Subject: efi: vars: Drop __efivar_entry_iter() helper which is no longer used __efivar_entry_iter() uses a list iterator in a dubious way, i.e., it assumes that the iteration variable always points to an object of the appropriate type, even if the list traversal exhausts the list completely, in which case it will point somewhere in the vicinity of the list's anchor instead. Fortunately, we no longer use this function so we can just get rid of it entirely. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 54ca2d6b6c78..93ce85a14a46 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1083,9 +1083,6 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, int efivar_entry_iter_begin(void); void efivar_entry_iter_end(void); -int __efivar_entry_iter(int (*func)(struct efivar_entry *, void *), - struct list_head *head, void *data, - struct efivar_entry **prev); int efivar_entry_iter(int (*func)(struct efivar_entry *, void *), struct list_head *head, void *data); -- cgit v1.2.3 From 2d82e6227ea189c0589e7383a36616ac2a2d248c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Jun 2022 18:19:43 +0200 Subject: efi: vars: Move efivar caching layer into efivarfs Move the fiddly bits of the efivar layer into its only remaining user, efivarfs, and confine its use to that particular module. All other uses of the EFI variable store have no need for this additional layer of complexity, given that they either only read variables, or read and write variables into a separate GUIDed namespace, and cannot be used to manipulate EFI variables that are covered by the EFI spec and/or affect the boot flow. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 10ef0a0d5e9a..8122c2ed505c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1030,21 +1030,6 @@ struct efivars { #define EFI_VAR_NAME_LEN 1024 -struct efi_variable { - efi_char16_t VariableName[EFI_VAR_NAME_LEN/sizeof(efi_char16_t)]; - efi_guid_t VendorGuid; - unsigned long DataSize; - __u8 Data[1024]; - efi_status_t Status; - __u32 Attributes; -} __attribute__((packed)); - -struct efivar_entry { - struct efi_variable var; - struct list_head list; - struct kobject kobj; -}; - int efivars_register(struct efivars *efivars, const struct efivar_operations *ops, struct kobject *kobject); @@ -1052,29 +1037,6 @@ int efivars_unregister(struct efivars *efivars); struct kobject *efivars_kobject(void); int efivar_supports_writes(void); -int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), - void *data, bool duplicates, struct list_head *head); - -int efivar_entry_add(struct efivar_entry *entry, struct list_head *head); -void __efivar_entry_add(struct efivar_entry *entry, struct list_head *head); -void efivar_entry_remove(struct efivar_entry *entry); -int efivar_entry_delete(struct efivar_entry *entry); - -int efivar_entry_size(struct efivar_entry *entry, unsigned long *size); -int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes, - unsigned long *size, void *data); -int efivar_entry_get(struct efivar_entry *entry, u32 *attributes, - unsigned long *size, void *data); -int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, - unsigned long *size, void *data, bool *set); - -int efivar_entry_iter(int (*func)(struct efivar_entry *, void *), - struct list_head *head, void *data); - -bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, - unsigned long data_size); -bool efivar_variable_is_removable(efi_guid_t vendor, const char *name, - size_t len); int efivar_lock(void); int efivar_trylock(void); -- cgit v1.2.3 From ede57d58e6f38d5bc66137368e4a1e68a157af6e Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Wed, 22 Jun 2022 18:09:03 +0200 Subject: net: helper function skb_len_add Move the len fields manipulation in the skbs to a helper function. There is a comment specifically requesting this and there are several other areas in the code displaying the same pattern which can be refactored. This improves code readability. Signed-off-by: Richard Gobert Link: https://lore.kernel.org/r/20220622160853.GA6478@debian Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cd4a8268894a..f6a27ab19202 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2351,6 +2351,18 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) return skb_headlen(skb) + __skb_pagelen(skb); } +/** + * skb_len_add - adds a number to len fields of skb + * @skb: buffer to add len to + * @delta: number of bytes to add + */ +static inline void skb_len_add(struct sk_buff *skb, int delta) +{ + skb->len += delta; + skb->data_len += delta; + skb->truesize += delta; +} + /** * __skb_fill_page_desc - initialise a paged fragment in an skb * @skb: buffer containing fragment to be initialised -- cgit v1.2.3 From 1e5267cd0895183e09c5bb76da85c674014285d0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Jun 2022 16:14:47 +0200 Subject: mnt_idmapping: add vfs{g,u}id_t Introduces new vfs{g,u}id_t types. Similar to k{g,u}id_t the new types are just simple wrapper structs around regular {g,u}id_t types. They allows to establish a type safety boundary between {g,u}ids on idmapped mounts and {g,u}ids as they are represented in filesystems themselves. A vfs{g,u}id_t is always created from a k{g,u}id_t, never directly from a {g,u}id_t as idmapped mounts remap a given {g,u}id according to the mount's idmapping. This is expressed in the VFS{G,U}IDT_INIT() macros. A vfs{g,u}id_t may be used as a k{g,u}id_t via AS_K{G,U}IDT(). This often happens when we need to check whether a {g,u}id mapped according to an idmapped mount is identical to a given k{g,u}id_t. For an example, see vfsgid_in_group_p() which determines whether the value of vfsgid_t matches the value of any of the caller's groups. Similar logic is expressed in the k{g,u}id_eq_vfs{g,u}id(). The from_vfs{g,u}id() helpers map a given vfs{g,u}id_t from the mount's idmapping into the filesystem idmapping. They make it possible to update a filesystem object such as inode->i_{g,u}id with the correct value. This makes it harder to accidently write a wrong {g,u}id anwywhere. The vfs{g,u}id_has_fsmapping() helpers check whether a given vfs{g,u}id_t can be mapped into the filesystem idmapping. All new helpers are nops on non-idmapped mounts. I've done work on this roughly 7 months ago but dropped it to focus on the testsuite. Linus brought this up independently just last week and it's time to move this along (see [1]). [1]: https://lore.kernel.org/lkml/CAHk-=win6+ahs1EwLkcq8apqLi_1wXFWbrPf340zYEhObpz4jA@mail.gmail.com Link: https://lore.kernel.org/r/20220621141454.2914719-2-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Aleksa Sarai Cc: Linus Torvalds Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/mnt_idmapping.h | 262 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 234 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index ee5a217de2a8..71b4cd5a7466 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -13,6 +13,122 @@ struct user_namespace; */ extern struct user_namespace init_user_ns; +typedef struct { + uid_t val; +} vfsuid_t; + +typedef struct { + gid_t val; +} vfsgid_t; + +#ifdef CONFIG_MULTIUSER +static inline uid_t __vfsuid_val(vfsuid_t uid) +{ + return uid.val; +} + +static inline gid_t __vfsgid_val(vfsgid_t gid) +{ + return gid.val; +} +#else +static inline uid_t __vfsuid_val(vfsuid_t uid) +{ + return 0; +} + +static inline gid_t __vfsgid_val(vfsgid_t gid) +{ + return 0; +} +#endif + +static inline bool vfsuid_valid(vfsuid_t uid) +{ + return __vfsuid_val(uid) != (uid_t)-1; +} + +static inline bool vfsgid_valid(vfsgid_t gid) +{ + return __vfsgid_val(gid) != (gid_t)-1; +} + +static inline bool vfsuid_eq(vfsuid_t left, vfsuid_t right) +{ + return __vfsuid_val(left) == __vfsuid_val(right); +} + +static inline bool vfsgid_eq(vfsgid_t left, vfsgid_t right) +{ + return __vfsgid_val(left) == __vfsgid_val(right); +} + +/** + * vfsuid_eq_kuid - check whether kuid and vfsuid have the same value + * @kuid: the kuid to compare + * @vfsuid: the vfsuid to compare + * + * Check whether @kuid and @vfsuid have the same values. + * + * Return: true if @kuid and @vfsuid have the same value, false if not. + */ +static inline bool vfsuid_eq_kuid(vfsuid_t vfsuid, kuid_t kuid) +{ + return __vfsuid_val(vfsuid) == __kuid_val(kuid); +} + +/** + * vfsgid_eq_kgid - check whether kgid and vfsgid have the same value + * @kgid: the kgid to compare + * @vfsgid: the vfsgid to compare + * + * Check whether @kgid and @vfsgid have the same values. + * + * Return: true if @kgid and @vfsgid have the same value, false if not. + */ +static inline bool vfsgid_eq_kgid(kgid_t kgid, vfsgid_t vfsgid) +{ + return __vfsgid_val(vfsgid) == __kgid_val(kgid); +} + +/* + * vfs{g,u}ids are created from k{g,u}ids. + * We don't allow them to be created from regular {u,g}id. + */ +#define VFSUIDT_INIT(val) (vfsuid_t){ __kuid_val(val) } +#define VFSGIDT_INIT(val) (vfsgid_t){ __kgid_val(val) } + +#define INVALID_VFSUID VFSUIDT_INIT(INVALID_UID) +#define INVALID_VFSGID VFSGIDT_INIT(INVALID_GID) + +/* + * Allow a vfs{g,u}id to be used as a k{g,u}id where we want to compare + * whether the mapped value is identical to value of a k{g,u}id. + */ +#define AS_KUIDT(val) (kuid_t){ __vfsuid_val(val) } +#define AS_KGIDT(val) (kgid_t){ __vfsgid_val(val) } + +#ifdef CONFIG_MULTIUSER +/** + * vfsgid_in_group_p() - check whether a vfsuid matches the caller's groups + * @vfsgid: the mnt gid to match + * + * This function can be used to determine whether @vfsuid matches any of the + * caller's groups. + * + * Return: 1 if vfsuid matches caller's groups, 0 if not. + */ +static inline int vfsgid_in_group_p(vfsgid_t vfsgid) +{ + return in_group_p(AS_KGIDT(vfsgid)); +} +#else +static inline int vfsgid_in_group_p(vfsgid_t vfsgid) +{ + return 1; +} +#endif + /** * initial_idmapping - check whether this is the initial mapping * @ns: idmapping to check @@ -67,21 +183,29 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns, * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is * returned. */ -static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - kuid_t kuid) + +static inline vfsuid_t make_vfsuid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kuid_t kuid) { uid_t uid; if (no_idmapping(mnt_userns, fs_userns)) - return kuid; + return VFSUIDT_INIT(kuid); if (initial_idmapping(fs_userns)) uid = __kuid_val(kuid); else uid = from_kuid(fs_userns, kuid); if (uid == (uid_t)-1) - return INVALID_UID; - return make_kuid(mnt_userns, uid); + return INVALID_VFSUID; + return VFSUIDT_INIT(make_kuid(mnt_userns, uid)); +} + +static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kuid_t kuid) +{ + return AS_KUIDT(make_vfsuid(mnt_userns, fs_userns, kuid)); } /** @@ -104,21 +228,56 @@ static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is * returned. */ -static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - kgid_t kgid) + +static inline vfsgid_t make_vfsgid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kgid_t kgid) { gid_t gid; if (no_idmapping(mnt_userns, fs_userns)) - return kgid; + return VFSGIDT_INIT(kgid); if (initial_idmapping(fs_userns)) gid = __kgid_val(kgid); else gid = from_kgid(fs_userns, kgid); if (gid == (gid_t)-1) - return INVALID_GID; - return make_kgid(mnt_userns, gid); + return INVALID_VFSGID; + return VFSGIDT_INIT(make_kgid(mnt_userns, gid)); +} + +static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + kgid_t kgid) +{ + return AS_KGIDT(make_vfsgid(mnt_userns, fs_userns, kgid)); +} + +/** + * from_vfsuid - map a vfsuid into the filesystem idmapping + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @vfsuid : vfsuid to be mapped + * + * Map @vfsuid into the filesystem idmapping. This function has to be used in + * order to e.g. write @vfsuid to inode->i_uid. + * + * Return: @vfsuid mapped into the filesystem idmapping + */ +static inline kuid_t from_vfsuid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + vfsuid_t vfsuid) +{ + uid_t uid; + + if (no_idmapping(mnt_userns, fs_userns)) + return AS_KUIDT(vfsuid); + uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid)); + if (uid == (uid_t)-1) + return INVALID_UID; + if (initial_idmapping(fs_userns)) + return KUIDT_INIT(uid); + return make_kuid(fs_userns, uid); } /** @@ -145,16 +304,53 @@ static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns, struct user_namespace *fs_userns, kuid_t kuid) { - uid_t uid; + return from_vfsuid(mnt_userns, fs_userns, VFSUIDT_INIT(kuid)); +} + +/** + * vfsuid_has_fsmapping - check whether a vfsuid maps into the filesystem + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @vfsuid: vfsuid to be mapped + * + * Check whether @vfsuid has a mapping in the filesystem idmapping. Use this + * function to check whether the filesystem idmapping has a mapping for + * @vfsuid. + * + * Return: true if @vfsuid has a mapping in the filesystem, false if not. + */ +static inline bool vfsuid_has_fsmapping(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + vfsuid_t vfsuid) +{ + return uid_valid(from_vfsuid(mnt_userns, fs_userns, vfsuid)); +} + +/** + * from_vfsgid - map a vfsgid into the filesystem idmapping + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @vfsgid : vfsgid to be mapped + * + * Map @vfsgid into the filesystem idmapping. This function has to be used in + * order to e.g. write @vfsgid to inode->i_gid. + * + * Return: @vfsgid mapped into the filesystem idmapping + */ +static inline kgid_t from_vfsgid(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + vfsgid_t vfsgid) +{ + gid_t gid; if (no_idmapping(mnt_userns, fs_userns)) - return kuid; - uid = from_kuid(mnt_userns, kuid); - if (uid == (uid_t)-1) - return INVALID_UID; + return AS_KGIDT(vfsgid); + gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid)); + if (gid == (gid_t)-1) + return INVALID_GID; if (initial_idmapping(fs_userns)) - return KUIDT_INIT(uid); - return make_kuid(fs_userns, uid); + return KGIDT_INIT(gid); + return make_kgid(fs_userns, gid); } /** @@ -181,16 +377,26 @@ static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, struct user_namespace *fs_userns, kgid_t kgid) { - gid_t gid; + return from_vfsgid(mnt_userns, fs_userns, VFSGIDT_INIT(kgid)); +} - if (no_idmapping(mnt_userns, fs_userns)) - return kgid; - gid = from_kgid(mnt_userns, kgid); - if (gid == (gid_t)-1) - return INVALID_GID; - if (initial_idmapping(fs_userns)) - return KGIDT_INIT(gid); - return make_kgid(fs_userns, gid); +/** + * vfsgid_has_fsmapping - check whether a vfsgid maps into the filesystem + * @mnt_userns: the mount's idmapping + * @fs_userns: the filesystem's idmapping + * @vfsgid: vfsgid to be mapped + * + * Check whether @vfsgid has a mapping in the filesystem idmapping. Use this + * function to check whether the filesystem idmapping has a mapping for + * @vfsgid. + * + * Return: true if @vfsgid has a mapping in the filesystem, false if not. + */ +static inline bool vfsgid_has_fsmapping(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + vfsgid_t vfsgid) +{ + return gid_valid(from_vfsgid(mnt_userns, fs_userns, vfsgid)); } /** -- cgit v1.2.3 From 234a3113f28d02973ecf501f83d796ea89db295f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Jun 2022 16:14:48 +0200 Subject: fs: add two type safe mapping helpers Introduce i_{g,u}id_into_vfs{g,u}id(). They return vfs{g,u}id_t. This makes it way harder to confused idmapped mount {g,u}ids with filesystem {g,u}ids. The two helpers will eventually replace the old non type safe i_{g,u}id_into_mnt() helpers once we finished converting all places. Add a comment noting that they will be removed in the future. All new helpers are nops on non-idmapped mounts. Link: https://lore.kernel.org/r/20220621141454.2914719-3-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Aleksa Sarai Cc: Linus Torvalds Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae..afcffa251cb9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1600,13 +1600,30 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) * @mnt_userns: user namespace of the mount the inode was found from * @inode: inode to map * + * Note, this will eventually be removed completely in favor of the type-safe + * i_uid_into_vfsuid(). + * * Return: the inode's i_uid mapped down according to @mnt_userns. * If the inode's i_uid has no mapping INVALID_UID is returned. */ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid); + return AS_KUIDT(make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid)); +} + +/** + * i_uid_into_vfsuid - map an inode's i_uid down into a mnt_userns + * @mnt_userns: user namespace of the mount the inode was found from + * @inode: inode to map + * + * Return: whe inode's i_uid mapped down according to @mnt_userns. + * If the inode's i_uid has no mapping INVALID_VFSUID is returned. + */ +static inline vfsuid_t i_uid_into_vfsuid(struct user_namespace *mnt_userns, + const struct inode *inode) +{ + return make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid); } /** @@ -1614,13 +1631,30 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, * @mnt_userns: user namespace of the mount the inode was found from * @inode: inode to map * + * Note, this will eventually be removed completely in favor of the type-safe + * i_gid_into_vfsgid(). + * * Return: the inode's i_gid mapped down according to @mnt_userns. * If the inode's i_gid has no mapping INVALID_GID is returned. */ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { - return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid); + return AS_KGIDT(make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid)); +} + +/** + * i_gid_into_vfsgid - map an inode's i_gid down into a mnt_userns + * @mnt_userns: user namespace of the mount the inode was found from + * @inode: inode to map + * + * Return: the inode's i_gid mapped down according to @mnt_userns. + * If the inode's i_gid has no mapping INVALID_VFSGID is returned. + */ +static inline vfsgid_t i_gid_into_vfsgid(struct user_namespace *mnt_userns, + const struct inode *inode) +{ + return make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid); } /** -- cgit v1.2.3 From 45c311501c77217c50d08ed08aa722c812d92ab5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Jun 2022 16:14:49 +0200 Subject: fs: use mount types in iattr Add ia_vfs{g,u}id members of type vfs{g,u}id_t to struct iattr. We use an anonymous union (similar to what we do in struct file) around ia_{g,u}id and ia_vfs{g,u}id. At the end of this series ia_{g,u}id and ia_vfs{g,u}id will always contain the same value independent of whether struct iattr is initialized from an idmapped mount. This is a change from how this is done today. Wrapping this in a anonymous unions has a few advantages. It allows us to avoid needlessly increasing struct iattr. Since the types for ia_{g,u}id and ia_vfs{g,u}id are structures with overlapping/identical members they are covered by 6.5.2.3/6 of the C standard and it is safe to initialize and access them. Filesystems that raise FS_ALLOW_IDMAP and thus support idmapped mounts will have to use ia_vfs{g,u}id and the associated helpers. And will be ported at the end of this series. They will immediately benefit from the type safe new helpers. Filesystems that do not support FS_ALLOW_IDMAP can continue to use ia_{g,u}id for now. The aim is to convert every filesystem to always use ia_vfs{g,u}id and thus ultimately remove the ia_{g,u}id members. Link: https://lore.kernel.org/r/20220621141454.2914719-4-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Aleksa Sarai Cc: Linus Torvalds Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 18 ++++++++++++++++-- include/linux/mnt_idmapping.h | 5 +++++ 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index afcffa251cb9..54ffcdce3ccb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -221,8 +221,22 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, struct iattr { unsigned int ia_valid; umode_t ia_mode; - kuid_t ia_uid; - kgid_t ia_gid; + /* + * The two anonymous unions wrap structures with the same member. + * + * Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id which + * are a dedicated type requiring the filesystem to use the dedicated + * helpers. Other filesystem can continue to use ia_{g,u}id until they + * have been ported. + */ + union { + kuid_t ia_uid; + vfsuid_t ia_vfsuid; + }; + union { + kgid_t ia_gid; + vfsgid_t ia_vfsgid; + }; loff_t ia_size; struct timespec64 ia_atime; struct timespec64 ia_mtime; diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 71b4cd5a7466..6a752b61088b 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -21,6 +21,11 @@ typedef struct { gid_t val; } vfsgid_t; +static_assert(sizeof(vfsuid_t) == sizeof(kuid_t)); +static_assert(sizeof(vfsgid_t) == sizeof(kgid_t)); +static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); +static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); + #ifdef CONFIG_MULTIUSER static inline uid_t __vfsuid_val(vfsuid_t uid) { -- cgit v1.2.3 From 1f36146a5a3dc6098566d34a9886f9e97c88d93e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Jun 2022 16:14:50 +0200 Subject: fs: introduce tiny iattr ownership update helpers Nearly all fileystems currently open-code the same checks for determining whether the i_{g,u}id fields of an inode need to be updated and then updating the fields. Introduce tiny helpers i_{g,u}id_needs_update() and i_{g,u}id_update() that wrap this logic. This allows filesystems to not care about updating inode->i_{g,u}id with the correct values themselves instead leaving this to the helpers. We also get rid of a lot of code duplication and make it easier to change struct iattr in the future since changes can be localized to these helpers. And finally we make it hard to conflate k{g,u}id_t types with vfs{g,u}id_t types for filesystems that support idmapped mounts. In the following patch we will port all filesystems that raise FS_ALLOW_IDMAP to use the new helpers. However, the ultimate goal is to convert all filesystems to make use of these helpers. All new helpers are nops on non-idmapped mounts. Link: https://lore.kernel.org/r/20220621141454.2914719-5-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Aleksa Sarai Cc: Linus Torvalds Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 54ffcdce3ccb..0f8cc7f2665a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1640,6 +1640,44 @@ static inline vfsuid_t i_uid_into_vfsuid(struct user_namespace *mnt_userns, return make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid); } +/** + * i_uid_needs_update - check whether inode's i_uid needs to be updated + * @mnt_userns: user namespace of the mount the inode was found from + * @attr: the new attributes of @inode + * @inode: the inode to update + * + * Check whether the $inode's i_uid field needs to be updated taking idmapped + * mounts into account if the filesystem supports it. + * + * Return: true if @inode's i_uid field needs to be updated, false if not. + */ +static inline bool i_uid_needs_update(struct user_namespace *mnt_userns, + const struct iattr *attr, + const struct inode *inode) +{ + return ((attr->ia_valid & ATTR_UID) && + !vfsuid_eq(attr->ia_vfsuid, + i_uid_into_vfsuid(mnt_userns, inode))); +} + +/** + * i_uid_update - update @inode's i_uid field + * @mnt_userns: user namespace of the mount the inode was found from + * @attr: the new attributes of @inode + * @inode: the inode to update + * + * Safely update @inode's i_uid field translating the vfsuid of any idmapped + * mount into the filesystem kuid. + */ +static inline void i_uid_update(struct user_namespace *mnt_userns, + const struct iattr *attr, + struct inode *inode) +{ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = from_vfsuid(mnt_userns, i_user_ns(inode), + attr->ia_vfsuid); +} + /** * i_gid_into_mnt - map an inode's i_gid down into a mnt_userns * @mnt_userns: user namespace of the mount the inode was found from @@ -1671,6 +1709,44 @@ static inline vfsgid_t i_gid_into_vfsgid(struct user_namespace *mnt_userns, return make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid); } +/** + * i_gid_needs_update - check whether inode's i_gid needs to be updated + * @mnt_userns: user namespace of the mount the inode was found from + * @attr: the new attributes of @inode + * @inode: the inode to update + * + * Check whether the $inode's i_gid field needs to be updated taking idmapped + * mounts into account if the filesystem supports it. + * + * Return: true if @inode's i_gid field needs to be updated, false if not. + */ +static inline bool i_gid_needs_update(struct user_namespace *mnt_userns, + const struct iattr *attr, + const struct inode *inode) +{ + return ((attr->ia_valid & ATTR_GID) && + !vfsgid_eq(attr->ia_vfsgid, + i_gid_into_vfsgid(mnt_userns, inode))); +} + +/** + * i_gid_update - update @inode's i_gid field + * @mnt_userns: user namespace of the mount the inode was found from + * @attr: the new attributes of @inode + * @inode: the inode to update + * + * Safely update @inode's i_gid field translating the vfsgid of any idmapped + * mount into the filesystem kgid. + */ +static inline void i_gid_update(struct user_namespace *mnt_userns, + const struct iattr *attr, + struct inode *inode) +{ + if (attr->ia_valid & ATTR_GID) + inode->i_gid = from_vfsgid(mnt_userns, i_user_ns(inode), + attr->ia_vfsgid); +} + /** * inode_fsuid_set - initialize inode's i_uid field with callers fsuid * @inode: inode to initialize -- cgit v1.2.3 From 35faf3109a78516f60ca13f957083d5e5535fde0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Jun 2022 16:14:51 +0200 Subject: fs: port to iattr ownership update helpers Earlier we introduced new helpers to abstract ownership update and remove code duplication. This converts all filesystems supporting idmapped mounts to make use of these new helpers. For now we always pass the initial idmapping which makes the idmapping functions these helpers call nops. This is done because we currently always pass the actual value to be written to i_{g,u}id via struct iattr. While this allowed us to treat the {g,u}id values in struct iattr as values that can be directly written to inode->i_{g,u}id it also increases the potential for confusion for filesystems. Now that we are have dedicated types to prevent this confusion we will ultimately only map the value from the idmapped mount into a filesystem value that can be written to inode->i_{g,u}id when the filesystem actually updates the inode. So pass down the initial idmapping until we finished that conversion at which point we pass down the mount's idmapping. No functional changes intended. Link: https://lore.kernel.org/r/20220621141454.2914719-6-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Aleksa Sarai Cc: Linus Torvalds Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/quotaops.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index a0f6668924d3..61ee34861ca2 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -22,9 +22,9 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) /* i_mutex must being held */ static inline bool is_quota_modification(struct inode *inode, struct iattr *ia) { - return (ia->ia_valid & ATTR_SIZE) || - (ia->ia_valid & ATTR_UID && !uid_eq(ia->ia_uid, inode->i_uid)) || - (ia->ia_valid & ATTR_GID && !gid_eq(ia->ia_gid, inode->i_gid)); + return ((ia->ia_valid & ATTR_SIZE) || + i_uid_needs_update(&init_user_ns, ia, inode) || + i_gid_needs_update(&init_user_ns, ia, inode)); } #if defined(CONFIG_QUOTA) -- cgit v1.2.3 From 71e7b535b8900d7ce7d5279fa472711db5251ae5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Jun 2022 16:14:52 +0200 Subject: quota: port quota helpers mount ids Port the is_quota_modification() and dqout_transfer() helper to type safe vfs{g,u}id_t. Since these helpers are only called by a few filesystems don't introduce a new helper but simply extend the existing helpers to pass down the mount's idmapping. Note, that this is a non-functional change, i.e. nothing will have happened here or at the end of this series to how quota are done! This a change necessary because we will at the end of this series make ownership changes easier to reason about by keeping the original value in struct iattr for both non-idmapped and idmapped mounts. For now we always pass the initial idmapping which makes the idmapping functions these helpers call nops. This is done because we currently always pass the actual value to be written to i_{g,u}id via struct iattr. While this allowed us to treat the {g,u}id values in struct iattr as values that can be directly written to inode->i_{g,u}id it also increases the potential for confusion for filesystems. Now that we are have dedicated types to prevent this confusion we will ultimately only map the value from the idmapped mount into a filesystem value that can be written to inode->i_{g,u}id when the filesystem actually updates the inode. So pass down the initial idmapping until we finished that conversion at which point we pass down the mount's idmapping. Since struct iattr uses an anonymous union with overlapping types as supported by the C standard, filesystems that haven't converted to ia_vfs{g,u}id won't see any difference and things will continue to work as before. In other words, no functional changes intended with this change. Link: https://lore.kernel.org/r/20220621141454.2914719-7-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Jan Kara Cc: Aleksa Sarai Cc: Linus Torvalds Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Jan Kara Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/quotaops.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 61ee34861ca2..0342ff6584fd 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -20,7 +20,8 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) } /* i_mutex must being held */ -static inline bool is_quota_modification(struct inode *inode, struct iattr *ia) +static inline bool is_quota_modification(struct user_namespace *mnt_userns, + struct inode *inode, struct iattr *ia) { return ((ia->ia_valid & ATTR_SIZE) || i_uid_needs_update(&init_user_ns, ia, inode) || @@ -115,7 +116,8 @@ int dquot_set_dqblk(struct super_block *sb, struct kqid id, struct qc_dqblk *di); int __dquot_transfer(struct inode *inode, struct dquot **transfer_to); -int dquot_transfer(struct inode *inode, struct iattr *iattr); +int dquot_transfer(struct user_namespace *mnt_userns, struct inode *inode, + struct iattr *iattr); static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type) { @@ -234,7 +236,8 @@ static inline void dquot_free_inode(struct inode *inode) { } -static inline int dquot_transfer(struct inode *inode, struct iattr *iattr) +static inline int dquot_transfer(struct user_namespace *mnt_userns, + struct inode *inode, struct iattr *iattr) { return 0; } -- cgit v1.2.3 From 0e363cf3fa598c69340794da068d4d9cbc869322 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Jun 2022 16:14:53 +0200 Subject: security: pass down mount idmapping to setattr hook Before this change we used to take a shortcut and place the actual values that would be written to inode->i_{g,u}id into struct iattr. This had the advantage that we moved idmappings mostly out of the picture early on but it made reasoning about changes more difficult than it should be. The filesystem was never explicitly told that it dealt with an idmapped mount. The transition to the value that needed to be stored in inode->i_{g,u}id appeared way too early and increased the probability of bugs in various codepaths. We know place the same value in struct iattr no matter if this is an idmapped mount or not. The vfs will only deal with type safe vfs{g,u}id_t. This makes it massively safer to perform permission checks as the type will tell us what checks we need to perform and what helpers we need to use. Adapt the security_inode_setattr() helper to pass down the mount's idmapping to account for that change. Link: https://lore.kernel.org/r/20220621141454.2914719-8-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Aleksa Sarai Cc: Linus Torvalds Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/evm.h | 6 ++++-- include/linux/security.h | 8 +++++--- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/evm.h b/include/linux/evm.h index 4c374be70247..aa63e0b3c0a2 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -21,7 +21,8 @@ extern enum integrity_status evm_verifyxattr(struct dentry *dentry, void *xattr_value, size_t xattr_value_len, struct integrity_iint_cache *iint); -extern int evm_inode_setattr(struct dentry *dentry, struct iattr *attr); +extern int evm_inode_setattr(struct user_namespace *mnt_userns, + struct dentry *dentry, struct iattr *attr); extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid); extern int evm_inode_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, const char *name, @@ -68,7 +69,8 @@ static inline enum integrity_status evm_verifyxattr(struct dentry *dentry, } #endif -static inline int evm_inode_setattr(struct dentry *dentry, struct iattr *attr) +static inline int evm_inode_setattr(struct user_namespace *mnt_userns, + struct dentry *dentry, struct iattr *attr) { return 0; } diff --git a/include/linux/security.h b/include/linux/security.h index 7fc4e9f49f54..4d0baf30266e 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -353,7 +353,8 @@ int security_inode_readlink(struct dentry *dentry); int security_inode_follow_link(struct dentry *dentry, struct inode *inode, bool rcu); int security_inode_permission(struct inode *inode, int mask); -int security_inode_setattr(struct dentry *dentry, struct iattr *attr); +int security_inode_setattr(struct user_namespace *mnt_userns, + struct dentry *dentry, struct iattr *attr); int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, const char *name, @@ -848,8 +849,9 @@ static inline int security_inode_permission(struct inode *inode, int mask) return 0; } -static inline int security_inode_setattr(struct dentry *dentry, - struct iattr *attr) +static inline int security_inode_setattr(struct user_namespace *mnt_userns, + struct dentry *dentry, + struct iattr *attr) { return 0; } -- cgit v1.2.3 From b27c82e1296572cfa3997e58db3118a33915f85c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Jun 2022 16:14:54 +0200 Subject: attr: port attribute changes to new types Now that we introduced new infrastructure to increase the type safety for filesystems supporting idmapped mounts port the first part of the vfs over to them. This ports the attribute changes codepaths to rely on the new better helpers using a dedicated type. Before this change we used to take a shortcut and place the actual values that would be written to inode->i_{g,u}id into struct iattr. This had the advantage that we moved idmappings mostly out of the picture early on but it made reasoning about changes more difficult than it should be. The filesystem was never explicitly told that it dealt with an idmapped mount. The transition to the value that needed to be stored in inode->i_{g,u}id appeared way too early and increased the probability of bugs in various codepaths. We know place the same value in struct iattr no matter if this is an idmapped mount or not. The vfs will only deal with type safe vfs{g,u}id_t. This makes it massively safer to perform permission checks as the type will tell us what checks we need to perform and what helpers we need to use. Fileystems raising FS_ALLOW_IDMAP can't simply write ia_vfs{g,u}id to inode->i_{g,u}id since they are different types. Instead they need to use the dedicated vfs{g,u}id_to_k{g,u}id() helpers that map the vfs{g,u}id into the filesystem. The other nice effect is that filesystems like overlayfs don't need to care about idmappings explicitly anymore and can simply set up struct iattr accordingly directly. Link: https://lore.kernel.org/lkml/CAHk-=win6+ahs1EwLkcq8apqLi_1wXFWbrPf340zYEhObpz4jA@mail.gmail.com [1] Link: https://lore.kernel.org/r/20220621141454.2914719-9-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Aleksa Sarai Cc: Linus Torvalds Cc: Al Viro CC: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 4 ++++ include/linux/quotaops.h | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0f8cc7f2665a..d6e3347cbf69 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -228,6 +228,10 @@ struct iattr { * are a dedicated type requiring the filesystem to use the dedicated * helpers. Other filesystem can continue to use ia_{g,u}id until they * have been ported. + * + * They always contain the same value. In other words FS_ALLOW_IDMAP + * pass down the same value on idmapped mounts as they would on regular + * mounts. */ union { kuid_t ia_uid; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 0342ff6584fd..0d8625d71733 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -24,8 +24,8 @@ static inline bool is_quota_modification(struct user_namespace *mnt_userns, struct inode *inode, struct iattr *ia) { return ((ia->ia_valid & ATTR_SIZE) || - i_uid_needs_update(&init_user_ns, ia, inode) || - i_gid_needs_update(&init_user_ns, ia, inode)); + i_uid_needs_update(mnt_userns, ia, inode) || + i_gid_needs_update(mnt_userns, ia, inode)); } #if defined(CONFIG_QUOTA) -- cgit v1.2.3 From 0cae04373b77a117830e5f7d7aaa7eaf01f950d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 6 Jun 2022 09:47:33 +0200 Subject: dmaengine: remove DMA_MEMCPY_SG once again This was removed before due to the complete lack of users, but 3218910fd585 ("dmaengine: Add core function and capability check for DMA_MEMCPY_SG") and 29cf37fa6dd9 ("dmaengine: Add consumer for the new DMA_MEMCPY_SG API function.") added it back despite still not having any users whatsoever. Fixes: 3218910fd585 ("dmaengine: Add core function and capability check for DMA_MEMCPY_SG") Fixes: 29cf37fa6dd9 ("dmaengine: Add consumer for the new DMA_MEMCPY_SG API function.") Signed-off-by: Christoph Hellwig Acked-by: Michal Simek Link: https://lore.kernel.org/r/20220606074733.622616-1-hch@lst.de Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index b46b88e6aa0d..c923f4e60f24 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -50,7 +50,6 @@ enum dma_status { */ enum dma_transaction_type { DMA_MEMCPY, - DMA_MEMCPY_SG, DMA_XOR, DMA_PQ, DMA_XOR_VAL, @@ -887,11 +886,6 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, size_t len, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_memcpy_sg)( - struct dma_chan *chan, - struct scatterlist *dst_sg, unsigned int dst_nents, - struct scatterlist *src_sg, unsigned int src_nents, - unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_xor)( struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags); @@ -1060,20 +1054,6 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy( len, flags); } -static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy_sg( - struct dma_chan *chan, - struct scatterlist *dst_sg, unsigned int dst_nents, - struct scatterlist *src_sg, unsigned int src_nents, - unsigned long flags) -{ - if (!chan || !chan->device || !chan->device->device_prep_dma_memcpy_sg) - return NULL; - - return chan->device->device_prep_dma_memcpy_sg(chan, dst_sg, dst_nents, - src_sg, src_nents, - flags); -} - static inline bool dmaengine_is_metadata_mode_supported(struct dma_chan *chan, enum dma_desc_metadata_mode mode) { -- cgit v1.2.3 From 742ab6df974ae8384a2dd213db1a3a06cf6d8936 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:15:32 +0200 Subject: x86/kvm/vmx: Make noinstr clean The recent mmio_stale_data fixes broke the noinstr constraints: vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x15b: call to wrmsrl.constprop.0() leaves .noinstr.text section vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x1bf: call to kvm_arch_has_assigned_device() leaves .noinstr.text section make it all happy again. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c20f2d55840c..83cf7fd842e0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1513,7 +1513,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm) { } -static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) +static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) { return false; } -- cgit v1.2.3 From 6b80b59b3555706508008f1f127b5412c89c7fd8 Mon Sep 17 00:00:00 2001 From: Alexandre Chartre Date: Tue, 14 Jun 2022 23:15:49 +0200 Subject: x86/bugs: Report AMD retbleed vulnerability Report that AMD x86 CPUs are vulnerable to the RETBleed (Arbitrary Speculative Code Execution with Return Instructions) attack. [peterz: add hygon] [kim: invert parity; fam15h] Co-developed-by: Kim Phillips Signed-off-by: Kim Phillips Signed-off-by: Alexandre Chartre Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 2c7477354744..314802f98b9d 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, extern ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_retbleed(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From a09a6e2399ba0595c3042b3164f3ca68a3cff33e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Jun 2022 23:16:03 +0200 Subject: objtool: Add entry UNRET validation Since entry asm is tricky, add a validation pass that ensures the retbleed mitigation has been done before the first actual RET instruction. Entry points are those that either have UNWIND_HINT_ENTRY, which acts as UNWIND_HINT_EMPTY but marks the instruction as an entry point, or those that have UWIND_HINT_IRET_REGS at +0. This is basically a variant of validate_branch() that is intra-function and it will simply follow all branches from marked entry points and ensures that all paths lead to ANNOTATE_UNRET_END. If a path hits RET or an indirection the path is a fail and will be reported. There are 3 ANNOTATE_UNRET_END instances: - UNTRAIN_RET itself - exception from-kernel; this path doesn't need UNTRAIN_RET - all early exceptions; these also don't need UNTRAIN_RET Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Josh Poimboeuf Signed-off-by: Borislav Petkov --- include/linux/objtool.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 15b940ec1eac..b026f1ae39c6 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -32,11 +32,14 @@ struct unwind_hint { * * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. * Useful for code which doesn't have an ELF function annotation. + * + * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 +#define UNWIND_HINT_TYPE_ENTRY 4 #ifdef CONFIG_OBJTOOL -- cgit v1.2.3 From 8faea26e611189e933ea2281975ff4dc7c1106b6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 24 Jun 2022 12:52:40 +0200 Subject: objtool: Re-add UNWIND_HINT_{SAVE_RESTORE} Commit c536ed2fffd5 ("objtool: Remove SAVE/RESTORE hints") removed the save/restore unwind hints because they were no longer needed. Now they're going to be needed again so re-add them. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov --- include/linux/objtool.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index b026f1ae39c6..10bc88cc3bf6 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -40,6 +40,8 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 #define UNWIND_HINT_TYPE_ENTRY 4 +#define UNWIND_HINT_TYPE_SAVE 5 +#define UNWIND_HINT_TYPE_RESTORE 6 #ifdef CONFIG_OBJTOOL @@ -127,7 +129,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -180,7 +182,7 @@ struct unwind_hint { #define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm -- cgit v1.2.3 From 7283f862bd991c8657e9bf1c02db772fcf018f13 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 22 Jun 2022 16:01:33 +0200 Subject: drm: Implement DRM aperture helpers under video/ Implement DRM's aperture helpers under video/ for sharing with other sub-systems. Remove DRM-isms from the interface. The helpers track the ownership of framebuffer apertures and provide hand-over from firmware, such as EFI and VESA, to native graphics drivers. Other subsystems, such as fbdev and vfio, also have to maintain ownership of framebuffer apertures. Moving DRM's aperture helpers to a more public location allows all subsystems to interact with each other and share a common implementation. The aperture helpers are selected by the various firmware drivers within DRM and fbdev, and the VGA text-console driver. The original DRM interface is kept in place for use by DRM drivers. v3: * prefix all interfaces with aperture_ (Javier) * rework and simplify documentation (Javier) * rename struct dev_aperture to struct aperture_range * rebase onto latest DRM * update MAINTAINERS entry Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Tested-by: Laszlo Ersek Link: https://patchwork.freedesktop.org/patch/msgid/20220622140134.12763-3-tzimmermann@suse.de --- include/linux/aperture.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 include/linux/aperture.h (limited to 'include/linux') diff --git a/include/linux/aperture.h b/include/linux/aperture.h new file mode 100644 index 000000000000..442f15a57cad --- /dev/null +++ b/include/linux/aperture.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef _LINUX_APERTURE_H_ +#define _LINUX_APERTURE_H_ + +#include + +struct pci_dev; +struct platform_device; + +#if defined(CONFIG_APERTURE_HELPERS) +int devm_aperture_acquire_for_platform_device(struct platform_device *pdev, + resource_size_t base, + resource_size_t size); + +int aperture_remove_conflicting_devices(resource_size_t base, resource_size_t size, + bool primary, const char *name); + +int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *name); +#else +static inline int devm_aperture_acquire_for_platform_device(struct platform_device *pdev, + resource_size_t base, + resource_size_t size) +{ + return 0; +} + +static inline int aperture_remove_conflicting_devices(resource_size_t base, resource_size_t size, + bool primary, const char *name) +{ + return 0; +} + +static inline int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *name) +{ + return 0; +} +#endif + +/** + * aperture_remove_all_conflicting_devices - remove all existing framebuffers + * @primary: also kick vga16fb if present; only relevant for VGA devices + * @name: a descriptive name of the requesting driver + * + * This function removes all graphics device drivers. Use this function on systems + * that can have their framebuffer located anywhere in memory. + * + * Returns: + * 0 on success, or a negative errno code otherwise + */ +static inline int aperture_remove_all_conflicting_devices(bool primary, const char *name) +{ + return aperture_remove_conflicting_devices(0, (resource_size_t)-1, primary, name); +} + +#endif -- cgit v1.2.3 From 7dc54d3b8d9100c65b0860f76342fc9f3b4694d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 24 Jun 2022 16:39:50 +0200 Subject: net: pcs: add Renesas MII converter driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a PCS driver for the MII converter that is present on the Renesas RZ/N1 SoC. This MII converter is reponsible for converting MII to RMII/RGMII or act as a MII pass-trough. Exposing it as a PCS allows to reuse it in both the switch driver and the stmmac driver. Currently, this driver only allows the PCS to be used by the dual Cortex-A7 subsystem since the register locking system is not used. Signed-off-by: Clément Léger Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/pcs-rzn1-miic.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/pcs-rzn1-miic.h (limited to 'include/linux') diff --git a/include/linux/pcs-rzn1-miic.h b/include/linux/pcs-rzn1-miic.h new file mode 100644 index 000000000000..56d12b21365d --- /dev/null +++ b/include/linux/pcs-rzn1-miic.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Schneider Electric + * + * Clément Léger + */ + +#ifndef __LINUX_PCS_MIIC_H +#define __LINUX_PCS_MIIC_H + +struct phylink; +struct device_node; + +struct phylink_pcs *miic_create(struct device *dev, struct device_node *np); + +void miic_destroy(struct phylink_pcs *pcs); + +#endif /* __LINUX_PCS_MIIC_H */ -- cgit v1.2.3 From 8da443b1a4036b5863fd2ec7e0251164b704c726 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 14 Jun 2022 11:05:34 +0200 Subject: tty/vt: consolemap: rename struct vc_data::vc_uni_pagedir* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a follow-up to the commit 4173f018aae1 (tty/vt: consolemap: rename and document struct uni_pagedir), rename also the members of struct vc_data. I.e. pagedir -> pagedict. And while touching all the places, remove also the unnecessary vc_ prefix. Suggested-by: Ilpo Järvinen Reviewed-by: Ilpo Järvinen Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220614090537.15557-5-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/console_struct.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index f75033f0277f..1518568aaf0f 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -157,8 +157,8 @@ struct vc_data { unsigned int vc_bell_duration; /* Console bell duration */ unsigned short vc_cur_blink_ms; /* Cursor blink duration */ struct vc_data **vc_display_fg; /* [!] Ptr to var holding fg console for this display */ - struct uni_pagedict *vc_uni_pagedir; - struct uni_pagedict **vc_uni_pagedir_loc; /* [!] Location of uni_pagedict variable for this console */ + struct uni_pagedict *uni_pagedict; + struct uni_pagedict **uni_pagedict_loc; /* [!] Location of uni_pagedict variable for this console */ struct uni_screen *vc_uni_screen; /* unicode screen content */ /* additional information is in vt_kern.h */ }; -- cgit v1.2.3 From 1714582a3a087eda8786d5a1b32b2ec86ca8a303 Mon Sep 17 00:00:00 2001 From: David Jander Date: Tue, 21 Jun 2022 08:12:24 +0200 Subject: spi: Move ctlr->cur_msg_prepared to struct spi_message This enables the possibility to transfer a message that is not at the current tip of the async message queue. This is in preparation of the next patch(es) which enable spi_sync messages to skip the queue altogether. Signed-off-by: David Jander Link: https://lore.kernel.org/r/20220621061234.3626638-2-david@protonic.nl Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index c96f526d9a20..1a75c26742f2 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -385,8 +385,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @queue: message queue * @idling: the device is entering idle state * @cur_msg: the currently in-flight message - * @cur_msg_prepared: spi_prepare_message was called for the currently - * in-flight message * @cur_msg_mapped: message has been mapped for DMA * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip * selected @@ -621,7 +619,6 @@ struct spi_controller { bool running; bool rt; bool auto_runtime_pm; - bool cur_msg_prepared; bool cur_msg_mapped; char last_cs; bool last_cs_mode_high; @@ -988,6 +985,7 @@ struct spi_transfer { * @queue: for use by whichever driver currently owns the message * @state: for use by whichever driver currently owns the message * @resources: for resource management when the spi message is processed + * @prepared: spi_prepare_message was called for the this message * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" @@ -1037,6 +1035,9 @@ struct spi_message { /* list of spi_res reources when the spi message is processed */ struct list_head resources; + + /* spi_prepare_message was called for this message */ + bool prepared; }; static inline void spi_message_init_no_memset(struct spi_message *m) -- cgit v1.2.3 From ae7d2346dc89ae89a6e0aabe6037591a11e593c0 Mon Sep 17 00:00:00 2001 From: David Jander Date: Tue, 21 Jun 2022 08:12:25 +0200 Subject: spi: Don't use the message queue if possible in spi_sync The interaction with the controller message queue and its corresponding auxiliary flags and variables requires the use of the queue_lock which is costly. Since spi_sync will transfer the complete message anyway, and not return until it is finished, there is no need to put the message into the queue if the queue is empty. This can save a lot of overhead. As an example of how significant this is, when using the MCP2518FD SPI CAN controller on a i.MX8MM SoC, the time during which the interrupt line stays active (during 3 relatively short spi_sync messages), is reduced from 98us to 72us by this patch. Signed-off-by: David Jander Link: https://lore.kernel.org/r/20220621061234.3626638-3-david@protonic.nl Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 1a75c26742f2..74261a83b5fa 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -461,6 +461,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @irq_flags: Interrupt enable state during PTP system timestamping * @fallback: fallback to pio if dma transfer return failure with * SPI_TRANS_FAIL_NO_START. + * @queue_empty: signal green light for opportunistically skipping the queue + * for spi_sync transfers. * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -677,6 +679,9 @@ struct spi_controller { /* Interrupt enable state during PTP system timestamping */ unsigned long irq_flags; + + /* Flag for enabling opportunistic skipping of the queue in spi_sync */ + bool queue_empty; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) @@ -986,6 +991,7 @@ struct spi_transfer { * @state: for use by whichever driver currently owns the message * @resources: for resource management when the spi message is processed * @prepared: spi_prepare_message was called for the this message + * @sync: this message took the direct sync path skipping the async queue * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" @@ -1037,7 +1043,10 @@ struct spi_message { struct list_head resources; /* spi_prepare_message was called for this message */ - bool prepared; + bool prepared; + + /* this message is skipping the async queue */ + bool sync; }; static inline void spi_message_init_no_memset(struct spi_message *m) -- cgit v1.2.3 From 66a221593cb26dd6aabba63bcd18173f4e69c7ab Mon Sep 17 00:00:00 2001 From: David Jander Date: Tue, 21 Jun 2022 08:12:30 +0200 Subject: spi: Remove the now unused ctlr->idling flag The ctlr->idling flag is never checked now, so we don't need to set it either. Signed-off-by: David Jander Link: https://lore.kernel.org/r/20220621061234.3626638-8-david@protonic.nl Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 74261a83b5fa..c58f46be762f 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -383,7 +383,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @pump_messages: work struct for scheduling work to the message pump * @queue_lock: spinlock to syncronise access to message queue * @queue: message queue - * @idling: the device is entering idle state * @cur_msg: the currently in-flight message * @cur_msg_mapped: message has been mapped for DMA * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip @@ -616,7 +615,6 @@ struct spi_controller { spinlock_t queue_lock; struct list_head queue; struct spi_message *cur_msg; - bool idling; bool busy; bool running; bool rt; -- cgit v1.2.3 From 69fa95905d40846756d22402690ddf5361a9d13b Mon Sep 17 00:00:00 2001 From: David Jander Date: Tue, 21 Jun 2022 08:12:33 +0200 Subject: spi: Ensure the io_mutex is held until spi_finalize_current_message() This patch introduces a completion that is completed in spi_finalize_current_message() and waited for in __spi_pump_transfer_message(). This way all manipulation of ctlr->cur_msg is done with the io_mutex held and strictly ordered: __spi_pump_transfer_message() will not return until spi_finalize_current_message() is done using ctlr->cur_msg, and its calling context is only touching ctlr->cur_msg after returning. Due to this, we can safely drop the spin-locks around ctlr->cur_msg. Signed-off-by: David Jander Link: https://lore.kernel.org/r/20220621061234.3626638-11-david@protonic.nl Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index c58f46be762f..c56e0d240a58 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -384,6 +384,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @queue_lock: spinlock to syncronise access to message queue * @queue: message queue * @cur_msg: the currently in-flight message + * @cur_msg_completion: a completion for the current in-flight message * @cur_msg_mapped: message has been mapped for DMA * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip * selected @@ -615,6 +616,7 @@ struct spi_controller { spinlock_t queue_lock; struct list_head queue; struct spi_message *cur_msg; + struct completion cur_msg_completion; bool busy; bool running; bool rt; @@ -989,7 +991,6 @@ struct spi_transfer { * @state: for use by whichever driver currently owns the message * @resources: for resource management when the spi message is processed * @prepared: spi_prepare_message was called for the this message - * @sync: this message took the direct sync path skipping the async queue * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" @@ -1042,9 +1043,6 @@ struct spi_message { /* spi_prepare_message was called for this message */ bool prepared; - - /* this message is skipping the async queue */ - bool sync; }; static inline void spi_message_init_no_memset(struct spi_message *m) -- cgit v1.2.3 From dc3029056b02414c29b6627e3dd7b16624725ae9 Mon Sep 17 00:00:00 2001 From: David Jander Date: Tue, 21 Jun 2022 08:12:34 +0200 Subject: spi: opportunistically skip ctlr->cur_msg_completion There are only a few drivers that do not call spi_finalize_current_message() in the context of transfer_one_message(), and even for those cases the completion ctlr->cur_msg_completion is not needed always. The calls to complete() and wait_for_completion() each take a spin-lock, which is costly. This patch makes it possible to avoid those calls in the big majority of cases, by introducing two flags that with the help of ordering via barriers can avoid using the completion safely. In case of a race with the context calling spi_finalize_current_message(), the scheme errs on the safe side and takes the completion. The impact of this patch is worth the effort: On a i.MX8MM SoC, the time the SPI bus is idle between two consecutive calls to spi_sync(), is reduced from 19.6us to 16.8us... roughly 15%. Signed-off-by: David Jander Link: https://lore.kernel.org/r/20220621061234.3626638-12-david@protonic.nl Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index c56e0d240a58..eb0d316e3c36 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -385,6 +385,12 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @queue: message queue * @cur_msg: the currently in-flight message * @cur_msg_completion: a completion for the current in-flight message + * @cur_msg_incomplete: Flag used internally to opportunistically skip + * the @cur_msg_completion. This flag is used to check if the driver has + * already called spi_finalize_current_message(). + * @cur_msg_need_completion: Flag used internally to opportunistically skip + * the @cur_msg_completion. This flag is used to signal the context that + * is running spi_finalize_current_message() that it needs to complete() * @cur_msg_mapped: message has been mapped for DMA * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip * selected @@ -617,6 +623,8 @@ struct spi_controller { struct list_head queue; struct spi_message *cur_msg; struct completion cur_msg_completion; + bool cur_msg_incomplete; + bool cur_msg_need_completion; bool busy; bool running; bool rt; -- cgit v1.2.3 From 4a2dcc35911324d6fcde09b1760cf4f2962699ef Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:23 -0700 Subject: block: introduce bdev_dma_alignment helper Preparing for upcoming dma_alignment users that have a block_device, but don't need the request_queue. Signed-off-by: Keith Busch Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-5-kbusch@fb.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2f7b43444c5f..2556fcdb645b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1365,6 +1365,11 @@ static inline int queue_dma_alignment(const struct request_queue *q) return q ? q->dma_alignment : 511; } +static inline unsigned int bdev_dma_alignment(struct block_device *bdev) +{ + return queue_dma_alignment(bdev_get_queue(bdev)); +} + static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { -- cgit v1.2.3 From cfa320f72882f0e944e2237287db84b0f7df877d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:27 -0700 Subject: iov: introduce iov_iter_aligned The existing iov_iter_alignment() function returns the logical OR of address and length. For cases where address and length need to be considered separately, introduce a helper function that a caller can specificy length and address masks that indicate if the iov is unaligned. Cc: Alexander Viro Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-9-kbusch@fb.com Signed-off-by: Jens Axboe --- include/linux/uio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 739285fe5a2f..34ba4a731179 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -219,6 +219,8 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); #endif size_t iov_iter_zero(size_t bytes, struct iov_iter *); +bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, + unsigned len_mask); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, -- cgit v1.2.3 From 5debd9691c3ac64c3acd6867c264ad38bbe48cdc Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:28 -0700 Subject: block: introduce bdev_iter_is_aligned helper Provide a convenient function for this repeatable coding pattern. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-10-kbusch@fb.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2556fcdb645b..0b8bc1fe0b2c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1370,6 +1370,13 @@ static inline unsigned int bdev_dma_alignment(struct block_device *bdev) return queue_dma_alignment(bdev_get_queue(bdev)); } +static inline bool bdev_iter_is_aligned(struct block_device *bdev, + struct iov_iter *iter) +{ + return iov_iter_is_aligned(iter, bdev_dma_alignment(bdev), + bdev_logical_block_size(bdev) - 1); +} + static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { -- cgit v1.2.3 From b1a000d3b8ec582da64bb644be633e5a0beffcbf Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:29 -0700 Subject: block: relax direct io memory alignment Use the address alignment requirements from the block_device for direct io instead of requiring addresses be aligned to the block size. User space can discover the alignment requirements from the dma_alignment queue attribute. User space can specify any hardware compatible DMA offset for each segment, but every segment length is still required to be a multiple of the block size. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-11-kbusch@fb.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0b8bc1fe0b2c..886c44e97434 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -424,6 +424,11 @@ struct request_queue { unsigned long nr_requests; /* Max # of requests */ unsigned int dma_pad_mask; + /* + * Drivers that set dma_alignment to less than 511 must be prepared to + * handle individual bvec's that are not a multiple of a SECTOR_SIZE + * due to possible offsets. + */ unsigned int dma_alignment; #ifdef CONFIG_BLK_INLINE_ENCRYPTION -- cgit v1.2.3 From 8689461be3f1ce6686bc26f1f379790bb0fc7a8c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 11:09:29 +0200 Subject: block: factor out a chunk_size_left helper Factor out a helper from blk_max_size_offset so that it can be reused independently. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Pankaj Raghav Link: https://lore.kernel.org/r/20220614090934.570632-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 886c44e97434..283961257cc9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -934,6 +934,17 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, return q->limits.max_sectors; } +/* + * Return how much of the chunk is left to be used for I/O at a given offset. + */ +static inline unsigned int blk_chunk_sectors_left(sector_t offset, + unsigned int chunk_sectors) +{ + if (unlikely(!is_power_of_2(chunk_sectors))) + return chunk_sectors - sector_div(offset, chunk_sectors); + return chunk_sectors - (offset & (chunk_sectors - 1)); +} + /* * Return maximum size of a request at given offset. Only valid for * file system requests. @@ -949,12 +960,8 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, return q->limits.max_sectors; } - if (likely(is_power_of_2(chunk_sectors))) - chunk_sectors -= offset & (chunk_sectors - 1); - else - chunk_sectors -= sector_div(offset, chunk_sectors); - - return min(q->limits.max_sectors, chunk_sectors); + return min(q->limits.max_sectors, + blk_chunk_sectors_left(offset, chunk_sectors)); } /* -- cgit v1.2.3 From efef739d5f37dc998b113fb965aea68d42a9eddc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 11:09:33 +0200 Subject: block: fold blk_max_size_offset into get_max_io_size Now that blk_max_size_offset has a single caller left, fold it into that and clean up the naming convention for the local variables there. Signed-off-by: Christoph Hellwig Reviewed-by: Pankaj Raghav Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220614090934.570632-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 283961257cc9..652c357dafb9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -945,25 +945,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, return chunk_sectors - (offset & (chunk_sectors - 1)); } -/* - * Return maximum size of a request at given offset. Only valid for - * file system requests. - */ -static inline unsigned int blk_max_size_offset(struct request_queue *q, - sector_t offset, - unsigned int chunk_sectors) -{ - if (!chunk_sectors) { - if (q->limits.chunk_sectors) - chunk_sectors = q->limits.chunk_sectors; - else - return q->limits.max_sectors; - } - - return min(q->limits.max_sectors, - blk_chunk_sectors_left(offset, chunk_sectors)); -} - /* * Access functions for manipulating queue properties */ -- cgit v1.2.3 From 2a9336c42a6abdcef564d522648a684a474a3483 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 11:09:34 +0200 Subject: block: move blk_queue_get_max_sectors to blk.h blk_queue_get_max_sectors is private to the block layer, so move it out of blkdev.h. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220614090934.570632-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 652c357dafb9..b2d42201bd5d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -921,19 +921,6 @@ static inline unsigned int bio_zone_is_seq(struct bio *bio) } #endif /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, - int op) -{ - if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)) - return min(q->limits.max_discard_sectors, - UINT_MAX >> SECTOR_SHIFT); - - if (unlikely(op == REQ_OP_WRITE_ZEROES)) - return q->limits.max_write_zeroes_sectors; - - return q->limits.max_sectors; -} - /* * Return how much of the chunk is left to be used for I/O at a given offset. */ -- cgit v1.2.3 From e589f46445960c274cc813a1cc8e2fc73b2a1849 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:26 +0200 Subject: block: fix default IO priority handling again Commit e70344c05995 ("block: fix default IO priority handling") introduced an inconsistency in get_current_ioprio() that tasks without IO context return IOPRIO_DEFAULT priority while tasks with freshly allocated IO context will return 0 (IOPRIO_CLASS_NONE/0) IO priority. Tasks without IO context used to be rare before 5a9d041ba2f6 ("block: move io_context creation into where it's needed") but after this commit they became common because now only BFQ IO scheduler setups task's IO context. Similar inconsistency is there for get_task_ioprio() so this inconsistency is now exposed to userspace and userspace will see different IO priority for tasks operating on devices with BFQ compared to devices without BFQ. Furthemore the changes done by commit e70344c05995 change the behavior when no IO priority is set for BFQ IO scheduler which is also documented in ioprio_set(2) manpage: "If no I/O scheduler has been set for a thread, then by default the I/O priority will follow the CPU nice value (setpriority(2)). In Linux kernels before version 2.6.24, once an I/O priority had been set using ioprio_set(), there was no way to reset the I/O scheduling behavior to the default. Since Linux 2.6.24, specifying ioprio as 0 can be used to reset to the default I/O scheduling behavior." So make sure we default to IOPRIO_CLASS_NONE as used to be the case before commit e70344c05995. Also cleanup alloc_io_context() to explicitely set this IO priority for the allocated IO context to avoid future surprises. Note that we tweak ioprio_best() to maintain ioprio_get(2) behavior and make this commit easily backportable. CC: stable@vger.kernel.org Fixes: e70344c05995 ("block: fix default IO priority handling") Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-1-jack@suse.cz Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 3f53bc27a19b..3d088a88f832 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -11,7 +11,7 @@ /* * Default IO priority. */ -#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM) +#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0) /* * Check that a priority value has a valid class. -- cgit v1.2.3 From f7eda402878b12bc0884c5bc1192a9e76ad121fb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:27 +0200 Subject: block: Return effective IO priority from get_current_ioprio() get_current_ioprio() is used to initialize IO priority of various requests. As such it should be returning the effective IO priority of the task (i.e., reflecting the fact that unset IO priority should get set based on task's CPU priority) so that the conversion is concentrated in one place. Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-2-jack@suse.cz Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 3d088a88f832..61ed6bb4998e 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -53,10 +53,17 @@ static inline int task_nice_ioclass(struct task_struct *task) static inline int get_current_ioprio(void) { struct io_context *ioc = current->io_context; + int prio; if (ioc) - return ioc->ioprio; - return IOPRIO_DEFAULT; + prio = ioc->ioprio; + else + prio = IOPRIO_DEFAULT; + + if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) + prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), + task_nice_ioprio(current)); + return prio; } /* -- cgit v1.2.3 From 893e5d32d5832674bcf6465f27958e883b72b346 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:28 +0200 Subject: block: Generalize get_current_ioprio() for any task get_current_ioprio() operates only on current task. We will need the same functionality for other tasks as well. Generalize get_current_ioprio() for that and also move the bulk out of the header file because it is large enough. Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-3-jack@suse.cz Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 61ed6bb4998e..9752cf4a9c7c 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -46,24 +46,18 @@ static inline int task_nice_ioclass(struct task_struct *task) return IOPRIO_CLASS_BE; } -/* - * If the calling process has set an I/O priority, use that. Otherwise, return - * the default I/O priority. - */ -static inline int get_current_ioprio(void) +#ifdef CONFIG_BLOCK +int __get_task_ioprio(struct task_struct *p); +#else +static inline int __get_task_ioprio(struct task_struct *p) { - struct io_context *ioc = current->io_context; - int prio; - - if (ioc) - prio = ioc->ioprio; - else - prio = IOPRIO_DEFAULT; + return IOPRIO_DEFAULT; +} +#endif /* CONFIG_BLOCK */ - if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) - prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), - task_nice_ioprio(current)); - return prio; +static inline int get_current_ioprio(void) +{ + return __get_task_ioprio(current); } /* -- cgit v1.2.3 From fc25545e17bd74befe0b8ab2c65ac84936be5066 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:29 +0200 Subject: block: Make ioprio_best() static Nobody outside of block/ioprio.c uses it. Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-4-jack@suse.cz Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 9752cf4a9c7c..7578d4f6a969 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -60,11 +60,6 @@ static inline int get_current_ioprio(void) return __get_task_ioprio(current); } -/* - * For inheritance, return the highest of the two given priorities - */ -extern int ioprio_best(unsigned short aprio, unsigned short bprio); - extern int set_task_ioprio(struct task_struct *task, int ioprio); #ifdef CONFIG_BLOCK -- cgit v1.2.3 From ab24a01b276508dc884761bcb8e2759c36702377 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 14 Jun 2022 10:50:54 +0300 Subject: tty: Add closing marker into comment in tty_ldisc.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The closing `` is missing. Add it. Fixes: 6bb6fa6908eb ("tty: Implement lookahead to process XON/XOFF timely") Reported-by: Stephen Rothwell Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/9bc6d45d-48c8-519-1646-78ba22505b1f@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_ldisc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 33678e1936f6..ede6f2157f32 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -187,7 +187,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * function for automatic flow control. * * @lookahead_buf: [DRV] ``void ()(struct tty_struct *tty, - * const unsigned char *cp, const char *fp, int count) + * const unsigned char *cp, const char *fp, int count)`` * * This function is called by the low-level tty driver for characters * not eaten by ->receive_buf() or ->receive_buf2(). It is useful for -- cgit v1.2.3 From f9008285bb69e4713918a665250ab2d356b731ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 13 Jun 2022 14:39:05 +0300 Subject: serial: Drop timeout from uart_port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 31f6bd7fad3b ("serial: Store character timing information to uart_port"), per frame timing information is available on uart_port. Uart port's timeout can be derived from frame_time by multiplying with fifosize. Most callers of uart_poll_timeout are not made under port's lock. To be on the safe side, make sure frame_time is only accessed once. As fifo_size is effectively a constant, it shouldn't cause any issues. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220613113905.22962-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8032ffa741ed..faaf2372c60d 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -232,7 +232,6 @@ struct uart_port { int hw_stopped; /* sw-assisted CTS flow state */ unsigned int mctrl; /* current modem ctrl settings */ - unsigned int timeout; /* character-based timeout */ unsigned int frame_time; /* frame timing in ns */ unsigned int type; /* port type */ const struct uart_ops *ops; @@ -335,10 +334,23 @@ unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios unsigned int max); unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud); +/* + * Calculates FIFO drain time. + */ +static inline unsigned long uart_fifo_timeout(struct uart_port *port) +{ + u64 fifo_timeout = (u64)READ_ONCE(port->frame_time) * port->fifosize; + + /* Add .02 seconds of slop */ + fifo_timeout += 20 * NSEC_PER_MSEC; + + return max(nsecs_to_jiffies(fifo_timeout), 1UL); +} + /* Base timer interval for polling */ static inline int uart_poll_timeout(struct uart_port *port) { - int timeout = port->timeout; + int timeout = uart_fifo_timeout(port); return timeout > 6 ? (timeout / 2 - 2) : 1; } -- cgit v1.2.3 From eb47b59afb7e46c952d7b03884245364990d4910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 24 Jun 2022 23:54:23 +0300 Subject: serial: Convert SERIAL_XMIT_SIZE to UART_XMIT_SIZE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both UART_XMIT_SIZE and SERIAL_XMIT_SIZE are defined. Make them all UART_XMIT_SIZE. Reviewed-by: Jiri Slaby Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220624205424.12686-6-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial.h b/include/linux/serial.h index 0b8b7d7c8f33..70a9866e4abb 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -9,7 +9,6 @@ #ifndef _LINUX_SERIAL_H #define _LINUX_SERIAL_H -#include #include /* Helper for dealing with UART_LCR_WLEN* defines */ @@ -25,11 +24,6 @@ struct async_icount { __u32 buf_overrun; }; -/* - * The size of the serial xmit buffer is 1 page, or 4096 bytes - */ -#define SERIAL_XMIT_SIZE PAGE_SIZE - #include #endif /* _LINUX_SERIAL_H */ -- cgit v1.2.3 From 34619de1b8cb52afa90bbeb3b4fbad34c28f19cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 24 Jun 2022 23:54:24 +0300 Subject: serial: Consolidate BOTH_EMPTY use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per file BOTH_EMPTY defines are littering our source code here and there. Define once in serial.h and create helper for the check too. Reviewed-by: Jiri Slaby Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220624205424.12686-7-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial.h b/include/linux/serial.h index 70a9866e4abb..3d6fe3ef92cf 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -10,10 +10,19 @@ #define _LINUX_SERIAL_H #include +#include /* Helper for dealing with UART_LCR_WLEN* defines */ #define UART_LCR_WLEN(x) ((x) - 5) +/* FIFO and shifting register empty */ +#define UART_LSR_BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) + +static inline bool uart_lsr_tx_empty(u16 lsr) +{ + return (lsr & UART_LSR_BOTH_EMPTY) == UART_LSR_BOTH_EMPTY; +} + /* * Counters of the input lines (CTS, DSR, RI, CD) interrupts */ -- cgit v1.2.3 From f8ba5680a56be696b3f4343ed0a591abab807da4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 24 Jun 2022 23:42:05 +0300 Subject: serial: 8250: make saved LSR larger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DW flags address received as BIT(8) in LSR. In order to not lose that on read, enlarge lsr_saved_flags to u16. Adjust lsr/status variables and related call chains to use u16. Technically, some of these type conversion would not be needed but it doesn't hurt to be consistent. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220624204210.11112-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index ff84a3ed10ea..4565f25ba9a2 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -119,7 +119,7 @@ struct uart_8250_port { * be immediately processed. */ #define LSR_SAVE_FLAGS UART_LSR_BRK_ERROR_BITS - unsigned char lsr_saved_flags; + u16 lsr_saved_flags; #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA unsigned char msr_saved_flags; @@ -170,8 +170,8 @@ extern void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud, unsigned int quot_frac); extern int fsl8250_handle_irq(struct uart_port *port); int serial8250_handle_irq(struct uart_port *port, unsigned int iir); -unsigned char serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr); -void serial8250_read_char(struct uart_8250_port *up, unsigned char lsr); +u16 serial8250_rx_chars(struct uart_8250_port *up, u16 lsr); +void serial8250_read_char(struct uart_8250_port *up, u16 lsr); void serial8250_tx_chars(struct uart_8250_port *up); unsigned int serial8250_modem_status(struct uart_8250_port *up); void serial8250_init_port(struct uart_8250_port *up); -- cgit v1.2.3 From 507bd6fbaaefcb8dd89bd00baddf00b439d30c51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 24 Jun 2022 23:42:06 +0300 Subject: serial: 8250: create lsr_save_mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow drivers to alter LSR save mask. Reviewed-by: Andy Shevchenko Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220624204210.11112-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 4565f25ba9a2..8c7b793aa4d7 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -120,6 +120,7 @@ struct uart_8250_port { */ #define LSR_SAVE_FLAGS UART_LSR_BRK_ERROR_BITS u16 lsr_saved_flags; + u16 lsr_save_mask; #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA unsigned char msr_saved_flags; -- cgit v1.2.3 From ae50bb2752836277ae15aa4e9d99074d6d947946 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 24 Jun 2022 23:42:08 +0300 Subject: serial: take termios_rwsem for ->rs485_config() & pass termios as param MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be able to alter ADDRB within ->rs485_config(), take termios_rwsem before calling ->rs485_config() and pass termios. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220624204210.11112-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index faaf2372c60d..b7b86ee3cb12 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -133,6 +133,7 @@ struct uart_port { unsigned int old); void (*handle_break)(struct uart_port *); int (*rs485_config)(struct uart_port *, + struct ktermios *termios, struct serial_rs485 *rs485); int (*iso7816_config)(struct uart_port *, struct serial_iso7816 *iso7816); -- cgit v1.2.3 From 4d0548a7b806a78ba253f1389b9ecdcaca47d583 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Mon, 27 Jun 2022 08:01:58 -0500 Subject: mnt_idmapping: return false when comparing two invalid ids INVALID_VFS{U,G}ID represent ids which have no mapping in the target mnt_usersns. This can happen for a couple of different reasons -- the source id might be valid but has no mapping in mnt_userns, or the source id might have been invalid (either due to a failed mapping or because it was set to invalid to indicate it is uninitialized). This means that two arbitrary vfs{u,g}ids which are both invalid could represent two different underlying ids, or they could represent a failed mapping and an uninitialized value. In these situation the vfs{u,g}id equality functions evaluate these ids as equal, and care must be taken when comparing ids to avoid problems. It would be less error prone to always evaluate two invalid ids as not equal to each other, and to check explicitly for vfs{u,g}id validity when that is needed. Change all vfs{u,g}id equality functions to return false when both ids are invalid. Functions for checking whether an id is valid exist and are already being used by code which needs to check this. Link: https://lore.kernel.org/linux-fsdevel/YrIMZirGoE0VIO45@do-x1extreme Signed-off-by: Seth Forshee Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Christian Brauner (Microsoft) --- include/linux/mnt_idmapping.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 6a752b61088b..21bd22a7b326 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -60,12 +60,12 @@ static inline bool vfsgid_valid(vfsgid_t gid) static inline bool vfsuid_eq(vfsuid_t left, vfsuid_t right) { - return __vfsuid_val(left) == __vfsuid_val(right); + return vfsuid_valid(left) && __vfsuid_val(left) == __vfsuid_val(right); } static inline bool vfsgid_eq(vfsgid_t left, vfsgid_t right) { - return __vfsgid_val(left) == __vfsgid_val(right); + return vfsgid_valid(left) && __vfsgid_val(left) == __vfsgid_val(right); } /** @@ -76,10 +76,11 @@ static inline bool vfsgid_eq(vfsgid_t left, vfsgid_t right) * Check whether @kuid and @vfsuid have the same values. * * Return: true if @kuid and @vfsuid have the same value, false if not. + * Comparison between two invalid uids returns false. */ static inline bool vfsuid_eq_kuid(vfsuid_t vfsuid, kuid_t kuid) { - return __vfsuid_val(vfsuid) == __kuid_val(kuid); + return vfsuid_valid(vfsuid) && __vfsuid_val(vfsuid) == __kuid_val(kuid); } /** @@ -90,10 +91,11 @@ static inline bool vfsuid_eq_kuid(vfsuid_t vfsuid, kuid_t kuid) * Check whether @kgid and @vfsgid have the same values. * * Return: true if @kgid and @vfsgid have the same value, false if not. + * Comparison between two invalid gids returns false. */ static inline bool vfsgid_eq_kgid(kgid_t kgid, vfsgid_t vfsgid) { - return __vfsgid_val(vfsgid) == __kgid_val(kgid); + return vfsgid_valid(vfsgid) && __vfsgid_val(vfsgid) == __kgid_val(kgid); } /* -- cgit v1.2.3 From 38a523a2946d3a0961d141d477a1ee2b1f3bdbb1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 27 Jun 2022 16:36:57 +0200 Subject: Revert "devcoredump: remove the useless gfp_t parameter in dev_coredumpv and dev_coredumpm" This reverts commit 77515ebaf01920e2db49e04672ef669a7c2907f2 as it causes build problems in linux-next. It needs to be reintroduced in a way that can allow the api to evolve and not require a "flag day" to catch all users. Link: https://lore.kernel.org/r/20220623160723.7a44b573@canb.auug.org.au Cc: Duoming Zhou Cc: Brian Norris Cc: Johannes Berg Reported-by: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- include/linux/devcoredump.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h index c7d840d824c3..c008169ed2c6 100644 --- a/include/linux/devcoredump.h +++ b/include/linux/devcoredump.h @@ -52,26 +52,27 @@ static inline void _devcd_free_sgtable(struct scatterlist *table) #ifdef CONFIG_DEV_COREDUMP -void dev_coredumpv(struct device *dev, void *data, size_t datalen); +void dev_coredumpv(struct device *dev, void *data, size_t datalen, + gfp_t gfp); void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, + void *data, size_t datalen, gfp_t gfp, ssize_t (*read)(char *buffer, loff_t offset, size_t count, void *data, size_t datalen), void (*free)(void *data)); void dev_coredumpsg(struct device *dev, struct scatterlist *table, - size_t datalen); + size_t datalen, gfp_t gfp); #else static inline void dev_coredumpv(struct device *dev, void *data, - size_t datalen) + size_t datalen, gfp_t gfp) { vfree(data); } static inline void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, + void *data, size_t datalen, gfp_t gfp, ssize_t (*read)(char *buffer, loff_t offset, size_t count, void *data, size_t datalen), void (*free)(void *data)) @@ -80,7 +81,7 @@ dev_coredumpm(struct device *dev, struct module *owner, } static inline void dev_coredumpsg(struct device *dev, struct scatterlist *table, - size_t datalen) + size_t datalen, gfp_t gfp) { _devcd_free_sgtable(table); } -- cgit v1.2.3 From 086c00c71fc8d47db6983f419a45f9ee167de03f Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Wed, 15 Jun 2022 12:10:56 +1000 Subject: kernfs: make ->attr.open RCU protected. After removal of kernfs_open_node->refcnt in the previous patch, kernfs_open_node_lock can be removed as well by making ->attr.open RCU protected. kernfs_put_open_node can delegate freeing to ->attr.open to RCU and other readers of ->attr.open can do so under rcu_read_(un)lock. Suggested by: Al Viro Acked-by: Tejun Heo Signed-off-by: Imran Khan Link: https://lore.kernel.org/r/20220615021059.862643-2-imran.f.khan@oracle.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index e2ae15a6225e..13f54f078a52 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -114,7 +114,7 @@ struct kernfs_elem_symlink { struct kernfs_elem_attr { const struct kernfs_ops *ops; - struct kernfs_open_node *open; + struct kernfs_open_node __rcu *open; loff_t size; struct kernfs_node *notify_next; /* for kernfs_notify() */ }; -- cgit v1.2.3 From b8f35fa1188b84035c59d4842826c4e93a1b1c9f Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Wed, 15 Jun 2022 12:10:57 +1000 Subject: kernfs: Change kernfs_notify_list to llist. At present kernfs_notify_list is implemented as a singly linked list of kernfs_node(s), where last element points to itself and value of ->attr.next tells if node is present on the list or not. Both addition and deletion to list happen under kernfs_notify_lock. Change kernfs_notify_list to llist so that addition to list can heppen locklessly. Suggested by: Al Viro Acked-by: Tejun Heo Signed-off-by: Imran Khan Link: https://lore.kernel.org/r/20220615021059.862643-3-imran.f.khan@oracle.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 13f54f078a52..2dd9c8df0f4f 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -116,7 +116,7 @@ struct kernfs_elem_attr { const struct kernfs_ops *ops; struct kernfs_open_node __rcu *open; loff_t size; - struct kernfs_node *notify_next; /* for kernfs_notify() */ + struct llist_node notify_next; /* for kernfs_notify() */ }; /* -- cgit v1.2.3 From 1d25b84e444ad66313c473407979ea9cd33deb3f Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Wed, 15 Jun 2022 12:10:59 +1000 Subject: kernfs: Replace global kernfs_open_file_mutex with hashed mutexes. In current kernfs design a single mutex, kernfs_open_file_mutex, protects the list of kernfs_open_file instances corresponding to a sysfs attribute. So even if different tasks are opening or closing different sysfs files they can contend on osq_lock of this mutex. The contention is more apparent in large scale systems with few hundred CPUs where most of the CPUs have running tasks that are opening, accessing or closing sysfs files at any point of time. Using hashed mutexes in place of a single global mutex, can significantly reduce contention around global mutex and hence can provide better scalability. Moreover as these hashed mutexes are not part of kernfs_node objects we will not see any singnificant change in memory utilization of kernfs based file systems like sysfs, cgroupfs etc. Modify interface introduced in previous patch to make use of hashed mutexes. Use kernfs_node address as hashing key. Acked-by: Tejun Heo Signed-off-by: Imran Khan Link: https://lore.kernel.org/r/20220615021059.862643-5-imran.f.khan@oracle.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 2dd9c8df0f4f..13e703f615f7 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -18,6 +18,7 @@ #include #include #include +#include struct file; struct dentry; @@ -34,6 +35,62 @@ struct kernfs_fs_context; struct kernfs_open_node; struct kernfs_iattrs; +/* + * NR_KERNFS_LOCK_BITS determines size (NR_KERNFS_LOCKS) of hash + * table of locks. + * Having a small hash table would impact scalability, since + * more and more kernfs_node objects will end up using same lock + * and having a very large hash table would waste memory. + * + * At the moment size of hash table of locks is being set based on + * the number of CPUs as follows: + * + * NR_CPU NR_KERNFS_LOCK_BITS NR_KERNFS_LOCKS + * 1 1 2 + * 2-3 2 4 + * 4-7 4 16 + * 8-15 6 64 + * 16-31 8 256 + * 32 and more 10 1024 + * + * The above relation between NR_CPU and number of locks is based + * on some internal experimentation which involved booting qemu + * with different values of smp, performing some sysfs operations + * on all CPUs and observing how increase in number of locks impacts + * completion time of these sysfs operations on each CPU. + */ +#ifdef CONFIG_SMP +#define NR_KERNFS_LOCK_BITS (2 * (ilog2(NR_CPUS < 32 ? NR_CPUS : 32))) +#else +#define NR_KERNFS_LOCK_BITS 1 +#endif + +#define NR_KERNFS_LOCKS (1 << NR_KERNFS_LOCK_BITS) + +/* + * There's one kernfs_open_file for each open file and one kernfs_open_node + * for each kernfs_node with one or more open files. + * + * filp->private_data points to seq_file whose ->private points to + * kernfs_open_file. + * + * kernfs_open_files are chained at kernfs_open_node->files, which is + * protected by kernfs_global_locks.open_file_mutex[i]. + * + * To reduce possible contention in sysfs access, arising due to single + * locks, use an array of locks (e.g. open_file_mutex) and use kernfs_node + * object address as hash keys to get the index of these locks. + * + * Hashed mutexes are safe to use here because operations using these don't + * rely on global exclusion. + * + * In future we intend to replace other global locks with hashed ones as well. + * kernfs_global_locks acts as a holder for all such hash tables. + */ +struct kernfs_global_locks { + struct mutex open_file_mutex[NR_KERNFS_LOCKS]; +}; + enum kernfs_node_type { KERNFS_DIR = 0x0001, KERNFS_FILE = 0x0002, -- cgit v1.2.3 From 8f486cab263ca1b761c1aab9b36975afd355b799 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Thu, 23 Jun 2022 01:03:42 -0700 Subject: driver core: fw_devlink: Allow firmware to mark devices as best effort When firmware sets the FWNODE_FLAG_BEST_EFFORT flag for a fwnode, fw_devlink will do a best effort ordering for that device where it'll only enforce the probe/suspend/resume ordering of that device with suppliers that have drivers. The driver of that device can then decide if it wants to defer probe or probe without the suppliers. This will be useful for avoid probe delays of the console device that were caused by commit 71066545b48e ("driver core: Set fw_devlink.strict=1 by default"). Fixes: 71066545b48e ("driver core: Set fw_devlink.strict=1 by default") Reported-by: Sascha Hauer Reported-by: Peng Fan Tested-by: Peng Fan Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20220623080344.783549-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 9a81c4410b9f..89b9bdfca925 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -27,11 +27,15 @@ struct device; * driver needs its child devices to be bound with * their respective drivers as soon as they are * added. + * BEST_EFFORT: The fwnode/device needs to probe early and might be missing some + * suppliers. Only enforce ordering with suppliers that have + * drivers. */ #define FWNODE_FLAG_LINKS_ADDED BIT(0) #define FWNODE_FLAG_NOT_DEVICE BIT(1) #define FWNODE_FLAG_INITIALIZED BIT(2) #define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3) +#define FWNODE_FLAG_BEST_EFFORT BIT(4) struct fwnode_handle { struct fwnode_handle *secondary; -- cgit v1.2.3 From d1877e639bc6bf1c3131eda3f9ede73f8da96c22 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 8 Jun 2022 12:55:13 -0600 Subject: vfio: de-extern-ify function prototypes The use of 'extern' in function prototypes has been disrecommended in the kernel coding style for several years now, remove them from all vfio related files so contributors no longer need to decide between style and consistency. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Eric Farman Reviewed-by: Eric Auger Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/165471414407.203056.474032786990662279.stgit@omen Signed-off-by: Alex Williamson --- include/linux/vfio.h | 70 +++++++++++++++++++++---------------------- include/linux/vfio_pci_core.h | 65 ++++++++++++++++++++-------------------- 2 files changed, 66 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index aa888cc51757..49580fa2073a 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -140,19 +140,19 @@ int vfio_mig_get_next_state(struct vfio_device *device, /* * External user API */ -extern struct iommu_group *vfio_file_iommu_group(struct file *file); -extern bool vfio_file_enforced_coherent(struct file *file); -extern void vfio_file_set_kvm(struct file *file, struct kvm *kvm); -extern bool vfio_file_has_dev(struct file *file, struct vfio_device *device); +struct iommu_group *vfio_file_iommu_group(struct file *file); +bool vfio_file_enforced_coherent(struct file *file); +void vfio_file_set_kvm(struct file *file, struct kvm *kvm); +bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) -extern int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, - int npage, int prot, unsigned long *phys_pfn); -extern int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, - int npage); -extern int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, - void *data, size_t len, bool write); +int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, + int npage, int prot, unsigned long *phys_pfn); +int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, + int npage); +int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, + void *data, size_t len, bool write); /* each type has independent events */ enum vfio_notify_type { @@ -162,13 +162,13 @@ enum vfio_notify_type { /* events for VFIO_IOMMU_NOTIFY */ #define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) -extern int vfio_register_notifier(struct vfio_device *device, - enum vfio_notify_type type, - unsigned long *required_events, - struct notifier_block *nb); -extern int vfio_unregister_notifier(struct vfio_device *device, - enum vfio_notify_type type, - struct notifier_block *nb); +int vfio_register_notifier(struct vfio_device *device, + enum vfio_notify_type type, + unsigned long *required_events, + struct notifier_block *nb); +int vfio_unregister_notifier(struct vfio_device *device, + enum vfio_notify_type type, + struct notifier_block *nb); /* @@ -178,25 +178,24 @@ struct vfio_info_cap { struct vfio_info_cap_header *buf; size_t size; }; -extern struct vfio_info_cap_header *vfio_info_cap_add( - struct vfio_info_cap *caps, size_t size, u16 id, u16 version); -extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset); +struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, + size_t size, u16 id, + u16 version); +void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset); -extern int vfio_info_add_capability(struct vfio_info_cap *caps, - struct vfio_info_cap_header *cap, - size_t size); +int vfio_info_add_capability(struct vfio_info_cap *caps, + struct vfio_info_cap_header *cap, size_t size); -extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, - int num_irqs, int max_irq_type, - size_t *data_size); +int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, + int num_irqs, int max_irq_type, + size_t *data_size); struct pci_dev; #if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH) -extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); -extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); -extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, - unsigned int cmd, - unsigned long arg); +void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); +void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); +long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, + unsigned long arg); #else static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { @@ -230,10 +229,9 @@ struct virqfd { struct virqfd **pvirqfd; }; -extern int vfio_virqfd_enable(void *opaque, - int (*handler)(void *, void *), - void (*thread)(void *, void *), - void *data, struct virqfd **pvirqfd, int fd); -extern void vfio_virqfd_disable(struct virqfd **pvirqfd); +int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), + void (*thread)(void *, void *), void *data, + struct virqfd **pvirqfd, int fd); +void vfio_virqfd_disable(struct virqfd **pvirqfd); #endif /* VFIO_H */ diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 23c176d4b073..22de2bce6394 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -147,23 +147,23 @@ struct vfio_pci_core_device { #define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev))) #define irq_is(vdev, type) (vdev->irq_type == type) -extern void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev); -extern void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev); +void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev); +void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev); -extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, - uint32_t flags, unsigned index, - unsigned start, unsigned count, void *data); +int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, + uint32_t flags, unsigned index, + unsigned start, unsigned count, void *data); -extern ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); +ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, + char __user *buf, size_t count, + loff_t *ppos, bool iswrite); -extern ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); +ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); #ifdef CONFIG_VFIO_PCI_VGA -extern ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); +ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); #else static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, @@ -173,32 +173,31 @@ static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, } #endif -extern long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, - uint64_t data, int count, int fd); +long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, + uint64_t data, int count, int fd); -extern int vfio_pci_init_perm_bits(void); -extern void vfio_pci_uninit_perm_bits(void); +int vfio_pci_init_perm_bits(void); +void vfio_pci_uninit_perm_bits(void); -extern int vfio_config_init(struct vfio_pci_core_device *vdev); -extern void vfio_config_free(struct vfio_pci_core_device *vdev); +int vfio_config_init(struct vfio_pci_core_device *vdev); +void vfio_config_free(struct vfio_pci_core_device *vdev); -extern int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, - unsigned int type, unsigned int subtype, - const struct vfio_pci_regops *ops, - size_t size, u32 flags, void *data); +int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, + unsigned int type, unsigned int subtype, + const struct vfio_pci_regops *ops, + size_t size, u32 flags, void *data); -extern int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, - pci_power_t state); +int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, + pci_power_t state); -extern bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev); -extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device - *vdev); -extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev); -extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, - u16 cmd); +bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev); +void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev); +u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev); +void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, + u16 cmd); #ifdef CONFIG_VFIO_PCI_IGD -extern int vfio_pci_igd_init(struct vfio_pci_core_device *vdev); +int vfio_pci_igd_init(struct vfio_pci_core_device *vdev); #else static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) { @@ -207,8 +206,8 @@ static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) #endif #ifdef CONFIG_S390 -extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, - struct vfio_info_cap *caps); +int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, + struct vfio_info_cap *caps); #else static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, struct vfio_info_cap *caps) -- cgit v1.2.3 From ee65728e103bb7dd99d8604bf6c7aa89c7d7e446 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 27 Jun 2022 09:00:26 +0300 Subject: docs: rename Documentation/vm to Documentation/mm so it will be consistent with code mm directory and with Documentation/admin-guide/mm and won't be confused with virtual machines. Signed-off-by: Mike Rapoport Suggested-by: Matthew Wilcox Tested-by: Ira Weiny Acked-by: Jonathan Corbet Acked-by: Wu XiangCheng --- include/linux/hmm.h | 4 ++-- include/linux/memremap.h | 2 +- include/linux/mmu_notifier.h | 2 +- include/linux/sched/mm.h | 4 ++-- include/linux/swap.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index d5a6f101f843..126a36571667 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -4,7 +4,7 @@ * * Authors: Jérôme Glisse * - * See Documentation/vm/hmm.rst for reasons and overview of what HMM is. + * See Documentation/mm/hmm.rst for reasons and overview of what HMM is. */ #ifndef LINUX_HMM_H #define LINUX_HMM_H @@ -100,7 +100,7 @@ struct hmm_range { }; /* - * Please see Documentation/vm/hmm.rst for how to use the range API. + * Please see Documentation/mm/hmm.rst for how to use the range API. */ int hmm_range_fault(struct hmm_range *range); diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 8af304f6b504..9f5ee49482de 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -39,7 +39,7 @@ struct vmem_altmap { * must be treated as an opaque object, rather than a "normal" struct page. * * A more complete discussion of unaddressable memory may be found in - * include/linux/hmm.h and Documentation/vm/hmm.rst. + * include/linux/hmm.h and Documentation/mm/hmm.rst. * * MEMORY_DEVICE_FS_DAX: * Host memory that has similar access semantics as System RAM i.e. DMA diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 45fc2c81e370..d6c06e140277 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -198,7 +198,7 @@ struct mmu_notifier_ops { * invalidate_range_start()/end() notifiers, as * invalidate_range() already catches the points in time when an * external TLB range needs to be flushed. For more in depth - * discussion on this see Documentation/vm/mmu_notifier.rst + * discussion on this see Documentation/mm/mmu_notifier.rst * * Note that this function might be called with just a sub-range * of what was passed to invalidate_range_start()/end(), if diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 8cd975a8bfeb..2a243616f222 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -29,7 +29,7 @@ extern struct mm_struct *mm_alloc(void); * * Use mmdrop() to release the reference acquired by mmgrab(). * - * See also for an in-depth explanation + * See also for an in-depth explanation * of &mm_struct.mm_count vs &mm_struct.mm_users. */ static inline void mmgrab(struct mm_struct *mm) @@ -92,7 +92,7 @@ static inline void mmdrop_sched(struct mm_struct *mm) * * Use mmput() to release the reference acquired by mmget(). * - * See also for an in-depth explanation + * See also for an in-depth explanation * of &mm_struct.mm_count vs &mm_struct.mm_users. */ static inline void mmget(struct mm_struct *mm) diff --git a/include/linux/swap.h b/include/linux/swap.h index 0c0fed1b348f..95a5b7aa1ae9 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -74,7 +74,7 @@ static inline int current_is_kswapd(void) /* * Unaddressable device memory support. See include/linux/hmm.h and - * Documentation/vm/hmm.rst. Short description is we need struct pages for + * Documentation/mm/hmm.rst. Short description is we need struct pages for * device memory that is unaddressable (inaccessible) by CPU, so that we can * migrate part of a process memory to device memory. * -- cgit v1.2.3 From 70fb5ccf2ebb09a0c8ebba775041567812d45f86 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Mon, 13 Jun 2022 00:34:28 +0800 Subject: sched/fair: Introduce SIS_UTIL to search idle CPU based on sum of util_avg [Problem Statement] select_idle_cpu() might spend too much time searching for an idle CPU, when the system is overloaded. The following histogram is the time spent in select_idle_cpu(), when running 224 instances of netperf on a system with 112 CPUs per LLC domain: @usecs: [0] 533 | | [1] 5495 | | [2, 4) 12008 | | [4, 8) 239252 | | [8, 16) 4041924 |@@@@@@@@@@@@@@ | [16, 32) 12357398 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [32, 64) 14820255 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [64, 128) 13047682 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [128, 256) 8235013 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [256, 512) 4507667 |@@@@@@@@@@@@@@@ | [512, 1K) 2600472 |@@@@@@@@@ | [1K, 2K) 927912 |@@@ | [2K, 4K) 218720 | | [4K, 8K) 98161 | | [8K, 16K) 37722 | | [16K, 32K) 6715 | | [32K, 64K) 477 | | [64K, 128K) 7 | | netperf latency usecs: ======= case load Lat_99th std% TCP_RR thread-224 257.39 ( 0.21) The time spent in select_idle_cpu() is visible to netperf and might have a negative impact. [Symptom analysis] The patch [1] from Mel Gorman has been applied to track the efficiency of select_idle_sibling. Copy the indicators here: SIS Search Efficiency(se_eff%): A ratio expressed as a percentage of runqueues scanned versus idle CPUs found. A 100% efficiency indicates that the target, prev or recent CPU of a task was idle at wakeup. The lower the efficiency, the more runqueues were scanned before an idle CPU was found. SIS Domain Search Efficiency(dom_eff%): Similar, except only for the slower SIS patch. SIS Fast Success Rate(fast_rate%): Percentage of SIS that used target, prev or recent CPUs. SIS Success rate(success_rate%): Percentage of scans that found an idle CPU. The test is based on Aubrey's schedtests tool, including netperf, hackbench, schbench and tbench. Test on vanilla kernel: schedstat_parse.py -f netperf_vanilla.log case load se_eff% dom_eff% fast_rate% success_rate% TCP_RR 28 threads 99.978 18.535 99.995 100.000 TCP_RR 56 threads 99.397 5.671 99.964 100.000 TCP_RR 84 threads 21.721 6.818 73.632 100.000 TCP_RR 112 threads 12.500 5.533 59.000 100.000 TCP_RR 140 threads 8.524 4.535 49.020 100.000 TCP_RR 168 threads 6.438 3.945 40.309 99.999 TCP_RR 196 threads 5.397 3.718 32.320 99.982 TCP_RR 224 threads 4.874 3.661 25.775 99.767 UDP_RR 28 threads 99.988 17.704 99.997 100.000 UDP_RR 56 threads 99.528 5.977 99.970 100.000 UDP_RR 84 threads 24.219 6.992 76.479 100.000 UDP_RR 112 threads 13.907 5.706 62.538 100.000 UDP_RR 140 threads 9.408 4.699 52.519 100.000 UDP_RR 168 threads 7.095 4.077 44.352 100.000 UDP_RR 196 threads 5.757 3.775 35.764 99.991 UDP_RR 224 threads 5.124 3.704 28.748 99.860 schedstat_parse.py -f schbench_vanilla.log (each group has 28 tasks) case load se_eff% dom_eff% fast_rate% success_rate% normal 1 mthread 99.152 6.400 99.941 100.000 normal 2 mthreads 97.844 4.003 99.908 100.000 normal 3 mthreads 96.395 2.118 99.917 99.998 normal 4 mthreads 55.288 1.451 98.615 99.804 normal 5 mthreads 7.004 1.870 45.597 61.036 normal 6 mthreads 3.354 1.346 20.777 34.230 normal 7 mthreads 2.183 1.028 11.257 21.055 normal 8 mthreads 1.653 0.825 7.849 15.549 schedstat_parse.py -f hackbench_vanilla.log (each group has 28 tasks) case load se_eff% dom_eff% fast_rate% success_rate% process-pipe 1 group 99.991 7.692 99.999 100.000 process-pipe 2 groups 99.934 4.615 99.997 100.000 process-pipe 3 groups 99.597 3.198 99.987 100.000 process-pipe 4 groups 98.378 2.464 99.958 100.000 process-pipe 5 groups 27.474 3.653 89.811 99.800 process-pipe 6 groups 20.201 4.098 82.763 99.570 process-pipe 7 groups 16.423 4.156 77.398 99.316 process-pipe 8 groups 13.165 3.920 72.232 98.828 process-sockets 1 group 99.977 5.882 99.999 100.000 process-sockets 2 groups 99.927 5.505 99.996 100.000 process-sockets 3 groups 99.397 3.250 99.980 100.000 process-sockets 4 groups 79.680 4.258 98.864 99.998 process-sockets 5 groups 7.673 2.503 63.659 92.115 process-sockets 6 groups 4.642 1.584 58.946 88.048 process-sockets 7 groups 3.493 1.379 49.816 81.164 process-sockets 8 groups 3.015 1.407 40.845 75.500 threads-pipe 1 group 99.997 0.000 100.000 100.000 threads-pipe 2 groups 99.894 2.932 99.997 100.000 threads-pipe 3 groups 99.611 4.117 99.983 100.000 threads-pipe 4 groups 97.703 2.624 99.937 100.000 threads-pipe 5 groups 22.919 3.623 87.150 99.764 threads-pipe 6 groups 18.016 4.038 80.491 99.557 threads-pipe 7 groups 14.663 3.991 75.239 99.247 threads-pipe 8 groups 12.242 3.808 70.651 98.644 threads-sockets 1 group 99.990 6.667 99.999 100.000 threads-sockets 2 groups 99.940 5.114 99.997 100.000 threads-sockets 3 groups 99.469 4.115 99.977 100.000 threads-sockets 4 groups 87.528 4.038 99.400 100.000 threads-sockets 5 groups 6.942 2.398 59.244 88.337 threads-sockets 6 groups 4.359 1.954 49.448 87.860 threads-sockets 7 groups 2.845 1.345 41.198 77.102 threads-sockets 8 groups 2.871 1.404 38.512 74.312 schedstat_parse.py -f tbench_vanilla.log case load se_eff% dom_eff% fast_rate% success_rate% loopback 28 threads 99.976 18.369 99.995 100.000 loopback 56 threads 99.222 7.799 99.934 100.000 loopback 84 threads 19.723 6.819 70.215 100.000 loopback 112 threads 11.283 5.371 55.371 99.999 loopback 140 threads 0.000 0.000 0.000 0.000 loopback 168 threads 0.000 0.000 0.000 0.000 loopback 196 threads 0.000 0.000 0.000 0.000 loopback 224 threads 0.000 0.000 0.000 0.000 According to the test above, if the system becomes busy, the SIS Search Efficiency(se_eff%) drops significantly. Although some benchmarks would finally find an idle CPU(success_rate% = 100%), it is doubtful whether it is worth it to search the whole LLC domain. [Proposal] It would be ideal to have a crystal ball to answer this question: How many CPUs must a wakeup path walk down, before it can find an idle CPU? Many potential metrics could be used to predict the number. One candidate is the sum of util_avg in this LLC domain. The benefit of choosing util_avg is that it is a metric of accumulated historic activity, which seems to be smoother than instantaneous metrics (such as rq->nr_running). Besides, choosing the sum of util_avg would help predict the load of the LLC domain more precisely, because SIS_PROP uses one CPU's idle time to estimate the total LLC domain idle time. In summary, the lower the util_avg is, the more select_idle_cpu() should scan for idle CPU, and vice versa. When the sum of util_avg in this LLC domain hits 85% or above, the scan stops. The reason to choose 85% as the threshold is that this is the imbalance_pct(117) when a LLC sched group is overloaded. Introduce the quadratic function: y = SCHED_CAPACITY_SCALE - p * x^2 and y'= y / SCHED_CAPACITY_SCALE x is the ratio of sum_util compared to the CPU capacity: x = sum_util / (llc_weight * SCHED_CAPACITY_SCALE) y' is the ratio of CPUs to be scanned in the LLC domain, and the number of CPUs to scan is calculated by: nr_scan = llc_weight * y' Choosing quadratic function is because: [1] Compared to the linear function, it scans more aggressively when the sum_util is low. [2] Compared to the exponential function, it is easier to calculate. [3] It seems that there is no accurate mapping between the sum of util_avg and the number of CPUs to be scanned. Use heuristic scan for now. For a platform with 112 CPUs per LLC, the number of CPUs to scan is: sum_util% 0 5 15 25 35 45 55 65 75 85 86 ... scan_nr 112 111 108 102 93 81 65 47 25 1 0 ... For a platform with 16 CPUs per LLC, the number of CPUs to scan is: sum_util% 0 5 15 25 35 45 55 65 75 85 86 ... scan_nr 16 15 15 14 13 11 9 6 3 0 0 ... Furthermore, to minimize the overhead of calculating the metrics in select_idle_cpu(), borrow the statistics from periodic load balance. As mentioned by Abel, on a platform with 112 CPUs per LLC, the sum_util calculated by periodic load balance after 112 ms would decay to about 0.5 * 0.5 * 0.5 * 0.7 = 8.75%, thus bringing a delay in reflecting the latest utilization. But it is a trade-off. Checking the util_avg in newidle load balance would be more frequent, but it brings overhead - multiple CPUs write/read the per-LLC shared variable and introduces cache contention. Tim also mentioned that, it is allowed to be non-optimal in terms of scheduling for the short-term variations, but if there is a long-term trend in the load behavior, the scheduler can adjust for that. When SIS_UTIL is enabled, the select_idle_cpu() uses the nr_scan calculated by SIS_UTIL instead of the one from SIS_PROP. As Peter and Mel suggested, SIS_UTIL should be enabled by default. This patch is based on the util_avg, which is very sensitive to the CPU frequency invariance. There is an issue that, when the max frequency has been clamp, the util_avg would decay insanely fast when the CPU is idle. Commit addca285120b ("cpufreq: intel_pstate: Handle no_turbo in frequency invariance") could be used to mitigate this symptom, by adjusting the arch_max_freq_ratio when turbo is disabled. But this issue is still not thoroughly fixed, because the current code is unaware of the user-specified max CPU frequency. [Test result] netperf and tbench were launched with 25% 50% 75% 100% 125% 150% 175% 200% of CPU number respectively. Hackbench and schbench were launched by 1, 2 ,4, 8 groups. Each test lasts for 100 seconds and repeats 3 times. The following is the benchmark result comparison between baseline:vanilla v5.19-rc1 and compare:patched kernel. Positive compare% indicates better performance. Each netperf test is a: netperf -4 -H 127.0.1 -t TCP/UDP_RR -c -C -l 100 netperf.throughput ======= case load baseline(std%) compare%( std%) TCP_RR 28 threads 1.00 ( 0.34) -0.16 ( 0.40) TCP_RR 56 threads 1.00 ( 0.19) -0.02 ( 0.20) TCP_RR 84 threads 1.00 ( 0.39) -0.47 ( 0.40) TCP_RR 112 threads 1.00 ( 0.21) -0.66 ( 0.22) TCP_RR 140 threads 1.00 ( 0.19) -0.69 ( 0.19) TCP_RR 168 threads 1.00 ( 0.18) -0.48 ( 0.18) TCP_RR 196 threads 1.00 ( 0.16) +194.70 ( 16.43) TCP_RR 224 threads 1.00 ( 0.16) +197.30 ( 7.85) UDP_RR 28 threads 1.00 ( 0.37) +0.35 ( 0.33) UDP_RR 56 threads 1.00 ( 11.18) -0.32 ( 0.21) UDP_RR 84 threads 1.00 ( 1.46) -0.98 ( 0.32) UDP_RR 112 threads 1.00 ( 28.85) -2.48 ( 19.61) UDP_RR 140 threads 1.00 ( 0.70) -0.71 ( 14.04) UDP_RR 168 threads 1.00 ( 14.33) -0.26 ( 11.16) UDP_RR 196 threads 1.00 ( 12.92) +186.92 ( 20.93) UDP_RR 224 threads 1.00 ( 11.74) +196.79 ( 18.62) Take the 224 threads as an example, the SIS search metrics changes are illustrated below: vanilla patched 4544492 +237.5% 15338634 sched_debug.cpu.sis_domain_search.avg 38539 +39686.8% 15333634 sched_debug.cpu.sis_failed.avg 128300000 -87.9% 15551326 sched_debug.cpu.sis_scanned.avg 5842896 +162.7% 15347978 sched_debug.cpu.sis_search.avg There is -87.9% less CPU scans after patched, which indicates lower overhead. Besides, with this patch applied, there is -13% less rq lock contention in perf-profile.calltrace.cycles-pp._raw_spin_lock.raw_spin_rq_lock_nested .try_to_wake_up.default_wake_function.woken_wake_function. This might help explain the performance improvement - Because this patch allows the waking task to remain on the previous CPU, rather than grabbing other CPUs' lock. Each hackbench test is a: hackbench -g $job --process/threads --pipe/sockets -l 1000000 -s 100 hackbench.throughput ========= case load baseline(std%) compare%( std%) process-pipe 1 group 1.00 ( 1.29) +0.57 ( 0.47) process-pipe 2 groups 1.00 ( 0.27) +0.77 ( 0.81) process-pipe 4 groups 1.00 ( 0.26) +1.17 ( 0.02) process-pipe 8 groups 1.00 ( 0.15) -4.79 ( 0.02) process-sockets 1 group 1.00 ( 0.63) -0.92 ( 0.13) process-sockets 2 groups 1.00 ( 0.03) -0.83 ( 0.14) process-sockets 4 groups 1.00 ( 0.40) +5.20 ( 0.26) process-sockets 8 groups 1.00 ( 0.04) +3.52 ( 0.03) threads-pipe 1 group 1.00 ( 1.28) +0.07 ( 0.14) threads-pipe 2 groups 1.00 ( 0.22) -0.49 ( 0.74) threads-pipe 4 groups 1.00 ( 0.05) +1.88 ( 0.13) threads-pipe 8 groups 1.00 ( 0.09) -4.90 ( 0.06) threads-sockets 1 group 1.00 ( 0.25) -0.70 ( 0.53) threads-sockets 2 groups 1.00 ( 0.10) -0.63 ( 0.26) threads-sockets 4 groups 1.00 ( 0.19) +11.92 ( 0.24) threads-sockets 8 groups 1.00 ( 0.08) +4.31 ( 0.11) Each tbench test is a: tbench -t 100 $job 127.0.0.1 tbench.throughput ====== case load baseline(std%) compare%( std%) loopback 28 threads 1.00 ( 0.06) -0.14 ( 0.09) loopback 56 threads 1.00 ( 0.03) -0.04 ( 0.17) loopback 84 threads 1.00 ( 0.05) +0.36 ( 0.13) loopback 112 threads 1.00 ( 0.03) +0.51 ( 0.03) loopback 140 threads 1.00 ( 0.02) -1.67 ( 0.19) loopback 168 threads 1.00 ( 0.38) +1.27 ( 0.27) loopback 196 threads 1.00 ( 0.11) +1.34 ( 0.17) loopback 224 threads 1.00 ( 0.11) +1.67 ( 0.22) Each schbench test is a: schbench -m $job -t 28 -r 100 -s 30000 -c 30000 schbench.latency_90%_us ======== case load baseline(std%) compare%( std%) normal 1 mthread 1.00 ( 31.22) -7.36 ( 20.25)* normal 2 mthreads 1.00 ( 2.45) -0.48 ( 1.79) normal 4 mthreads 1.00 ( 1.69) +0.45 ( 0.64) normal 8 mthreads 1.00 ( 5.47) +9.81 ( 14.28) *Consider the Standard Deviation, this -7.36% regression might not be valid. Also, a OLTP workload with a commercial RDBMS has been tested, and there is no significant change. There were concerns that unbalanced tasks among CPUs would cause problems. For example, suppose the LLC domain is composed of 8 CPUs, and 7 tasks are bound to CPU0~CPU6, while CPU7 is idle: CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 util_avg 1024 1024 1024 1024 1024 1024 1024 0 Since the util_avg ratio is 87.5%( = 7/8 ), which is higher than 85%, select_idle_cpu() will not scan, thus CPU7 is undetected during scan. But according to Mel, it is unlikely the CPU7 will be idle all the time because CPU7 could pull some tasks via CPU_NEWLY_IDLE. lkp(kernel test robot) has reported a regression on stress-ng.sock on a very busy system. According to the sched_debug statistics, it might be caused by SIS_UTIL terminates the scan and chooses a previous CPU earlier, and this might introduce more context switch, especially involuntary preemption, which impacts a busy stress-ng. This regression has shown that, not all benchmarks in every scenario benefit from idle CPU scan limit, and it needs further investigation. Besides, there is slight regression in hackbench's 16 groups case when the LLC domain has 16 CPUs. Prateek mentioned that we should scan aggressively in an LLC domain with 16 CPUs. Because the cost to search for an idle one among 16 CPUs is negligible. The current patch aims to propose a generic solution and only considers the util_avg. Something like the below could be applied on top of the current patch to fulfill the requirement: if (llc_weight <= 16) nr_scan = nr_scan * 32 / llc_weight; For LLC domain with 16 CPUs, the nr_scan will be expanded to 2 times large. The smaller the CPU number this LLC domain has, the larger nr_scan will be expanded. This needs further investigation. There is also ongoing work[2] from Abel to filter out the busy CPUs during wakeup, to further speed up the idle CPU scan. And it could be a following-up optimization on top of this change. Suggested-by: Tim Chen Suggested-by: Peter Zijlstra Signed-off-by: Chen Yu Signed-off-by: Peter Zijlstra (Intel) Tested-by: Yicong Yang Tested-by: Mohini Narkhede Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20220612163428.849378-1-yu.c.chen@intel.com --- include/linux/sched/topology.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 56cffe42abbc..816df6cc444e 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -81,6 +81,7 @@ struct sched_domain_shared { atomic_t ref; atomic_t nr_busy_cpus; int has_idle_cores; + int nr_idle_scan; }; struct sched_domain { -- cgit v1.2.3 From 119a784c81270eb88e573174ed2209225d646656 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 16 Jun 2022 11:06:23 -0700 Subject: perf/core: Add a new read format to get a number of lost samples Sometimes we want to know an accurate number of samples even if it's lost. Currenlty PERF_RECORD_LOST is generated for a ring-buffer which might be shared with other events. So it's hard to know per-event lost count. Add event->lost_samples field and PERF_FORMAT_LOST to retrieve it from userspace. Original-patch-by: Jiri Olsa Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220616180623.1358843-1-namhyung@kernel.org --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index da759560eec5..ee8b9ecdc03b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -759,6 +759,8 @@ struct perf_event { struct pid_namespace *ns; u64 id; + atomic64_t lost_samples; + u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; -- cgit v1.2.3 From bb4479994945e9170534389a7762eb56149320ac Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Tue, 21 Jun 2022 10:04:10 +0100 Subject: sched, drivers: Remove max param from effective_cpu_util()/sched_cpu_util() effective_cpu_util() already has a `int cpu' parameter which allows to retrieve the CPU capacity scale factor (or maximum CPU capacity) inside this function via an arch_scale_cpu_capacity(cpu). A lot of code calling effective_cpu_util() (or the shim sched_cpu_util()) needs the maximum CPU capacity, i.e. it will call arch_scale_cpu_capacity() already. But not having to pass it into effective_cpu_util() will make the EAS wake-up code easier, especially when the maximum CPU capacity reduced by the thermal pressure is passed through the EAS wake-up functions. Due to the asymmetric CPU capacity support of arm/arm64 architectures, arch_scale_cpu_capacity(int cpu) is a per-CPU variable read access via per_cpu(cpu_scale, cpu) on such a system. On all other architectures it is a a compile-time constant (SCHED_CAPACITY_SCALE). Signed-off-by: Dietmar Eggemann Signed-off-by: Peter Zijlstra (Intel) Acked-by: Vincent Guittot Tested-by: Lukasz Luba Link: https://lkml.kernel.org/r/20220621090414.433602-4-vdonnefort@google.com --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c46f3a63b758..88b8817b827d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2257,7 +2257,7 @@ static inline bool owner_on_cpu(struct task_struct *owner) } /* Returns effective CPU energy utilization, as seen by the scheduler */ -unsigned long sched_cpu_util(int cpu, unsigned long max); +unsigned long sched_cpu_util(int cpu); #endif /* CONFIG_SMP */ #ifdef CONFIG_RSEQ -- cgit v1.2.3 From 586b3b7600e44c1cad52c683ccfbb76fb2c10cc8 Mon Sep 17 00:00:00 2001 From: Sai Krishna Potthuri Date: Fri, 17 Jun 2022 16:16:56 +0530 Subject: firmware: xilinx: Add configuration values for tri-state Add configuration values(enable/disable) for tri-state parameter. Signed-off-by: Sai Krishna Potthuri Link: https://lore.kernel.org/r/1655462819-28801-2-git-send-email-lakshmi.sai.krishna.potthuri@xilinx.com Signed-off-by: Linus Walleij --- include/linux/firmware/xlnx-zynqmp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 1ec73d5352c3..4901fb31de3e 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -368,6 +368,11 @@ enum pm_pinctrl_drive_strength { PM_PINCTRL_DRIVE_STRENGTH_12MA = 3, }; +enum pm_pinctrl_tri_state { + PM_PINCTRL_TRI_STATE_DISABLE = 0, + PM_PINCTRL_TRI_STATE_ENABLE = 1, +}; + enum zynqmp_pm_shutdown_type { ZYNQMP_PM_SHUTDOWN_TYPE_SHUTDOWN = 0, ZYNQMP_PM_SHUTDOWN_TYPE_RESET = 1, -- cgit v1.2.3 From 0e584d46218e3b9dc12a98e18e81a0cd3e0d5419 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 28 Jun 2022 10:46:22 +0100 Subject: regulator: fix a kernel-doc warning document n_ramp_values field at struct regulator_desc, in order to solve this warning: include/linux/regulator/driver.h:434: warning: Function parameter or member 'n_ramp_values' not described in 'regulator_desc' Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/15efc16e878aa327aa2769023bcdf959a795f41d.1656409369.git.mchehab@kernel.org Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 0228caaa6741..f9a7461e72b8 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -348,6 +348,7 @@ enum regulator_type { * @ramp_delay_table: Table for mapping the regulator ramp-rate values. Values * should be given in units of V/S (uV/uS). See the * regulator_set_ramp_delay_regmap(). + * @n_ramp_values: number of elements at @ramp_delay_table. * * @enable_time: Time taken for initial enable of regulator (in uS). * @off_on_delay: guard time (in uS), before re-enabling a regulator -- cgit v1.2.3 From 1f90307e5f0d7bc9a336ead528f616a5df8e5944 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jun 2022 08:05:49 +0200 Subject: block: remove QUEUE_FLAG_DEAD Disallow setting the blk-mq state on any queue that is already dying as setting the state even then is a bad idea, and remove the now unused QUEUE_FLAG_DEAD flag. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20220619060552.1850436-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b2d42201bd5d..f4632f4fe884 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -564,7 +564,6 @@ struct request_queue { #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ @@ -592,7 +591,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_has_srcu(q) test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags) -#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ -- cgit v1.2.3 From 6f8191fdf41d3a53cc1d63fe2234e812c55a0092 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jun 2022 08:05:51 +0200 Subject: block: simplify disk shutdown Set the queue dying flag and call blk_mq_exit_queue from del_gendisk for all disks that do not have separately allocated queues, and thus remove the need to call blk_cleanup_queue for them. Rename blk_cleanup_disk to blk_mq_destroy_queue to make it clear that this function is intended only for separately allocated blk-mq queues. This saves an extra queue freeze for devices without a separately allocated queue. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20220619060552.1850436-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +++ include/linux/blkdev.h | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index e2d9daf7e8dd..0fd96e92c6c6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -686,10 +686,13 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, \ __blk_mq_alloc_disk(set, queuedata, &__key); \ }) +struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, + struct lock_class_key *lkclass); struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); void blk_mq_unregister_dev(struct device *, struct request_queue *); +void blk_mq_destroy_queue(struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f4632f4fe884..530eeccffda3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -148,6 +148,7 @@ struct gendisk { #define GD_NATIVE_CAPACITY 3 #define GD_ADDED 4 #define GD_SUPPRESS_PART_SCAN 5 +#define GD_OWNS_QUEUE 6 struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */ @@ -815,8 +816,6 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) int bdev_disk_changed(struct gendisk *disk, bool invalidate); -struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, - struct lock_class_key *lkclass); void put_disk(struct gendisk *disk); struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); @@ -933,7 +932,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, /* * Access functions for manipulating queue properties */ -extern void blk_cleanup_queue(struct request_queue *); void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce limit); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); -- cgit v1.2.3 From 8b9ab62662048a3274361c7e5f64037c2c133e2c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jun 2022 08:05:52 +0200 Subject: block: remove blk_cleanup_disk blk_cleanup_disk is nothing but a trivial wrapper for put_disk now, so remove it. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20220619060552.1850436-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 530eeccffda3..22b12531aeb7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -834,7 +834,6 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); \ __blk_alloc_disk(node_id, &__key); \ }) -void blk_cleanup_disk(struct gendisk *disk); int __register_blkdev(unsigned int major, const char *name, void (*probe)(dev_t devt)); -- cgit v1.2.3 From cc5c516df028b221d94c65c47c5ae8d20f61b6f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jun 2022 19:18:45 +0200 Subject: block: simplify blktrace sysfs attribute creation Add the trace attributes to the default gendisk attributes, just like we already do for partitions. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220628171850.1313069-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 623e22492afa..f6f9b544365a 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -77,10 +77,6 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); -extern void blk_trace_remove_sysfs(struct device *dev); -extern int blk_trace_init_sysfs(struct device *dev); - -extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) @@ -91,13 +87,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) # define blk_add_cgroup_trace_msg(q, cg, fmt, ...) do { } while (0) -# define blk_trace_remove_sysfs(dev) do { } while (0) # define blk_trace_note_message_enabled(q) (false) -static inline int blk_trace_init_sysfs(struct device *dev) -{ - return 0; -} - #endif /* CONFIG_BLK_DEV_IO_TRACE */ #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 8682b92e5ab852b93739a0f2b261fff4c733be57 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jun 2022 19:18:50 +0200 Subject: blk-mq: cleanup disk sysfs registration Pass a gendisk to the sysfs register/unregister functions and give them descriptive names. Also move the unregistration helper next to the one doing the registration. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220628171850.1313069-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0fd96e92c6c6..43aad0da3305 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -691,7 +691,6 @@ struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); -void blk_mq_unregister_dev(struct device *, struct request_queue *); void blk_mq_destroy_queue(struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); -- cgit v1.2.3 From 8add9a3a2243166f8f60fc20e876caaf30a333f7 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Jun 2022 15:18:21 +0100 Subject: efi: Simplify arch_efi_call_virt() macro Currently, the arch_efi_call_virt() assumes all users of it will have defined a type 'efi_##f##_t' to make use of it. Simplify the arch_efi_call_virt() macro by eliminating the explicit need for efi_##f##_t type for every user of this macro. Signed-off-by: Sudeep Holla Acked-by: Russell King (Oracle) [ardb: apply Sudeep's ARM fix to i686, Loongarch and RISC-V too] Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 93ce85a14a46..9ff63acef1ec 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1200,6 +1200,8 @@ static inline void efi_check_for_embedded_firmwares(void) { } efi_status_t efi_random_get_seed(void); +#define arch_efi_call_virt(p, f, args...) ((p)->f(args)) + /* * Arch code can implement the following three template macros, avoiding * reptition for the void/non-void return cases of {__,}efi_call_virt(): -- cgit v1.2.3 From c3497fd009ef2c59eea60d21c3ac22de3585ed7d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jun 2022 19:50:29 -0400 Subject: fix short copy handling in copy_mc_pipe_to_iter() Unlike other copying operations on ITER_PIPE, copy_mc_to_iter() can result in a short copy. In that case we need to trim the unused buffers, as well as the length of partially filled one - it's not enough to set ->head, ->iov_offset and ->count to reflect how much had we copied. Not hard to fix, fortunately... I'd put a helper (pipe_discard_from(pipe, head)) into pipe_fs_i.h, rather than iov_iter.c - it has nothing to do with iov_iter and having it will allow us to avoid an ugly kludge in fs/splice.c. We could put it into lib/iov_iter.c for now and move it later, but I don't see the point going that way... Cc: stable@kernel.org # 4.19+ Fixes: ca146f6f091e "lib/iov_iter: Fix pipe handling in _copy_to_iter_mcsafe()" Reviewed-by: Jeff Layton Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Al Viro --- include/linux/pipe_fs_i.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index cb0fd633a610..4ea496924106 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -229,6 +229,15 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, return buf->ops->try_steal(pipe, buf); } +static inline void pipe_discard_from(struct pipe_inode_info *pipe, + unsigned int old_head) +{ + unsigned int mask = pipe->ring_size - 1; + + while (pipe->head > old_head) + pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]); +} + /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE -- cgit v1.2.3 From 9adf24a40978c19f57f44572b292b38938da7686 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Jun 2022 12:25:53 +0200 Subject: fs: port HAS_UNMAPPED_ID() to vfs{g,u}id_t The HAS_UNMAPPED_ID() helper is fully self contained so we can port it to vfs{g,u}id_t without much effort. Cc: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d6e3347cbf69..ec2e35886779 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2323,8 +2323,8 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns, struct inode *inode) { - return !uid_valid(i_uid_into_mnt(mnt_userns, inode)) || - !gid_valid(i_gid_into_mnt(mnt_userns, inode)); + return !vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) || + !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode)); } static inline int iocb_flags(struct file *file); -- cgit v1.2.3 From fc04dafd263d8c0b0251b63d47f35b29373d50f2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Jun 2022 13:15:10 +0200 Subject: mnt_idmapping: use new helpers in mapped_fs{g,u}id() The old non-type safe helpers will soon be removed. Cc: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/mnt_idmapping.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 21bd22a7b326..be83643cadff 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -422,7 +422,8 @@ static inline bool vfsgid_has_fsmapping(struct user_namespace *mnt_userns, static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, struct user_namespace *fs_userns) { - return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid()); + return from_vfsuid(mnt_userns, fs_userns, + VFSUIDT_INIT(current_fsuid())); } /** @@ -441,7 +442,8 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns, struct user_namespace *fs_userns) { - return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid()); + return from_vfsgid(mnt_userns, fs_userns, + VFSGIDT_INIT(current_fsgid())); } #endif /* _LINUX_MNT_IDMAPPING_H */ -- cgit v1.2.3 From acd6510dd7ab3664b69eb99e37c4fd6325a7d442 Mon Sep 17 00:00:00 2001 From: Tanmay Shah Date: Tue, 7 Jun 2022 15:42:54 -0700 Subject: firmware: xilinx: Add TF_A_PM_REGISTER_SGI SMC call SGI interrupt register and reset is performed by EEMI ioctl IOCTL_REGISTER_SGI. However, this is not correct use of EEMI call. SGI registration functionality does not qualify as energy management activity and so shouldn't be mapped to EEMI call. This new call will replace IOCTL_REGISTER_SGI and will be handled by TF-A specific handler in TF-A. To maintain backward compatibility for a while firmware driver will still use IOCTL_REGISTER_SGI as fallback strategy if new call fails or is not supported by TF-A. This new design also helps to make TF-A as pass through layer for EEMI calls. So we don't have to maintain PM_IOCTL as EEMI API ID in TF-A. Signed-off-by: Tanmay Shah Acked-by: Michal Simek Link: https://lore.kernel.org/r/20220607224253.54919-1-tanmay.shah@xilinx.com Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 1ec73d5352c3..cbde3b1fa414 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -34,6 +34,7 @@ #define PM_API_VERSION_2 2 /* ATF only commands */ +#define TF_A_PM_REGISTER_SGI 0xa04 #define PM_GET_TRUSTZONE_VERSION 0xa03 #define PM_SET_SUSPEND_MODE 0xa02 #define GET_CALLBACK_DATA 0xa01 @@ -468,6 +469,7 @@ int zynqmp_pm_feature(const u32 api_id); int zynqmp_pm_is_function_supported(const u32 api_id, const u32 id); int zynqmp_pm_set_feature_config(enum pm_feature_config_id id, u32 value); int zynqmp_pm_get_feature_config(enum pm_feature_config_id id, u32 *payload); +int zynqmp_pm_register_sgi(u32 sgi_num, u32 reset); #else static inline int zynqmp_pm_get_api_version(u32 *version) { @@ -733,6 +735,11 @@ static inline int zynqmp_pm_get_feature_config(enum pm_feature_config_id id, { return -ENODEV; } + +static inline int zynqmp_pm_register_sgi(u32 sgi_num, u32 reset) +{ + return -ENODEV; +} #endif #endif /* __FIRMWARE_ZYNQMP_H__ */ -- cgit v1.2.3 From 6ffcd825e7d0416d78fd41cd5b7856a78122cc8c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 28 Jun 2022 20:41:40 -0400 Subject: mm: Remove __delete_from_page_cache() This wrapper is no longer used. Remove it and all references to it. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/pagemap.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ce96866fbec4..44b9c265a234 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1107,10 +1107,6 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio, void filemap_remove_folio(struct folio *folio); void delete_from_page_cache(struct page *page); void __filemap_remove_folio(struct folio *folio, void *shadow); -static inline void __delete_from_page_cache(struct page *page, void *shadow) -{ - __filemap_remove_folio(page_folio(page), shadow); -} void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, struct folio_batch *fbatch); -- cgit v1.2.3 From 2bb876b58d593d7f2522ec0f41f20a74fde76822 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 1 Jun 2022 15:13:59 -0400 Subject: filemap: Remove add_to_page_cache() and add_to_page_cache_locked() These functions have no more users, so delete them. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Mike Kravetz Reviewed-by: Muchun Song --- include/linux/pagemap.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 44b9c265a234..dee519ef7436 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1098,8 +1098,6 @@ size_t fault_in_subpage_writeable(char __user *uaddr, size_t size); size_t fault_in_safe_writeable(const char __user *uaddr, size_t size); size_t fault_in_readable(const char __user *uaddr, size_t size); -int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp); int filemap_add_folio(struct address_space *mapping, struct folio *folio, @@ -1115,22 +1113,6 @@ bool filemap_release_folio(struct folio *folio, gfp_t gfp); loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, int whence); -/* - * Like add_to_page_cache_locked, but used to add newly allocated pages: - * the page is new, so we can just run __SetPageLocked() against it. - */ -static inline int add_to_page_cache(struct page *page, - struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) -{ - int error; - - __SetPageLocked(page); - error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); - if (unlikely(error)) - __ClearPageLocked(page); - return error; -} - /* Must be non-static for BPF error injection */ int __filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp); -- cgit v1.2.3 From be0ced5e9cb81a4c2edefd081a7794a1814fb60d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 3 Jun 2022 15:30:25 -0400 Subject: filemap: Add filemap_get_folios() This is the equivalent of find_get_pages() but fills a folio_batch instead of an array of pages. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Christian Brauner (Microsoft) --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index dee519ef7436..cfd0e8001b3b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -718,6 +718,8 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) return head + (index & (thp_nr_pages(head) - 1)); } +unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, + pgoff_t end, struct folio_batch *fbatch); unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, pgoff_t end, unsigned int nr_pages, struct page **pages); -- cgit v1.2.3 From 77414d195f905dd43f58bce82118775ffa59575c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 4 Jun 2022 17:39:09 -0400 Subject: vmscan: Add check_move_unevictable_folios() Change the guts of check_move_unevictable_pages() over to use folios and add check_move_unevictable_pages() as a wrapper. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Christian Brauner (Microsoft) --- include/linux/swap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 0c0fed1b348f..8672a7123ccd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -438,7 +438,8 @@ static inline bool node_reclaim_enabled(void) return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); } -extern void check_move_unevictable_pages(struct pagevec *pvec); +void check_move_unevictable_folios(struct folio_batch *fbatch); +void check_move_unevictable_pages(struct pagevec *pvec); extern void kswapd_run(int nid); extern void kswapd_stop(int nid); -- cgit v1.2.3 From bb4b42ba926224e26ed699e51def164b4b163935 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 4 Jun 2022 17:46:02 -0400 Subject: filemap: Remove find_get_pages_range() and associated functions All callers of find_get_pages_range(), pagevec_lookup_range() and pagevec_lookup() have now been removed. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Christian Brauner (Microsoft) --- include/linux/pagemap.h | 3 --- include/linux/pagevec.h | 10 ---------- 2 files changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index cfd0e8001b3b..87d4ea571240 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -720,9 +720,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); -unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, - pgoff_t end, unsigned int nr_pages, - struct page **pages); unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages); unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 67b1246f136b..6649154a2115 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -27,16 +27,6 @@ struct pagevec { void __pagevec_release(struct pagevec *pvec); void __pagevec_lru_add(struct pagevec *pvec); -unsigned pagevec_lookup_range(struct pagevec *pvec, - struct address_space *mapping, - pgoff_t *start, pgoff_t end); -static inline unsigned pagevec_lookup(struct pagevec *pvec, - struct address_space *mapping, - pgoff_t *start) -{ - return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1); -} - unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag); -- cgit v1.2.3 From 0e8e08cca5e3256a6209f02b482bee96fb91ba1b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 29 Apr 2022 08:49:28 -0400 Subject: netfs: Remove extern from function prototypes The 'extern' keyword is not necessary and removing it lets us shorten some lines. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/netfs.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 1773e5df8e65..11df38d03359 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -276,19 +276,18 @@ struct netfs_cache_ops { }; struct readahead_control; -extern void netfs_readahead(struct readahead_control *); +void netfs_readahead(struct readahead_control *); int netfs_read_folio(struct file *, struct folio *); -extern int netfs_write_begin(struct netfs_inode *, - struct file *, struct address_space *, - loff_t, unsigned int, struct folio **, - void **); - -extern void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); -extern void netfs_get_subrequest(struct netfs_io_subrequest *subreq, - enum netfs_sreq_ref_trace what); -extern void netfs_put_subrequest(struct netfs_io_subrequest *subreq, - bool was_async, enum netfs_sreq_ref_trace what); -extern void netfs_stats_show(struct seq_file *); +int netfs_write_begin(struct netfs_inode *, struct file *, + struct address_space *, loff_t pos, unsigned int len, + struct folio **, void **fsdata); + +void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); +void netfs_get_subrequest(struct netfs_io_subrequest *subreq, + enum netfs_sreq_ref_trace what); +void netfs_put_subrequest(struct netfs_io_subrequest *subreq, + bool was_async, enum netfs_sreq_ref_trace what); +void netfs_stats_show(struct seq_file *); /** * netfs_inode - Get the netfs inode context from the inode -- cgit v1.2.3 From a57f68ddc8865d59a19783080cc52fb4a11dc209 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 24 Jun 2022 17:18:45 +0300 Subject: reset: Fix devm bulk optional exclusive control getter Most likely due to copy-paste mistake the device managed version of the denoted reset control getter has been implemented with invalid semantic, which can be immediately spotted by having "WARN_ON(shared && acquired)" warning in the system log as soon as the method is called. Anyway let's fix it by altering the boolean arguments passed to the __devm_reset_control_bulk_get() method from - shared = true, optional = false, acquired = true to + shared = false, optional = true, acquired = true That's what they were supposed to be in the first place (see the non-devm version of the same method: reset_control_bulk_get_optional_exclusive()). Fixes: 48d71395896d ("reset: Add reset_control_bulk API") Signed-off-by: Serge Semin Reviewed-by: Dmitry Osipenko Signed-off-by: Philipp Zabel Link: https://lore.kernel.org/r/20220624141853.7417-2-Sergey.Semin@baikalelectronics.ru --- include/linux/reset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index 8a21b5756c3e..514ddf003efc 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -731,7 +731,7 @@ static inline int __must_check devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, true); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); } /** -- cgit v1.2.3 From 77940f0d96cd2ec9fe2125f74f513a7254bcdd7f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Jun 2022 16:31:12 +0200 Subject: mnt_idmapping: align kernel doc and parameter order The kernel documentation added for the new helpers had a few tiny ordering issues. Fix them. Signed-off-by: Christian Brauner (Microsoft) --- include/linux/mnt_idmapping.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index be83643cadff..41dc80f8b67c 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -70,12 +70,12 @@ static inline bool vfsgid_eq(vfsgid_t left, vfsgid_t right) /** * vfsuid_eq_kuid - check whether kuid and vfsuid have the same value - * @kuid: the kuid to compare * @vfsuid: the vfsuid to compare + * @kuid: the kuid to compare * - * Check whether @kuid and @vfsuid have the same values. + * Check whether @vfsuid and @kuid have the same values. * - * Return: true if @kuid and @vfsuid have the same value, false if not. + * Return: true if @vfsuid and @kuid have the same value, false if not. * Comparison between two invalid uids returns false. */ static inline bool vfsuid_eq_kuid(vfsuid_t vfsuid, kuid_t kuid) @@ -85,15 +85,15 @@ static inline bool vfsuid_eq_kuid(vfsuid_t vfsuid, kuid_t kuid) /** * vfsgid_eq_kgid - check whether kgid and vfsgid have the same value - * @kgid: the kgid to compare * @vfsgid: the vfsgid to compare + * @kgid: the kgid to compare * - * Check whether @kgid and @vfsgid have the same values. + * Check whether @vfsgid and @kgid have the same values. * - * Return: true if @kgid and @vfsgid have the same value, false if not. + * Return: true if @vfsgid and @kgid have the same value, false if not. * Comparison between two invalid gids returns false. */ -static inline bool vfsgid_eq_kgid(kgid_t kgid, vfsgid_t vfsgid) +static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) { return vfsgid_valid(vfsgid) && __vfsgid_val(vfsgid) == __kgid_val(kgid); } @@ -171,7 +171,7 @@ static inline bool no_idmapping(const struct user_namespace *mnt_userns, } /** - * mapped_kuid_fs - map a filesystem kuid into a mnt_userns + * make_vfsuid - map a filesystem kuid into a mnt_userns * @mnt_userns: the mount's idmapping * @fs_userns: the filesystem's idmapping * @kuid : kuid to be mapped @@ -216,7 +216,7 @@ static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, } /** - * mapped_kgid_fs - map a filesystem kgid into a mnt_userns + * make_vfsgid - map a filesystem kgid into a mnt_userns * @mnt_userns: the mount's idmapping * @fs_userns: the filesystem's idmapping * @kgid : kgid to be mapped -- cgit v1.2.3 From 6a27d28c81bc5843de2490688a04ee5baa6615e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 29 Jun 2022 08:20:12 +0200 Subject: block: move ->ia_ranges from the request_queue to the gendisk Independent access ranges only matter for file system I/O and are only valid with a registered gendisk, so move them there. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20220629062013.1331068-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 22b12531aeb7..b9a94c53c6cd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -171,6 +171,12 @@ struct gendisk { struct badblocks *bb; struct lockdep_map lockdep_map; u64 diskseq; + + /* + * Independent sector access ranges. This is always NULL for + * devices that do not have multiple independent access ranges. + */ + struct blk_independent_access_ranges *ia_ranges; }; static inline bool disk_live(struct gendisk *disk) @@ -539,12 +545,6 @@ struct request_queue { bool mq_sysfs_init_done; - /* - * Independent sector access ranges. This is always NULL for - * devices that do not have multiple independent access ranges. - */ - struct blk_independent_access_ranges *ia_ranges; - /** * @srcu: Sleepable RCU. Use as lock when type of the request queue * is blocking (BLK_MQ_F_BLOCKING). Must be the last member -- cgit v1.2.3 From 445cbd219ac3b8f451153c210aaf97adcbf4bd02 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 23 Jun 2022 22:14:09 +0100 Subject: regmap-irq: Convert bool bitfields to unsigned int Use 'unsigned int' for bitfields for consistency with most other kernel code. Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220623211420.918875-2-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index d5b08f4f0dc0..bcbc5d4ffd30 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1534,19 +1534,19 @@ struct regmap_irq_chip { unsigned int type_base; unsigned int *virt_reg_base; unsigned int irq_reg_stride; - bool mask_writeonly:1; - bool init_ack_masked:1; - bool mask_invert:1; - bool use_ack:1; - bool ack_invert:1; - bool clear_ack:1; - bool wake_invert:1; - bool runtime_pm:1; - bool type_invert:1; - bool type_in_mask:1; - bool clear_on_unmask:1; - bool not_fixed_stride:1; - bool status_invert:1; + unsigned int mask_writeonly:1; + unsigned int init_ack_masked:1; + unsigned int mask_invert:1; + unsigned int use_ack:1; + unsigned int ack_invert:1; + unsigned int clear_ack:1; + unsigned int wake_invert:1; + unsigned int runtime_pm:1; + unsigned int type_invert:1; + unsigned int type_in_mask:1; + unsigned int clear_on_unmask:1; + unsigned int not_fixed_stride:1; + unsigned int status_invert:1; int num_regs; -- cgit v1.2.3 From 53a1a16dcc972163bd5816192d5d63ae433c9e56 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 23 Jun 2022 22:14:10 +0100 Subject: regmap-irq: Remove unused type_reg_stride field It appears that no chip ever required a nonzero type_reg_stride and commit 1066cfbdfa3f ("regmap-irq: Extend sub-irq to support non-fixed reg strides") broke support. Just remove the field. Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220623211420.918875-3-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index bcbc5d4ffd30..58db2206f193 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1503,8 +1503,6 @@ struct regmap_irq_sub_irq_map { * @num_type_reg: Number of type registers. * @num_virt_regs: Number of non-standard irq configuration registers. * If zero unsupported. - * @type_reg_stride: Stride to use for chips where type registers are not - * contiguous. * @handle_pre_irq: Driver specific callback to handle interrupt from device * before regmap_irq_handler process the interrupts. * @handle_post_irq: Driver specific callback to handle interrupt from device @@ -1555,7 +1553,6 @@ struct regmap_irq_chip { int num_type_reg; int num_virt_regs; - unsigned int type_reg_stride; int (*handle_pre_irq)(void *irq_drv_data); int (*handle_post_irq)(void *irq_drv_data); -- cgit v1.2.3 From 610fdd668e6af48fcae7908161d14eee3a95ec92 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 23 Jun 2022 22:14:12 +0100 Subject: regmap-irq: Remove an unnecessary restriction on type_in_mask Check types_supported instead of checking type_rising/falling_val when using type_in_mask interrupts. This makes the intent clearer and allows a type_in_mask irq to support level or edge triggers, rather than only edge triggers. Update the documentation and comments to reflect the new behavior. This shouldn't affect existing drivers, because if they didn't set types_supported properly the type buffer wouldn't be updated. Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220623211420.918875-5-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 58db2206f193..b404c59b3b20 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1484,9 +1484,11 @@ struct regmap_irq_sub_irq_map { * @clear_ack: Use this to set 1 and 0 or vice-versa to clear interrupts. * @wake_invert: Inverted wake register: cleared bits are wake enabled. * @type_invert: Invert the type flags. - * @type_in_mask: Use the mask registers for controlling irq type. For - * interrupts defining type_rising/falling_mask use mask_base - * for edge configuration and never update bits in type_base. + * @type_in_mask: Use the mask registers for controlling irq type. Use this if + * the hardware provides separate bits for rising/falling edge + * or low/high level interrupts and they should be combined into + * a single logical interrupt. Use &struct regmap_irq_type data + * to define the mask bit for each irq type. * @clear_on_unmask: For chips with interrupts cleared on read: read the status * registers before unmasking interrupts to clear any bits * set when they were masked. -- cgit v1.2.3 From ad22b3e98f9430896bd4bd8f4fbff4667f02a0c8 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 23 Jun 2022 22:14:14 +0100 Subject: regmap-irq: Remove mask_writeonly and regmap_irq_update_bits() Commit a71411dbf6c8 ("regmap: irq: add chip option mask_writeonly") introduced the mask_writeonly option, but it isn't used now and it appears it's never been used by any in-tree drivers. The motivation for the option is mentioned in the commit message, Some irq controllers have writeonly/multipurpose register layouts. In those cases we read invalid data back. [...] The option causes mask register updates to use regmap_write_bits() instead of regmap_update_bits(). However, regmap_write_bits() doesn't solve the reading invalid data problem. It's still a read-modify-write op like regmap_update_bits(). The difference is that 'update bits' will only write the new value if it is different from the current value, while 'write bits' will write the new value unconditionally, even if it's the same as the current value. This seems like a bit of a specialized use case and probably isn't that useful for regmap-irq, so let's just remove the option and go back to using an 'update bits' op for the mask registers. We can always add the option back if some driver ends up needing it in the future. Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220623211420.918875-7-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index b404c59b3b20..6489b3610362 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1468,7 +1468,6 @@ struct regmap_irq_sub_irq_map { * * @status_base: Base status register address. * @mask_base: Base mask register address. - * @mask_writeonly: Base mask register is write only. * @unmask_base: Base unmask register address. for chips who have * separate mask and unmask registers * @ack_base: Base ack address. If zero then the chip is clear on read. @@ -1534,7 +1533,6 @@ struct regmap_irq_chip { unsigned int type_base; unsigned int *virt_reg_base; unsigned int irq_reg_stride; - unsigned int mask_writeonly:1; unsigned int init_ack_masked:1; unsigned int mask_invert:1; unsigned int use_ack:1; -- cgit v1.2.3 From faa87ce9196dbb074d75bd4aecb8bacf18f19b4e Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 23 Jun 2022 22:14:16 +0100 Subject: regmap-irq: Introduce config registers for irq types Config registers provide a more uniform approach to handling irq type registers. They are essentially an extension of the virtual registers used by the qcom-pm8008 driver. Config registers can be represented as a 2D array: config_base[0] reg0,0 reg0,1 reg0,2 reg0,3 config_base[1] reg1,0 reg1,1 reg1,2 reg1,3 config_base[2] reg2,0 reg2,1 reg2,2 reg2,3 There are 'num_config_bases' base registers, each of which is used to address 'num_config_regs' registers. The addresses are calculated in the same way as for other bases. It is assumed that an irq's type is controlled by one column of registers; that column is identified by the irq's 'type_reg_offset'. The set_type_config() callback is responsible for updating the config register contents. It receives an array of buffers (each represents a row of registers) and the index of the column to update, along with the 'struct regmap_irq' description and requested irq type. Buffered values are written to registers in regmap_irq_sync_unlock(). Note that the entire register contents are overwritten, which is a minor change in behavior from type registers via 'type_base'. Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220623211420.918875-9-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 6489b3610362..791c15ad1f63 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1475,6 +1475,7 @@ struct regmap_irq_sub_irq_map { * @wake_base: Base address for wake enables. If zero unsupported. * @type_base: Base address for irq type. If zero unsupported. * @virt_reg_base: Base addresses for extra config regs. + * @config_base: Base address for IRQ type config regs. If null unsupported. * @irq_reg_stride: Stride to use for chips where registers are not contiguous. * @init_ack_masked: Ack all masked interrupts once during initalization. * @mask_invert: Inverted mask register: cleared bits are masked out. @@ -1504,12 +1505,15 @@ struct regmap_irq_sub_irq_map { * @num_type_reg: Number of type registers. * @num_virt_regs: Number of non-standard irq configuration registers. * If zero unsupported. + * @num_config_bases: Number of config base registers. + * @num_config_regs: Number of config registers for each config base register. * @handle_pre_irq: Driver specific callback to handle interrupt from device * before regmap_irq_handler process the interrupts. * @handle_post_irq: Driver specific callback to handle interrupt from device * after handling the interrupts in regmap_irq_handler(). * @set_type_virt: Driver specific callback to extend regmap_irq_set_type() * and configure virt regs. + * @set_type_config: Callback used for configuring irq types. * @irq_drv_data: Driver specific IRQ data which is passed as parameter when * driver specific pre/post interrupt handler is called. * @@ -1532,6 +1536,7 @@ struct regmap_irq_chip { unsigned int wake_base; unsigned int type_base; unsigned int *virt_reg_base; + const unsigned int *config_base; unsigned int irq_reg_stride; unsigned int init_ack_masked:1; unsigned int mask_invert:1; @@ -1553,16 +1558,23 @@ struct regmap_irq_chip { int num_type_reg; int num_virt_regs; + int num_config_bases; + int num_config_regs; int (*handle_pre_irq)(void *irq_drv_data); int (*handle_post_irq)(void *irq_drv_data); int (*set_type_virt)(unsigned int **buf, unsigned int type, unsigned long hwirq, int reg); + int (*set_type_config)(unsigned int **buf, unsigned int type, + const struct regmap_irq *irq_data, int idx); void *irq_drv_data; }; struct regmap_irq_chip_data; +int regmap_irq_set_type_config_simple(unsigned int **buf, unsigned int type, + const struct regmap_irq *irq_data, int idx); + int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, int irq_base, const struct regmap_irq_chip *chip, struct regmap_irq_chip_data **data); -- cgit v1.2.3 From 9edd4f5aee8470dcfd0db04005908f61fbfae8e0 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 23 Jun 2022 22:14:17 +0100 Subject: regmap-irq: Deprecate type registers and virtual registers Config registers can be used to replace both type and virtual registers, so mark both features are deprecated and issue a warning if they're used. Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220623211420.918875-10-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 791c15ad1f63..c9a9bbbb6261 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1473,8 +1473,10 @@ struct regmap_irq_sub_irq_map { * @ack_base: Base ack address. If zero then the chip is clear on read. * Using zero value is possible with @use_ack bit. * @wake_base: Base address for wake enables. If zero unsupported. - * @type_base: Base address for irq type. If zero unsupported. - * @virt_reg_base: Base addresses for extra config regs. + * @type_base: Base address for irq type. If zero unsupported. Deprecated, + * use @config_base instead. + * @virt_reg_base: Base addresses for extra config regs. Deprecated, use + * @config_base instead. * @config_base: Base address for IRQ type config regs. If null unsupported. * @irq_reg_stride: Stride to use for chips where registers are not contiguous. * @init_ack_masked: Ack all masked interrupts once during initalization. @@ -1483,7 +1485,8 @@ struct regmap_irq_sub_irq_map { * @ack_invert: Inverted ack register: cleared bits for ack. * @clear_ack: Use this to set 1 and 0 or vice-versa to clear interrupts. * @wake_invert: Inverted wake register: cleared bits are wake enabled. - * @type_invert: Invert the type flags. + * @type_invert: Invert the type flags. Deprecated, use config registers + * instead. * @type_in_mask: Use the mask registers for controlling irq type. Use this if * the hardware provides separate bits for rising/falling edge * or low/high level interrupts and they should be combined into @@ -1502,9 +1505,11 @@ struct regmap_irq_sub_irq_map { * @irqs: Descriptors for individual IRQs. Interrupt numbers are * assigned based on the index in the array of the interrupt. * @num_irqs: Number of descriptors. - * @num_type_reg: Number of type registers. + * @num_type_reg: Number of type registers. Deprecated, use config registers + * instead. * @num_virt_regs: Number of non-standard irq configuration registers. - * If zero unsupported. + * If zero unsupported. Deprecated, use config registers + * instead. * @num_config_bases: Number of config base registers. * @num_config_regs: Number of config registers for each config base register. * @handle_pre_irq: Driver specific callback to handle interrupt from device @@ -1512,7 +1517,8 @@ struct regmap_irq_sub_irq_map { * @handle_post_irq: Driver specific callback to handle interrupt from device * after handling the interrupts in regmap_irq_handler(). * @set_type_virt: Driver specific callback to extend regmap_irq_set_type() - * and configure virt regs. + * and configure virt regs. Deprecated, use @set_type_config + * callback and config registers instead. * @set_type_config: Callback used for configuring irq types. * @irq_drv_data: Driver specific IRQ data which is passed as parameter when * driver specific pre/post interrupt handler is called. -- cgit v1.2.3 From e8ffb12e7f065db616a3eba79ff138bececf0825 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 23 Jun 2022 22:14:18 +0100 Subject: regmap-irq: Fix inverted handling of unmask registers To me "unmask" suggests that we write 1s to the register when an interrupt is enabled. This also makes sense because it's the opposite of what the "mask" register does (write 1s to disable an interrupt). But regmap-irq does the opposite: for a disabled interrupt, it writes 1s to "unmask" and 0s to "mask". This is surprising and deviates from the usual way mask registers are handled. Additionally, mask_invert didn't interact with unmask registers properly -- it caused them to be ignored entirely. Fix this by making mask and unmask registers orthogonal, using the following behavior: * Mask registers are written with 1s for disabled interrupts. * Unmask registers are written with 1s for enabled interrupts. This behavior supports both normal or inverted mask registers and separate set/clear registers via different combinations of mask_base/unmask_base. The old unmask register behavior is deprecated. Drivers need to opt-in to the new behavior by setting mask_unmask_non_inverted. Warnings are issued if the driver relies on deprecated behavior. Chips that only set one of mask_base/unmask_base don't have to use the mask_unmask_non_inverted flag because that use case was previously not supported. The mask_invert flag is also deprecated in favor of describing inverted mask registers as unmask registers. Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220623211420.918875-11-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index c9a9bbbb6261..c9c9e39750ed 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1467,9 +1467,10 @@ struct regmap_irq_sub_irq_map { * main_status set. * * @status_base: Base status register address. - * @mask_base: Base mask register address. - * @unmask_base: Base unmask register address. for chips who have - * separate mask and unmask registers + * @mask_base: Base mask register address. Mask bits are set to 1 when an + * interrupt is masked, 0 when unmasked. + * @unmask_base: Base unmask register address. Unmask bits are set to 1 when + * an interrupt is unmasked and 0 when masked. * @ack_base: Base ack address. If zero then the chip is clear on read. * Using zero value is possible with @use_ack bit. * @wake_base: Base address for wake enables. If zero unsupported. @@ -1481,6 +1482,16 @@ struct regmap_irq_sub_irq_map { * @irq_reg_stride: Stride to use for chips where registers are not contiguous. * @init_ack_masked: Ack all masked interrupts once during initalization. * @mask_invert: Inverted mask register: cleared bits are masked out. + * Deprecated; prefer describing an inverted mask register as + * an unmask register. + * @mask_unmask_non_inverted: Controls mask bit inversion for chips that set + * both @mask_base and @unmask_base. If false, mask and unmask bits are + * inverted (which is deprecated behavior); if true, bits will not be + * inverted and the registers keep their normal behavior. Note that if + * you use only one of @mask_base or @unmask_base, this flag has no + * effect and is unnecessary. Any new drivers that set both @mask_base + * and @unmask_base should set this to true to avoid relying on the + * deprecated behavior. * @use_ack: Use @ack register even if it is zero. * @ack_invert: Inverted ack register: cleared bits for ack. * @clear_ack: Use this to set 1 and 0 or vice-versa to clear interrupts. @@ -1546,6 +1557,7 @@ struct regmap_irq_chip { unsigned int irq_reg_stride; unsigned int init_ack_masked:1; unsigned int mask_invert:1; + unsigned int mask_unmask_non_inverted:1; unsigned int use_ack:1; unsigned int ack_invert:1; unsigned int clear_ack:1; -- cgit v1.2.3 From bdf9b86cd3adbbcf590ab82b74ab8554534c9b6e Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 23 Jun 2022 22:14:19 +0100 Subject: regmap-irq: Add get_irq_reg() callback Replace the internal sub_irq_reg() function with a public callback that drivers can use when they have more complex register layouts. The default implementation is regmap_irq_get_irq_reg_linear(), used if the chip doesn't provide its own callback. Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220623211420.918875-12-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index c9c9e39750ed..0d240922d4da 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1440,6 +1440,8 @@ struct regmap_irq_sub_irq_map { unsigned int *offset; }; +struct regmap_irq_chip_data; + /** * struct regmap_irq_chip - Description of a generic regmap irq_chip. * @@ -1531,6 +1533,13 @@ struct regmap_irq_sub_irq_map { * and configure virt regs. Deprecated, use @set_type_config * callback and config registers instead. * @set_type_config: Callback used for configuring irq types. + * @get_irq_reg: Callback for mapping (base register, index) pairs to register + * addresses. The base register will be one of @status_base, + * @mask_base, etc., @main_status, or any of @config_base. + * The index will be in the range [0, num_main_regs[ for the + * main status base, [0, num_type_settings[ for any config + * register base, and [0, num_regs[ for any other base. + * If unspecified then regmap_irq_get_irq_reg_linear() is used. * @irq_drv_data: Driver specific IRQ data which is passed as parameter when * driver specific pre/post interrupt handler is called. * @@ -1585,11 +1594,13 @@ struct regmap_irq_chip { unsigned long hwirq, int reg); int (*set_type_config)(unsigned int **buf, unsigned int type, const struct regmap_irq *irq_data, int idx); + unsigned int (*get_irq_reg)(struct regmap_irq_chip_data *data, + unsigned int base, int index); void *irq_drv_data; }; -struct regmap_irq_chip_data; - +unsigned int regmap_irq_get_irq_reg_linear(struct regmap_irq_chip_data *data, + unsigned int base, int index); int regmap_irq_set_type_config_simple(unsigned int **buf, unsigned int type, const struct regmap_irq *irq_data, int idx); -- cgit v1.2.3 From 48e014ee9a61e8f4700987b82f7cb1dc3c89fa76 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 23 Jun 2022 22:14:20 +0100 Subject: regmap-irq: Deprecate the not_fixed_stride flag This flag is a bit of a hack and the same thing can be accomplished using a custom ->get_irq_reg() callback. Add a warning to catch any use of the flag. Signed-off-by: Aidan MacDonald Link: https://lore.kernel.org/r/20220623211420.918875-13-aidanmacdonald.0x0@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 0d240922d4da..7cf2157134ac 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1509,8 +1509,10 @@ struct regmap_irq_chip_data; * registers before unmasking interrupts to clear any bits * set when they were masked. * @not_fixed_stride: Used when chip peripherals are not laid out with fixed - * stride. Must be used with sub_reg_offsets containing the - * offsets to each peripheral. + * stride. Must be used with sub_reg_offsets containing the + * offsets to each peripheral. Deprecated; the same thing + * can be accomplished with a @get_irq_reg callback, without + * the need for a @sub_reg_offsets table. * @status_invert: Inverted status register: cleared bits are active interrupts. * @runtime_pm: Hold a runtime PM lock on the device when accessing it. * -- cgit v1.2.3 From ae92b1c84306a68c361d90b46ad3acef99c10e29 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 28 Jun 2022 10:46:24 +0100 Subject: usb: typec_altmode: add a missing "@" at a kernel-doc parameter Without that, the parameter is not properly parsed: include/linux/usb/typec_altmode.h:132: warning: Function parameter or member 'altmode' not described in 'typec_altmode_get_orientation' Signed-off-by: Mauro Carvalho Chehab Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/70dc4c5d744cf1fe9a0efe6b85deaa0489628282.1656409369.git.mchehab@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_altmode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h index 65933cbe9129..350d49012659 100644 --- a/include/linux/usb/typec_altmode.h +++ b/include/linux/usb/typec_altmode.h @@ -124,7 +124,7 @@ struct typec_altmode *typec_match_altmode(struct typec_altmode **altmodes, /** * typec_altmode_get_orientation - Get cable plug orientation - * altmode: Handle to the alternate mode + * @altmode: Handle to the alternate mode */ static inline enum typec_orientation typec_altmode_get_orientation(struct typec_altmode *altmode) -- cgit v1.2.3 From af3f4134006bf3bf894179c08aaf98ed5938cf4e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:04 -0700 Subject: bpf: add bpf_func_t and trampoline helpers I'll be adding lsm cgroup specific helpers that grab trampoline mutex. No functional changes. Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-2-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d05e1495a06e..d547be9db75f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -56,6 +56,8 @@ typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, struct bpf_iter_aux_info *aux); typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); +typedef unsigned int (*bpf_func_t)(const void *, + const struct bpf_insn *); struct bpf_iter_seq_info { const struct seq_operations *seq_ops; bpf_iter_init_seq_priv_t init_seq_private; @@ -879,8 +881,7 @@ struct bpf_dispatcher { static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, - unsigned int (*bpf_func)(const void *, - const struct bpf_insn *)) + bpf_func_t bpf_func) { return bpf_func(ctx, insnsi); } @@ -909,8 +910,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ - unsigned int (*bpf_func)(const void *, \ - const struct bpf_insn *)) \ + bpf_func_t bpf_func) \ { \ return bpf_func(ctx, insnsi); \ } \ @@ -921,8 +921,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ - unsigned int (*bpf_func)(const void *, \ - const struct bpf_insn *)); \ + bpf_func_t bpf_func); \ extern struct bpf_dispatcher bpf_dispatcher_##name; #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) -- cgit v1.2.3 From 00442143a2ab7f1da46fbf4d2a99c85df767d49a Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:05 -0700 Subject: bpf: convert cgroup_bpf.progs to hlist This lets us reclaim some space to be used by new cgroup lsm slots. Before: struct cgroup_bpf { struct bpf_prog_array * effective[23]; /* 0 184 */ /* --- cacheline 2 boundary (128 bytes) was 56 bytes ago --- */ struct list_head progs[23]; /* 184 368 */ /* --- cacheline 8 boundary (512 bytes) was 40 bytes ago --- */ u32 flags[23]; /* 552 92 */ /* XXX 4 bytes hole, try to pack */ /* --- cacheline 10 boundary (640 bytes) was 8 bytes ago --- */ struct list_head storages; /* 648 16 */ struct bpf_prog_array * inactive; /* 664 8 */ struct percpu_ref refcnt; /* 672 16 */ struct work_struct release_work; /* 688 32 */ /* size: 720, cachelines: 12, members: 7 */ /* sum members: 716, holes: 1, sum holes: 4 */ /* last cacheline: 16 bytes */ }; After: struct cgroup_bpf { struct bpf_prog_array * effective[23]; /* 0 184 */ /* --- cacheline 2 boundary (128 bytes) was 56 bytes ago --- */ struct hlist_head progs[23]; /* 184 184 */ /* --- cacheline 5 boundary (320 bytes) was 48 bytes ago --- */ u8 flags[23]; /* 368 23 */ /* XXX 1 byte hole, try to pack */ /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */ struct list_head storages; /* 392 16 */ struct bpf_prog_array * inactive; /* 408 8 */ struct percpu_ref refcnt; /* 416 16 */ struct work_struct release_work; /* 432 72 */ /* size: 504, cachelines: 8, members: 7 */ /* sum members: 503, holes: 1, sum holes: 1 */ /* last cacheline: 56 bytes */ }; Suggested-by: Jakub Sitnicki Reviewed-by: Jakub Sitnicki Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-3-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf-cgroup-defs.h | 4 ++-- include/linux/bpf-cgroup.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 695d1224a71b..5d268e76d8e6 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -47,8 +47,8 @@ struct cgroup_bpf { * have either zero or one element * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS */ - struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE]; - u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE]; + struct hlist_head progs[MAX_CGROUP_BPF_ATTACH_TYPE]; + u8 flags[MAX_CGROUP_BPF_ATTACH_TYPE]; /* list of cgroup shared storages */ struct list_head storages; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 669d96d074ad..6673acfbf2ef 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -95,7 +95,7 @@ struct bpf_cgroup_link { }; struct bpf_prog_list { - struct list_head node; + struct hlist_node node; struct bpf_prog *prog; struct bpf_cgroup_link *link; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; -- cgit v1.2.3 From 69fd337a975c7e690dfe49d9cb4fe5ba1e6db44e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:06 -0700 Subject: bpf: per-cgroup lsm flavor Allow attaching to lsm hooks in the cgroup context. Attaching to per-cgroup LSM works exactly like attaching to other per-cgroup hooks. New BPF_LSM_CGROUP is added to trigger new mode; the actual lsm hook we attach to is signaled via existing attach_btf_id. For the hooks that have 'struct socket' or 'struct sock' as its first argument, we use the cgroup associated with that socket. For the rest, we use 'current' cgroup (this is all on default hierarchy == v2 only). Note that for some hooks that work on 'struct sock' we still take the cgroup from 'current' because some of them work on the socket that hasn't been properly initialized yet. Behind the scenes, we allocate a shim program that is attached to the trampoline and runs cgroup effective BPF programs array. This shim has some rudimentary ref counting and can be shared between several programs attaching to the same lsm hook from different cgroups. Note that this patch bloats cgroup size because we add 211 cgroup_bpf_attach_type(s) for simplicity sake. This will be addressed in the subsequent patch. Also note that we only add non-sleepable flavor for now. To enable sleepable use-cases, bpf_prog_run_array_cg has to grab trace rcu, shim programs have to be freed via trace rcu, cgroup_bpf.effective should be also trace-rcu-managed + maybe some other changes that I'm not aware of. Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-4-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf-cgroup-defs.h | 8 ++++++++ include/linux/bpf-cgroup.h | 7 +++++++ include/linux/bpf.h | 24 ++++++++++++++++++++++++ include/linux/bpf_lsm.h | 13 +++++++++++++ include/linux/btf_ids.h | 3 ++- 5 files changed, 54 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 5d268e76d8e6..b99f8c3e37ea 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -10,6 +10,12 @@ struct bpf_prog_array; +#ifdef CONFIG_BPF_LSM +#define CGROUP_LSM_NUM 211 /* will be addressed in the next patch */ +#else +#define CGROUP_LSM_NUM 0 +#endif + enum cgroup_bpf_attach_type { CGROUP_BPF_ATTACH_TYPE_INVALID = -1, CGROUP_INET_INGRESS = 0, @@ -35,6 +41,8 @@ enum cgroup_bpf_attach_type { CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, + CGROUP_LSM_START, + CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, MAX_CGROUP_BPF_ATTACH_TYPE }; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 6673acfbf2ef..2bd1b5f8de9b 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -23,6 +23,13 @@ struct ctl_table; struct ctl_table_header; struct task_struct; +unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx, + const struct bpf_insn *insn); +unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx, + const struct bpf_insn *insn); +unsigned int __cgroup_bpf_run_lsm_current(const void *ctx, + const struct bpf_insn *insn); + #ifdef CONFIG_CGROUP_BPF #define CGROUP_ATYPE(type) \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d547be9db75f..77cd613a00bd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -794,6 +794,10 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); @@ -1060,6 +1064,7 @@ struct bpf_prog_aux { struct user_struct *user; u64 load_time; /* ns since boottime */ u32 verified_insns; + int cgroup_atype; /* enum cgroup_bpf_attach_type */ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; #ifdef CONFIG_SECURITY @@ -1167,6 +1172,11 @@ struct bpf_tramp_link { u64 cookie; }; +struct bpf_shim_tramp_link { + struct bpf_tramp_link link; + struct bpf_trampoline *trampoline; +}; + struct bpf_tracing_link { struct bpf_tramp_link link; enum bpf_attach_type attach_type; @@ -1245,6 +1255,9 @@ struct bpf_dummy_ops { int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, + int cgroup_atype); +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog); #else static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) { @@ -1268,6 +1281,14 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, { return -EINVAL; } +static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, + int cgroup_atype) +{ + return -EOPNOTSUPP; +} +static inline void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog) +{ +} #endif struct bpf_array { @@ -2368,6 +2389,8 @@ extern const struct bpf_func_proto bpf_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; +extern const struct bpf_func_proto bpf_set_retval_proto; +extern const struct bpf_func_proto bpf_get_retval_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2485,6 +2508,7 @@ int bpf_arch_text_invalidate(void *dst, size_t len); struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); +int btf_id_set_index(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 479c101546ad..61787a5f6af9 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -42,6 +42,9 @@ extern const struct bpf_func_proto bpf_inode_storage_get_proto; extern const struct bpf_func_proto bpf_inode_storage_delete_proto; void bpf_inode_storage_free(struct inode *inode); +int bpf_lsm_hook_idx(u32 btf_id); +void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func); + #else /* !CONFIG_BPF_LSM */ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) @@ -65,6 +68,16 @@ static inline void bpf_inode_storage_free(struct inode *inode) { } +static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, + bpf_func_t *bpf_func) +{ +} + +static inline int bpf_lsm_hook_idx(u32 btf_id) +{ + return -EINVAL; +} + #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 335a19092368..252a4befeab1 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -179,7 +179,8 @@ extern struct btf_id_set name; BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) + BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCKET, socket) enum { #define BTF_SOCK_TYPE(name, str) name, -- cgit v1.2.3 From c0e19f2c9a3edd38e4b1bdae98eb44555d02bc31 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:07 -0700 Subject: bpf: minimize number of allocated lsm slots per program Previous patch adds 1:1 mapping between all 211 LSM hooks and bpf_cgroup program array. Instead of reserving a slot per possible hook, reserve 10 slots per cgroup for lsm programs. Those slots are dynamically allocated on demand and reclaimed. struct cgroup_bpf { struct bpf_prog_array * effective[33]; /* 0 264 */ /* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */ struct hlist_head progs[33]; /* 264 264 */ /* --- cacheline 8 boundary (512 bytes) was 16 bytes ago --- */ u8 flags[33]; /* 528 33 */ /* XXX 7 bytes hole, try to pack */ struct list_head storages; /* 568 16 */ /* --- cacheline 9 boundary (576 bytes) was 8 bytes ago --- */ struct bpf_prog_array * inactive; /* 584 8 */ struct percpu_ref refcnt; /* 592 16 */ struct work_struct release_work; /* 608 72 */ /* size: 680, cachelines: 11, members: 7 */ /* sum members: 673, holes: 1, sum holes: 7 */ /* last cacheline: 40 bytes */ }; Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-5-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf-cgroup-defs.h | 3 ++- include/linux/bpf.h | 9 ++++++++- include/linux/bpf_lsm.h | 6 ------ 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index b99f8c3e37ea..7b121bd780eb 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -11,7 +11,8 @@ struct bpf_prog_array; #ifdef CONFIG_BPF_LSM -#define CGROUP_LSM_NUM 211 /* will be addressed in the next patch */ +/* Maximum number of concurrently attachable per-cgroup LSM hooks. */ +#define CGROUP_LSM_NUM 10 #else #define CGROUP_LSM_NUM 0 #endif diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 77cd613a00bd..5d2afa55c7c3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2508,7 +2508,6 @@ int bpf_arch_text_invalidate(void *dst, size_t len); struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); -int btf_id_set_index(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 @@ -2545,4 +2544,12 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); int bpf_dynptr_check_size(u32 size); +#ifdef CONFIG_BPF_LSM +void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); +void bpf_cgroup_atype_put(int cgroup_atype); +#else +static inline void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) {} +static inline void bpf_cgroup_atype_put(int cgroup_atype) {} +#endif /* CONFIG_BPF_LSM */ + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 61787a5f6af9..4bcf76a9bb06 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -42,7 +42,6 @@ extern const struct bpf_func_proto bpf_inode_storage_get_proto; extern const struct bpf_func_proto bpf_inode_storage_delete_proto; void bpf_inode_storage_free(struct inode *inode); -int bpf_lsm_hook_idx(u32 btf_id); void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func); #else /* !CONFIG_BPF_LSM */ @@ -73,11 +72,6 @@ static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, { } -static inline int bpf_lsm_hook_idx(u32 btf_id) -{ - return -EINVAL; -} - #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ -- cgit v1.2.3 From 9113d7e48e9128522b9f5a54dfd30dff10509a92 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:09 -0700 Subject: bpf: expose bpf_{g,s}etsockopt to lsm cgroup I don't see how to make it nice without introducing btf id lists for the hooks where these helpers are allowed. Some LSM hooks work on the locked sockets, some are triggering early and don't grab any locks, so have two lists for now: 1. LSM hooks which trigger under socket lock - minority of the hooks, but ideal case for us, we can expose existing BTF-based helpers 2. LSM hooks which trigger without socket lock, but they trigger early in the socket creation path where it should be safe to do setsockopt without any locks 3. The rest are prohibited. I'm thinking that this use-case might be a good gateway to sleeping lsm cgroup hooks in the future. We can either expose lock/unlock operations (and add tracking to the verifier) or have another set of bpf_setsockopt wrapper that grab the locks and might sleep. Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-7-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5d2afa55c7c3..2b21f2a3452f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2386,6 +2386,8 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; +extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto; +extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; -- cgit v1.2.3 From f163f0302ab69722c052519f4014814bf10026a9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:21 +0200 Subject: context_tracking: Rename context_tracking_user_enter/exit() to user_enter/exit_callable() context_tracking_user_enter() and context_tracking_user_exit() are ASM callable versions of user_enter() and user_exit() for architectures that didn't manage to check the context tracking static key from ASM. Change those function names to better reflect their purpose. [ frederic: Apply Max Filippov feedback. ] Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 773035124bad..69532cd18f72 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -19,8 +19,8 @@ extern void __ct_user_exit(enum ctx_state state); extern void context_tracking_enter(enum ctx_state state); extern void context_tracking_exit(enum ctx_state state); -extern void context_tracking_user_enter(void); -extern void context_tracking_user_exit(void); +extern void user_enter_callable(void); +extern void user_exit_callable(void); static inline void user_enter(void) { -- cgit v1.2.3 From fe98db1c6d1ad7349e61a5a2766ad64975bc9ae4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:22 +0200 Subject: context_tracking: Rename context_tracking_enter/exit() to ct_user_enter/exit() context_tracking_enter() and context_tracking_exit() have confusing names that don't explain the fact they are referring to user/guest state. Use more self-explanatory names and shrink to the new context tracking prefix instead. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 69532cd18f72..7a5f04ae1758 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -17,21 +17,22 @@ extern void context_tracking_cpu_set(int cpu); extern void __ct_user_enter(enum ctx_state state); extern void __ct_user_exit(enum ctx_state state); -extern void context_tracking_enter(enum ctx_state state); -extern void context_tracking_exit(enum ctx_state state); +extern void ct_user_enter(enum ctx_state state); +extern void ct_user_exit(enum ctx_state state); + extern void user_enter_callable(void); extern void user_exit_callable(void); static inline void user_enter(void) { if (context_tracking_enabled()) - context_tracking_enter(CONTEXT_USER); + ct_user_enter(CONTEXT_USER); } static inline void user_exit(void) { if (context_tracking_enabled()) - context_tracking_exit(CONTEXT_USER); + ct_user_exit(CONTEXT_USER); } /* Called with interrupts disabled. */ @@ -57,7 +58,7 @@ static inline enum ctx_state exception_enter(void) prev_ctx = this_cpu_read(context_tracking.state); if (prev_ctx != CONTEXT_KERNEL) - context_tracking_exit(prev_ctx); + ct_user_exit(prev_ctx); return prev_ctx; } @@ -67,7 +68,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) && context_tracking_enabled()) { if (prev_ctx != CONTEXT_KERNEL) - context_tracking_enter(prev_ctx); + ct_user_enter(prev_ctx); } } -- cgit v1.2.3 From 2a0aafce963dab996b843f6cdb49237b0ae4a118 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:23 +0200 Subject: context_tracking: Rename context_tracking_cpu_set() to ct_cpu_track_user() context_tracking_cpu_set() is called in order to tell a CPU to track user/kernel transitions. Since context tracking is going to expand in to also track transitions from/to idle/IRQ/NMIs, the scope of this function name becomes too broad and needs to be made more specific. Also shorten the prefix to align with the new namespace. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 7a5f04ae1758..63259fece7c7 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -11,7 +11,7 @@ #ifdef CONFIG_CONTEXT_TRACKING -extern void context_tracking_cpu_set(int cpu); +extern void ct_cpu_track_user(int cpu); /* Called with interrupts disabled. */ extern void __ct_user_enter(enum ctx_state state); -- cgit v1.2.3 From 24a9c54182b3758801b8ca6c8c237cc2ff654732 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:24 +0200 Subject: context_tracking: Split user tracking Kconfig Context tracking is going to be used not only to track user transitions but also idle/IRQs/NMIs. The user tracking part will then become a separate feature. Prepare Kconfig for that. [ frederic: Apply Max Filippov feedback. ] Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking.h | 12 ++++++------ include/linux/context_tracking_state.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 63259fece7c7..e35ae66b4794 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -10,7 +10,7 @@ #include -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER extern void ct_cpu_track_user(int cpu); /* Called with interrupts disabled. */ @@ -52,7 +52,7 @@ static inline enum ctx_state exception_enter(void) { enum ctx_state prev_ctx; - if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) || + if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK) || !context_tracking_enabled()) return 0; @@ -65,7 +65,7 @@ static inline enum ctx_state exception_enter(void) static inline void exception_exit(enum ctx_state prev_ctx) { - if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) && + if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK) && context_tracking_enabled()) { if (prev_ctx != CONTEXT_KERNEL) ct_user_enter(prev_ctx); @@ -109,14 +109,14 @@ static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } static __always_inline bool context_tracking_guest_enter(void) { return false; } static inline void context_tracking_guest_exit(void) { } -#endif /* !CONFIG_CONTEXT_TRACKING */ +#endif /* !CONFIG_CONTEXT_TRACKING_USER */ #define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond)) -#ifdef CONFIG_CONTEXT_TRACKING_FORCE +#ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE extern void context_tracking_init(void); #else static inline void context_tracking_init(void) { } -#endif /* CONFIG_CONTEXT_TRACKING_FORCE */ +#endif /* CONFIG_CONTEXT_TRACKING_USER_FORCE */ #endif diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index edc7b46376a6..2b46afe105a9 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -22,7 +22,7 @@ struct context_tracking { } state; }; -#ifdef CONFIG_CONTEXT_TRACKING +#ifdef CONFIG_CONTEXT_TRACKING_USER extern struct static_key_false context_tracking_key; DECLARE_PER_CPU(struct context_tracking, context_tracking); @@ -45,6 +45,6 @@ static inline bool context_tracking_enabled_this_cpu(void) static __always_inline bool context_tracking_enabled(void) { return false; } static __always_inline bool context_tracking_enabled_cpu(int cpu) { return false; } static __always_inline bool context_tracking_enabled_this_cpu(void) { return false; } -#endif /* CONFIG_CONTEXT_TRACKING */ +#endif /* CONFIG_CONTEXT_TRACKING_USER */ #endif -- cgit v1.2.3 From 5f278dbd540b7548bc5193552e6d478255c14c2d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 28 Jun 2022 12:10:15 -0700 Subject: iosys-map: Add per-word read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of always falling back to memcpy_fromio() for any size, prefer using read{b,w,l}(). When reading struct members it's common to read individual integer variables individually. Going through memcpy_fromio() for each of them poses a high penalty. Employ a similar trick as __seqprop() by using _Generic() to generate only the specific call based on a type-compatible variable. For a pariticular i915 workload producing GPU context switches, __get_engine_usage_record() is particularly hot since the engine usage is read from device local memory with dgfx, possibly multiple times since it's racy. Test execution time for this test shows a ~12.5% improvement with DG2: Before: nrepeats = 1000; min = 7.63243e+06; max = 1.01817e+07; median = 9.52548e+06; var = 526149; After: nrepeats = 1000; min = 7.03402e+06; max = 8.8832e+06; median = 8.33955e+06; var = 333113; Other things attempted that didn't prove very useful: 1) Change the _Generic() on x86 to just dereference the memory address 2) Change __get_engine_usage_record() to do just 1 read per loop, comparing with the previous value read 3) Change __get_engine_usage_record() to access the fields directly as it was before the conversion to iosys-map (3) did gave a small improvement (~3%), but doesn't seem to scale well to other similar cases in the driver. Additional test by Chris Wilson using gem_create from igt with some changes to track object creation time. This happens to accidentally stress this code path: Pre iosys_map conversion of engine busyness: lmem0: Creating 262144 4KiB objects took 59274.2ms Unpatched: lmem0: Creating 262144 4KiB objects took 108830.2ms With readl (this patch): lmem0: Creating 262144 4KiB objects took 61348.6ms s/readl/READ_ONCE/ lmem0: Creating 262144 4KiB objects took 61333.2ms So we do take a little bit more time than before the conversion, but that is due to other factors: bringing the READ_ONCE back would be as good as just doing this conversion. v2: - Remove default from _Generic() - callers wanting to read more than u64 should use iosys_map_memcpy_from() - Add READ_ONCE() cases dereferencing the pointer when using system memory v3: - Fix precedence issue when casting inside READ_ONCE(). By not using () around vaddr__ the offset was not part of the cast, but rather added to it, producing a wrong address - Remove compiletime_assert() as READ_ONCE() already contains it Signed-off-by: Lucas De Marchi Reviewed-by: Christian König # v1 Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220628191016.3899428-1-lucas.demarchi@intel.com --- include/linux/iosys-map.h | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index 4b8406ee8bc4..d19977e011ae 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -6,6 +6,7 @@ #ifndef __IOSYS_MAP_H__ #define __IOSYS_MAP_H__ +#include #include #include @@ -333,6 +334,23 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, memset(dst->vaddr + offset, value, len); } +#ifdef CONFIG_64BIT +#define __iosys_map_rd_io_u64_case(val_, vaddr_iomem_) \ + u64: val_ = readq(vaddr_iomem_) +#else +#define __iosys_map_rd_io_u64_case(val_, vaddr_iomem_) \ + u64: memcpy_fromio(&(val_), vaddr_iomem_, sizeof(u64)) +#endif + +#define __iosys_map_rd_io(val__, vaddr_iomem__, type__) _Generic(val__, \ + u8: val__ = readb(vaddr_iomem__), \ + u16: val__ = readw(vaddr_iomem__), \ + u32: val__ = readl(vaddr_iomem__), \ + __iosys_map_rd_io_u64_case(val__, vaddr_iomem__)) + +#define __iosys_map_rd_sys(val__, vaddr__, type__) \ + val__ = READ_ONCE(*(type__ *)(vaddr__)) + /** * iosys_map_rd - Read a C-type value from the iosys_map * @@ -340,16 +358,21 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * @offset__: The offset from which to read * @type__: Type of the value being read * - * Read a C type value from iosys_map, handling possible un-aligned accesses to - * the mapping. + * Read a C type value (u8, u16, u32 and u64) from iosys_map. For other types or + * if pointer may be unaligned (and problematic for the architecture supported), + * use iosys_map_memcpy_from(). * * Returns: * The value read from the mapping. */ -#define iosys_map_rd(map__, offset__, type__) ({ \ - type__ val; \ - iosys_map_memcpy_from(&val, map__, offset__, sizeof(val)); \ - val; \ +#define iosys_map_rd(map__, offset__, type__) ({ \ + type__ val; \ + if ((map__)->is_iomem) { \ + __iosys_map_rd_io(val, (map__)->vaddr_iomem + (offset__), type__);\ + } else { \ + __iosys_map_rd_sys(val, (map__)->vaddr + (offset__), type__); \ + } \ + val; \ }) /** @@ -379,9 +402,10 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * * Read a value from iosys_map considering its layout is described by a C struct * starting at @struct_offset__. The field offset and size is calculated and its - * value read handling possible un-aligned memory accesses. For example: suppose - * there is a @struct foo defined as below and the value ``foo.field2.inner2`` - * needs to be read from the iosys_map: + * value read. If the field access would incur in un-aligned access, then either + * iosys_map_memcpy_from() needs to be used or the architecture must support it. + * For example: suppose there is a @struct foo defined as below and the value + * ``foo.field2.inner2`` needs to be read from the iosys_map: * * .. code-block:: c * -- cgit v1.2.3 From 6fb5ee7cec06266a29f25ecc01a23b9d107f64e1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 28 Jun 2022 12:10:16 -0700 Subject: iosys-map: Add per-word write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Like was done for read, provide the equivalent for write. Even if current users are not in the hot path, this should future-proof it. v2: - Remove default from _Generic() - callers wanting to write more than u64 should use iosys_map_memcpy_to() - Add WRITE_ONCE() cases dereferencing the pointer when using system memory v3: - Fix precedence issue when casting inside WRITE_ONCE(). By not using () around vaddr__ the offset was not part of the cast, but rather added to it, producing a wrong address - Remove compiletime_assert() as WRITE_ONCE() already contains it Signed-off-by: Lucas De Marchi Reviewed-by: Reviewed-by: Christian König # v1 Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20220628191016.3899428-2-lucas.demarchi@intel.com --- include/linux/iosys-map.h | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index d19977e011ae..a533cae189d7 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -337,9 +337,13 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, #ifdef CONFIG_64BIT #define __iosys_map_rd_io_u64_case(val_, vaddr_iomem_) \ u64: val_ = readq(vaddr_iomem_) +#define __iosys_map_wr_io_u64_case(val_, vaddr_iomem_) \ + u64: writeq(val_, vaddr_iomem_) #else #define __iosys_map_rd_io_u64_case(val_, vaddr_iomem_) \ u64: memcpy_fromio(&(val_), vaddr_iomem_, sizeof(u64)) +#define __iosys_map_wr_io_u64_case(val_, vaddr_iomem_) \ + u64: memcpy_toio(vaddr_iomem_, &(val_), sizeof(u64)) #endif #define __iosys_map_rd_io(val__, vaddr_iomem__, type__) _Generic(val__, \ @@ -351,6 +355,15 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, #define __iosys_map_rd_sys(val__, vaddr__, type__) \ val__ = READ_ONCE(*(type__ *)(vaddr__)) +#define __iosys_map_wr_io(val__, vaddr_iomem__, type__) _Generic(val__, \ + u8: writeb(val__, vaddr_iomem__), \ + u16: writew(val__, vaddr_iomem__), \ + u32: writel(val__, vaddr_iomem__), \ + __iosys_map_wr_io_u64_case(val__, vaddr_iomem__)) + +#define __iosys_map_wr_sys(val__, vaddr__, type__) \ + WRITE_ONCE(*(type__ *)(vaddr__), val__) + /** * iosys_map_rd - Read a C-type value from the iosys_map * @@ -383,12 +396,17 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * @type__: Type of the value being written * @val__: Value to write * - * Write a C-type value to the iosys_map, handling possible un-aligned accesses - * to the mapping. + * Write a C type value (u8, u16, u32 and u64) to the iosys_map. For other types + * or if pointer may be unaligned (and problematic for the architecture + * supported), use iosys_map_memcpy_to() */ -#define iosys_map_wr(map__, offset__, type__, val__) ({ \ - type__ val = (val__); \ - iosys_map_memcpy_to(map__, offset__, &val, sizeof(val)); \ +#define iosys_map_wr(map__, offset__, type__, val__) ({ \ + type__ val = (val__); \ + if ((map__)->is_iomem) { \ + __iosys_map_wr_io(val, (map__)->vaddr_iomem + (offset__), type__);\ + } else { \ + __iosys_map_wr_sys(val, (map__)->vaddr + (offset__), type__); \ + } \ }) /** @@ -469,10 +487,12 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * @field__: Member of the struct to read * @val__: Value to write * - * Write a value to the iosys_map considering its layout is described by a C struct - * starting at @struct_offset__. The field offset and size is calculated and the - * @val__ is written handling possible un-aligned memory accesses. Refer to - * iosys_map_rd_field() for expected usage and memory layout. + * Write a value to the iosys_map considering its layout is described by a C + * struct starting at @struct_offset__. The field offset and size is calculated + * and the @val__ is written. If the field access would incur in un-aligned + * access, then either iosys_map_memcpy_to() needs to be used or the + * architecture must support it. Refer to iosys_map_rd_field() for expected + * usage and memory layout. */ #define iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__) ({ \ struct_type__ *s; \ -- cgit v1.2.3 From c381d02b2fd5f82d2207db1b9b25ff60d0d9c27c Mon Sep 17 00:00:00 2001 From: Yuwei Wang Date: Wed, 29 Jun 2022 08:48:31 +0000 Subject: sysctl: add proc_dointvec_ms_jiffies_minmax add proc_dointvec_ms_jiffies_minmax to fit read msecs value to jiffies with a limited range of values Signed-off-by: Yuwei Wang Signed-off-by: Paolo Abeni --- include/linux/sysctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 80263f7cdb77..17b42ce89d3e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -75,6 +75,8 @@ int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer, int proc_dou8vec_minmax(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_ms_jiffies(struct ctl_table *, int, void *, size_t *, -- cgit v1.2.3 From 95c8222f0e52b09b7607616274e7cae84d519a9b Mon Sep 17 00:00:00 2001 From: David Jander Date: Wed, 29 Jun 2022 16:25:18 +0200 Subject: spi: spi.c: Fix comment style Capitalize first word in comment where appropriate and add parentheses to function names. Reported-by: Andy Shevchenko Signed-off-by: David Jander Link: https://lore.kernel.org/r/20220629142519.3985486-3-david@protonic.nl Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 82 ++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index eb0d316e3c36..e6c73d5ff1a8 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -176,13 +176,13 @@ extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); struct spi_device { struct device dev; struct spi_controller *controller; - struct spi_controller *master; /* compatibility layer */ + struct spi_controller *master; /* Compatibility layer */ u32 max_speed_hz; u8 chip_select; u8 bits_per_word; bool rt; -#define SPI_NO_TX BIT(31) /* no transmit wire */ -#define SPI_NO_RX BIT(30) /* no receive wire */ +#define SPI_NO_TX BIT(31) /* No transmit wire */ +#define SPI_NO_RX BIT(30) /* No receive wire */ /* * All bits defined above should be covered by SPI_MODE_KERNEL_MASK. * The SPI_MODE_KERNEL_MASK has the SPI_MODE_USER_MASK counterpart, @@ -199,14 +199,14 @@ struct spi_device { void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; - struct gpio_desc *cs_gpiod; /* chip select gpio desc */ - struct spi_delay word_delay; /* inter-word delay */ + struct gpio_desc *cs_gpiod; /* Chip select gpio desc */ + struct spi_delay word_delay; /* Inter-word delay */ /* CS delays */ struct spi_delay cs_setup; struct spi_delay cs_hold; struct spi_delay cs_inactive; - /* the statistics */ + /* The statistics */ struct spi_statistics __percpu *pcpu_statistics; /* @@ -228,7 +228,7 @@ static inline struct spi_device *to_spi_device(struct device *dev) return dev ? container_of(dev, struct spi_device, dev) : NULL; } -/* most drivers won't need to care about device refcounting */ +/* Most drivers won't need to care about device refcounting */ static inline struct spi_device *spi_dev_get(struct spi_device *spi) { return (spi && get_device(&spi->dev)) ? spi : NULL; @@ -251,7 +251,7 @@ static inline void spi_set_ctldata(struct spi_device *spi, void *state) spi->controller_state = state; } -/* device driver data */ +/* Device driver data */ static inline void spi_set_drvdata(struct spi_device *spi, void *data) { @@ -318,7 +318,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 chip_select); -/* use a define to avoid include chaining to get THIS_MODULE */ +/* Use a define to avoid include chaining to get THIS_MODULE */ #define spi_register_driver(driver) \ __spi_register_driver(THIS_MODULE, driver) @@ -486,7 +486,7 @@ struct spi_controller { struct list_head list; - /* other than negative (== assign one dynamically), bus_num is fully + /* Other than negative (== assign one dynamically), bus_num is fully * board-specific. usually that simplifies to being SOC-specific. * example: one SOC has three SPI controllers, numbered 0..2, * and one board's schematics might show it using SPI-2. software @@ -499,7 +499,7 @@ struct spi_controller { */ u16 num_chipselect; - /* some SPI controllers pose alignment requirements on DMAable + /* Some SPI controllers pose alignment requirements on DMAable * buffers; let protocol drivers know about these requirements. */ u16 dma_alignment; @@ -510,29 +510,29 @@ struct spi_controller { /* spi_device.mode flags override flags for this controller */ u32 buswidth_override_bits; - /* bitmask of supported bits_per_word for transfers */ + /* Bitmask of supported bits_per_word for transfers */ u32 bits_per_word_mask; #define SPI_BPW_MASK(bits) BIT((bits) - 1) #define SPI_BPW_RANGE_MASK(min, max) GENMASK((max) - 1, (min) - 1) - /* limits on transfer speed */ + /* Limits on transfer speed */ u32 min_speed_hz; u32 max_speed_hz; - /* other constraints relevant to this driver */ + /* Other constraints relevant to this driver */ u16 flags; -#define SPI_CONTROLLER_HALF_DUPLEX BIT(0) /* can't do full duplex */ -#define SPI_CONTROLLER_NO_RX BIT(1) /* can't do buffer read */ -#define SPI_CONTROLLER_NO_TX BIT(2) /* can't do buffer write */ -#define SPI_CONTROLLER_MUST_RX BIT(3) /* requires rx */ -#define SPI_CONTROLLER_MUST_TX BIT(4) /* requires tx */ +#define SPI_CONTROLLER_HALF_DUPLEX BIT(0) /* Can't do full duplex */ +#define SPI_CONTROLLER_NO_RX BIT(1) /* Can't do buffer read */ +#define SPI_CONTROLLER_NO_TX BIT(2) /* Can't do buffer write */ +#define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ +#define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ #define SPI_MASTER_GPIO_SS BIT(5) /* GPIO CS must select slave */ - /* flag indicating if the allocation of this struct is devres-managed */ + /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; - /* flag indicating this is an SPI slave controller */ + /* Flag indicating this is an SPI slave controller */ bool slave; /* @@ -548,11 +548,11 @@ struct spi_controller { /* Used to avoid adding the same CS twice */ struct mutex add_lock; - /* lock and mutex for SPI bus locking */ + /* Lock and mutex for SPI bus locking */ spinlock_t bus_lock_spinlock; struct mutex bus_lock_mutex; - /* flag indicating that the SPI bus is locked for exclusive use */ + /* Flag indicating that the SPI bus is locked for exclusive use */ bool bus_lock_flag; /* Setup mode and clock, etc (spi driver may call many times). @@ -573,7 +573,7 @@ struct spi_controller { */ int (*set_cs_timing)(struct spi_device *spi); - /* bidirectional bulk transfers + /* Bidirectional bulk transfers * * + The transfer() method may not sleep; its main role is * just to add the message to the queue. @@ -595,7 +595,7 @@ struct spi_controller { int (*transfer)(struct spi_device *spi, struct spi_message *mesg); - /* called on release() to free memory provided by spi_controller */ + /* Called on release() to free memory provided by spi_controller */ void (*cleanup)(struct spi_device *spi); /* @@ -666,14 +666,14 @@ struct spi_controller { s8 unused_native_cs; s8 max_native_cs; - /* statistics */ + /* Statistics */ struct spi_statistics __percpu *pcpu_statistics; /* DMA channels for use with core dmaengine helpers */ struct dma_chan *dma_tx; struct dma_chan *dma_rx; - /* dummy data for full duplex devices */ + /* Dummy data for full duplex devices */ void *dummy_rx; void *dummy_tx; @@ -738,7 +738,7 @@ void spi_take_timestamp_post(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off); -/* the spi driver core manages memory for the spi_controller classdev */ +/* The spi driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); @@ -808,7 +808,7 @@ typedef void (*spi_res_release_t)(struct spi_controller *ctlr, struct spi_res { struct list_head entry; spi_res_release_t release; - unsigned long long data[]; /* guarantee ull alignment */ + unsigned long long data[]; /* Guarantee ull alignment */ }; /*---------------------------------------------------------------------------*/ @@ -941,7 +941,7 @@ struct spi_res { * and its transfers, ignore them until its completion callback. */ struct spi_transfer { - /* it's ok if tx_buf == rx_buf (right?) + /* It's ok if tx_buf == rx_buf (right?) * for MicroWire, one buffer must be null * buffers must work with dma_*map_single() calls, unless * spi_message.is_dma_mapped reports a pre-existing mapping @@ -1032,24 +1032,24 @@ struct spi_message { * tell them about such special cases. */ - /* completion is reported through a callback */ + /* Completion is reported through a callback */ void (*complete)(void *context); void *context; unsigned frame_length; unsigned actual_length; int status; - /* for optional use by whatever driver currently owns the + /* For optional use by whatever driver currently owns the * spi_message ... between calls to spi_async and then later * complete(), that's the spi_controller controller driver. */ struct list_head queue; void *state; - /* list of spi_res reources when the spi message is processed */ + /* List of spi_res reources when the spi message is processed */ struct list_head resources; - /* spi_prepare_message was called for this message */ + /* spi_prepare_message() was called for this message */ bool prepared; }; @@ -1154,7 +1154,7 @@ spi_max_transfer_size(struct spi_device *spi) if (ctlr->max_transfer_size) tr_max = ctlr->max_transfer_size(spi); - /* transfer size limit must not be greater than message size limit */ + /* Transfer size limit must not be greater than message size limit */ return min(tr_max, msg_max); } @@ -1305,7 +1305,7 @@ spi_read(struct spi_device *spi, void *buf, size_t len) return spi_sync_transfer(spi, &t, 1); } -/* this copies txbuf and rxbuf data; for small transfers only! */ +/* This copies txbuf and rxbuf data; for small transfers only! */ extern int spi_write_then_read(struct spi_device *spi, const void *txbuf, unsigned n_tx, void *rxbuf, unsigned n_rx); @@ -1328,7 +1328,7 @@ static inline ssize_t spi_w8r8(struct spi_device *spi, u8 cmd) status = spi_write_then_read(spi, &cmd, 1, &result, 1); - /* return negative errno or unsigned value */ + /* Return negative errno or unsigned value */ return (status < 0) ? status : result; } @@ -1353,7 +1353,7 @@ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd) status = spi_write_then_read(spi, &cmd, 1, &result, 2); - /* return negative errno or unsigned value */ + /* Return negative errno or unsigned value */ return (status < 0) ? status : result; } @@ -1433,7 +1433,7 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) * are active in some dynamic board configuration models. */ struct spi_board_info { - /* the device name and module name are coupled, like platform_bus; + /* The device name and module name are coupled, like platform_bus; * "modalias" is normally the driver name. * * platform_data goes to spi_device.dev.platform_data, @@ -1446,7 +1446,7 @@ struct spi_board_info { void *controller_data; int irq; - /* slower signaling on noisy or low voltage boards */ + /* Slower signaling on noisy or low voltage boards */ u32 max_speed_hz; @@ -1475,7 +1475,7 @@ struct spi_board_info { extern int spi_register_board_info(struct spi_board_info const *info, unsigned n); #else -/* board init code may ignore whether SPI is configured or not */ +/* Board init code may ignore whether SPI is configured or not */ static inline int spi_register_board_info(struct spi_board_info const *info, unsigned n) { return 0; } -- cgit v1.2.3 From 72a43046b61a3fe7164a622224bcdfc3cf6b795d Mon Sep 17 00:00:00 2001 From: Chanho Park Date: Wed, 29 Jun 2022 09:41:41 +0900 Subject: tty: serial: samsung_tty: loopback mode support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Internal loopback mode can be supported by setting UCON register's Loopback Mode bit. The mode & bit can be supported since s3c2410 and later SoCs. The prefix of LOOPBACK / BIT(5) naming should be also changed to S3C2410_ in order to avoid confusion. Reviewed-by: Krzysztof Kozlowski Reviewed-by: Ilpo Järvinen Signed-off-by: Chanho Park Link: https://lore.kernel.org/r/20220629004141.51484-1-chanho61.park@samsung.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_s3c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h index dec15f5b3dec..1672cf0810ef 100644 --- a/include/linux/serial_s3c.h +++ b/include/linux/serial_s3c.h @@ -83,7 +83,7 @@ #define S3C2410_UCON_RXIRQMODE (1<<0) #define S3C2410_UCON_RXFIFO_TOI (1<<7) #define S3C2443_UCON_RXERR_IRQEN (1<<6) -#define S3C2443_UCON_LOOPBACK (1<<5) +#define S3C2410_UCON_LOOPBACK (1<<5) #define S3C2410_UCON_DEFAULT (S3C2410_UCON_TXILEVEL | \ S3C2410_UCON_RXILEVEL | \ -- cgit v1.2.3 From f9b11229b79c0fb2100b5bb4628a101b1d37fbf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 29 Jun 2022 12:48:41 +0300 Subject: serial: 8250: Fix PM usage_count for console handover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When console is enabled, univ8250_console_setup() calls serial8250_console_setup() before .dev is set to uart_port. Therefore, it will not call pm_runtime_get_sync(). Later, when the actual driver is going to take over univ8250_console_exit() is called. As .dev is already set, serial8250_console_exit() makes pm_runtime_put_sync() call with usage count being zero triggering PM usage count warning (extra debug for univ8250_console_setup(), univ8250_console_exit(), and serial8250_register_ports()): [ 0.068987] univ8250_console_setup ttyS0 nodev [ 0.499670] printk: console [ttyS0] enabled [ 0.717955] printk: console [ttyS0] printing thread started [ 1.960163] serial8250_register_ports assigned dev for ttyS0 [ 1.976830] printk: console [ttyS0] disabled [ 1.976888] printk: console [ttyS0] printing thread stopped [ 1.977073] univ8250_console_exit ttyS0 usage:0 [ 1.977075] serial8250 serial8250: Runtime PM usage count underflow! [ 1.977429] dw-apb-uart.6: ttyS0 at MMIO 0x4010006000 (irq = 33, base_baud = 115200) is a 16550A [ 1.977812] univ8250_console_setup ttyS0 usage:2 [ 1.978167] printk: console [ttyS0] printing thread started [ 1.978203] printk: console [ttyS0] enabled To fix the issue, call pm_runtime_get_sync() in serial8250_register_ports() as soon as .dev is set for an uart_port if it has console enabled. This problem became apparent only recently because 82586a721595 ("PM: runtime: Avoid device usage count underflows") added the warning printout. I confirmed this problem also occurs with v5.18 (w/o the warning printout, obviously). Fixes: bedb404e91bb ("serial: 8250_port: Don't use power management for kernel console") Cc: stable Tested-by: Tony Lindgren Reviewed-by: Andy Shevchenko Reviewed-by: Tony Lindgren Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/b4f428e9-491f-daf2-2232-819928dc276e@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 657a0fc68a3f..fde258b3decd 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -390,6 +390,11 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif +static inline bool uart_console_enabled(struct uart_port *port) +{ + return uart_console(port) && (port->cons->flags & CON_ENABLED); +} + struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, -- cgit v1.2.3 From 6e97eba8ad8748fabb795cffc5d9e1a7dcfd7367 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 28 Jun 2022 18:59:10 +0300 Subject: vfio: Split migration ops from main device ops vfio core checks whether the driver sets some migration op (e.g. set_state/get_state) and accordingly calls its op. However, currently mlx5 driver sets the above ops without regards to its migration caps. This might lead to unexpected usage/Oops if user space may call to the above ops even if the driver doesn't support migration. As for example, the migration state_mutex is not initialized in that case. The cleanest way to manage that seems to split the migration ops from the main device ops, this will let the driver setting them separately from the main ops when it's applicable. As part of that, validate ops construction on registration and include a check for VFIO_MIGRATION_STOP_COPY since the uAPI claims it must be set in migration_flags. HISI driver was changed as well to match this scheme. This scheme may enable down the road to come with some extra group of ops (e.g. DMA log) that can be set without regards to the other options based on driver caps. Fixes: 6fadb021266d ("vfio/mlx5: Implement vfio_pci driver for mlx5 devices") Reviewed-by: Kevin Tian Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20220628155910.171454-3-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 49580fa2073a..4d26e149db81 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -32,6 +32,11 @@ struct vfio_device_set { struct vfio_device { struct device *dev; const struct vfio_device_ops *ops; + /* + * mig_ops is a static property of the vfio_device which must be set + * prior to registering the vfio_device. + */ + const struct vfio_migration_ops *mig_ops; struct vfio_group *group; struct vfio_device_set *dev_set; struct list_head dev_set_list; @@ -61,16 +66,6 @@ struct vfio_device { * match, -errno for abort (ex. match with insufficient or incorrect * additional args) * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl - * @migration_set_state: Optional callback to change the migration state for - * devices that support migration. It's mandatory for - * VFIO_DEVICE_FEATURE_MIGRATION migration support. - * The returned FD is used for data transfer according to the FSM - * definition. The driver is responsible to ensure that FD reaches end - * of stream or error whenever the migration FSM leaves a data transfer - * state or before close_device() returns. - * @migration_get_state: Optional callback to get the migration state for - * devices that support migration. It's mandatory for - * VFIO_DEVICE_FEATURE_MIGRATION migration support. */ struct vfio_device_ops { char *name; @@ -87,6 +82,21 @@ struct vfio_device_ops { int (*match)(struct vfio_device *vdev, char *buf); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); +}; + +/** + * @migration_set_state: Optional callback to change the migration state for + * devices that support migration. It's mandatory for + * VFIO_DEVICE_FEATURE_MIGRATION migration support. + * The returned FD is used for data transfer according to the FSM + * definition. The driver is responsible to ensure that FD reaches end + * of stream or error whenever the migration FSM leaves a data transfer + * state or before close_device() returns. + * @migration_get_state: Optional callback to get the migration state for + * devices that support migration. It's mandatory for + * VFIO_DEVICE_FEATURE_MIGRATION migration support. + */ +struct vfio_migration_ops { struct file *(*migration_set_state)( struct vfio_device *device, enum vfio_device_mig_state new_state); -- cgit v1.2.3 From 0e862838f290147ea9c16db852d8d494b552d38d Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 24 Jun 2022 14:13:07 +0200 Subject: bitops: unify non-atomic bitops prototypes across architectures Currently, there is a mess with the prototypes of the non-atomic bitops across the different architectures: ret bool, int, unsigned long nr int, long, unsigned int, unsigned long addr volatile unsigned long *, volatile void * Thankfully, it doesn't provoke any bugs, but can sometimes make the compiler angry when it's not handy at all. Adjust all the prototypes to the following standard: ret bool retval can be only 0 or 1 nr unsigned long native; signed makes no sense addr volatile unsigned long * bitmaps are arrays of ulongs Next, some architectures don't define 'arch_' versions as they don't support instrumentation, others do. To make sure there is always the same set of callables present and to ease any potential future changes, make them all follow the rule: * architecture-specific files define only 'arch_' versions; * non-prefixed versions can be defined only in asm-generic files; and place the non-prefixed definitions into a new file in asm-generic to be included by non-instrumented architectures. Finally, add some static assertions in order to prevent people from making a mess in this room again. I also used the %__always_inline attribute consistently, so that they always get resolved to the actual operations. Suggested-by: Andy Shevchenko Signed-off-by: Alexander Lobakin Acked-by: Mark Rutland Reviewed-by: Yury Norov Reviewed-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/bitops.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 7aaed501f768..87087454a288 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -26,12 +26,29 @@ extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); extern unsigned long __sw_hweight64(__u64 w); +#include + /* * Include this here because some architectures need generic_ffs/fls in * scope */ #include +/* Check that the bitops prototypes are sane */ +#define __check_bitop_pr(name) \ + static_assert(__same_type(arch_##name, generic_##name) && \ + __same_type(name, generic_##name)) + +__check_bitop_pr(__set_bit); +__check_bitop_pr(__clear_bit); +__check_bitop_pr(__change_bit); +__check_bitop_pr(__test_and_set_bit); +__check_bitop_pr(__test_and_clear_bit); +__check_bitop_pr(__test_and_change_bit); +__check_bitop_pr(test_bit); + +#undef __check_bitop_pr + static inline int get_bitmask_order(unsigned int count) { int order; -- cgit v1.2.3 From bb7379bfa680bd48b468e856475778db2ad866c1 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 24 Jun 2022 14:13:08 +0200 Subject: bitops: define const_*() versions of the non-atomics Define const_*() variants of the non-atomic bitops to be used when the input arguments are compile-time constants, so that the compiler will be always able to resolve those to compile-time constants as well. Those are mostly direct aliases for generic_*() with one exception for const_test_bit(): the original one is declared atomic-safe and thus doesn't discard the `volatile` qualifier, so in order to let optimize code, define it separately disregarding the qualifier. Add them to the compile-time type checks as well just in case. Suggested-by: Marco Elver Signed-off-by: Alexander Lobakin Reviewed-by: Marco Elver Reviewed-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/bitops.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 87087454a288..d393297287d5 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -37,6 +37,7 @@ extern unsigned long __sw_hweight64(__u64 w); /* Check that the bitops prototypes are sane */ #define __check_bitop_pr(name) \ static_assert(__same_type(arch_##name, generic_##name) && \ + __same_type(const_##name, generic_##name) && \ __same_type(name, generic_##name)) __check_bitop_pr(__set_bit); -- cgit v1.2.3 From e69eb9c460f128b71c6b995d75a05244e4b6cc3e Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 24 Jun 2022 14:13:09 +0200 Subject: bitops: wrap non-atomic bitops with a transparent macro In preparation for altering the non-atomic bitops with a macro, wrap them in a transparent definition. This requires prepending one more '_' to their names in order to be able to do that seamlessly. It is a simple change, given that all the non-prefixed definitions are now in asm-generic. sparc32 already has several triple-underscored functions, so I had to rename them ('___' -> 'sp32_'). Signed-off-by: Alexander Lobakin Reviewed-by: Marco Elver Reviewed-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/bitops.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index d393297287d5..3c3afbae1533 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -26,8 +26,24 @@ extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); extern unsigned long __sw_hweight64(__u64 w); +/* + * Defined here because those may be needed by architecture-specific static + * inlines. + */ + #include +#define bitop(op, nr, addr) \ + op(nr, addr) + +#define __set_bit(nr, addr) bitop(___set_bit, nr, addr) +#define __clear_bit(nr, addr) bitop(___clear_bit, nr, addr) +#define __change_bit(nr, addr) bitop(___change_bit, nr, addr) +#define __test_and_set_bit(nr, addr) bitop(___test_and_set_bit, nr, addr) +#define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) +#define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) +#define test_bit(nr, addr) bitop(_test_bit, nr, addr) + /* * Include this here because some architectures need generic_ffs/fls in * scope @@ -38,7 +54,7 @@ extern unsigned long __sw_hweight64(__u64 w); #define __check_bitop_pr(name) \ static_assert(__same_type(arch_##name, generic_##name) && \ __same_type(const_##name, generic_##name) && \ - __same_type(name, generic_##name)) + __same_type(_##name, generic_##name)) __check_bitop_pr(__set_bit); __check_bitop_pr(__clear_bit); -- cgit v1.2.3 From b03fc1173c0c2bb8fad61902a862985cecdc4b1b Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 24 Jun 2022 14:13:10 +0200 Subject: bitops: let optimize out non-atomic bitops on compile-time constants Currently, many architecture-specific non-atomic bitop implementations use inline asm or other hacks which are faster or more robust when working with "real" variables (i.e. fields from the structures etc.), but the compilers have no clue how to optimize them out when called on compile-time constants. That said, the following code: DECLARE_BITMAP(foo, BITS_PER_LONG) = { }; // -> unsigned long foo[1]; unsigned long bar = BIT(BAR_BIT); unsigned long baz = 0; __set_bit(FOO_BIT, foo); baz |= BIT(BAZ_BIT); BUILD_BUG_ON(!__builtin_constant_p(test_bit(FOO_BIT, foo)); BUILD_BUG_ON(!__builtin_constant_p(bar & BAR_BIT)); BUILD_BUG_ON(!__builtin_constant_p(baz & BAZ_BIT)); triggers the first assertion on x86_64, which means that the compiler is unable to evaluate it to a compile-time initializer when the architecture-specific bitop is used even if it's obvious. In order to let the compiler optimize out such cases, expand the bitop() macro to use the "constant" C non-atomic bitop implementations when all of the arguments passed are compile-time constants, which means that the result will be a compile-time constant as well, so that it produces more efficient and simple code in 100% cases, comparing to the architecture-specific counterparts. The savings are architecture, compiler and compiler flags dependent, for example, on x86_64 -O2: GCC 12: add/remove: 78/29 grow/shrink: 332/525 up/down: 31325/-61560 (-30235) LLVM 13: add/remove: 79/76 grow/shrink: 184/537 up/down: 55076/-141892 (-86816) LLVM 14: add/remove: 10/3 grow/shrink: 93/138 up/down: 3705/-6992 (-3287) and ARM64 (courtesy of Mark): GCC 11: add/remove: 92/29 grow/shrink: 933/2766 up/down: 39340/-82580 (-43240) LLVM 14: add/remove: 21/11 grow/shrink: 620/651 up/down: 12060/-15824 (-3764) Cc: Mark Rutland Signed-off-by: Alexander Lobakin Reviewed-by: Marco Elver Signed-off-by: Yury Norov --- include/linux/bitops.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 3c3afbae1533..cf9bf65039f2 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -33,8 +33,24 @@ extern unsigned long __sw_hweight64(__u64 w); #include +/* + * Many architecture-specific non-atomic bitops contain inline asm code and due + * to that the compiler can't optimize them to compile-time expressions or + * constants. In contrary, generic_*() helpers are defined in pure C and + * compilers optimize them just well. + * Therefore, to make `unsigned long foo = 0; __set_bit(BAR, &foo)` effectively + * equal to `unsigned long foo = BIT(BAR)`, pick the generic C alternative when + * the arguments can be resolved at compile time. That expression itself is a + * constant and doesn't bring any functional changes to the rest of cases. + * The casts to `uintptr_t` are needed to mitigate `-Waddress` warnings when + * passing a bitmap from .bss or .data (-> `!!addr` is always true). + */ #define bitop(op, nr, addr) \ - op(nr, addr) + ((__builtin_constant_p(nr) && \ + __builtin_constant_p((uintptr_t)(addr) != (uintptr_t)NULL) && \ + (uintptr_t)(addr) != (uintptr_t)NULL && \ + __builtin_constant_p(*(const unsigned long *)(addr))) ? \ + const##op(nr, addr) : op(nr, addr)) #define __set_bit(nr, addr) bitop(___set_bit, nr, addr) #define __clear_bit(nr, addr) bitop(___clear_bit, nr, addr) -- cgit v1.2.3 From 3e7e5baaaba78075a7f3a57432609e363bf2a486 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 24 Jun 2022 14:13:12 +0200 Subject: bitmap: don't assume compiler evaluates small mem*() builtins calls Intel kernel bot triggered the build bug on ARC architecture that in fact is as follows: DECLARE_BITMAP(bitmap, BITS_PER_LONG); bitmap_clear(bitmap, 0, BITS_PER_LONG); BUILD_BUG_ON(!__builtin_constant_p(*bitmap)); which can be expanded to: unsigned long bitmap[1]; memset(bitmap, 0, sizeof(*bitmap)); BUILD_BUG_ON(!__builtin_constant_p(*bitmap)); In most cases, a compiler is able to expand small/simple mem*() calls to simple assignments or bitops, in this case that would mean: unsigned long bitmap[1] = { 0 }; BUILD_BUG_ON(!__builtin_constant_p(*bitmap)); and on most architectures this works, but not on ARC, despite having -O3 for every build. So, to make this work, in case when the last bit to modify is still within the first long (small_const_nbits()), just use plain assignments for the rest of bitmap_*() functions which still use mem*(), but didn't receive such compile-time optimizations yet. This doesn't have the same coverage as compilers provide, but at least something to start: text: add/remove: 3/7 grow/shrink: 43/78 up/down: 1848/-3370 (-1546) data: add/remove: 1/11 grow/shrink: 0/8 up/down: 4/-356 (-352) notably cpumask_*() family when NR_CPUS <= BITS_PER_LONG: netif_get_num_default_rss_queues 38 4 -34 cpumask_copy 90 - -90 cpumask_clear 146 - -146 and the abovementioned assertion started passing. Signed-off-by: Alexander Lobakin Signed-off-by: Yury Norov --- include/linux/bitmap.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index f091a1664bf1..c91638e507f2 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -238,20 +238,32 @@ extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); - memset(dst, 0, len); + + if (small_const_nbits(nbits)) + *dst = 0; + else + memset(dst, 0, len); } static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); - memset(dst, 0xff, len); + + if (small_const_nbits(nbits)) + *dst = ~0UL; + else + memset(dst, 0xff, len); } static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); - memcpy(dst, src, len); + + if (small_const_nbits(nbits)) + *dst = *src; + else + memcpy(dst, src, len); } /* @@ -431,6 +443,8 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start, { if (__builtin_constant_p(nbits) && nbits == 1) __set_bit(start, map); + else if (small_const_nbits(start + nbits)) + *map |= GENMASK(start + nbits - 1, start); else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && __builtin_constant_p(nbits & BITMAP_MEM_MASK) && @@ -445,6 +459,8 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, { if (__builtin_constant_p(nbits) && nbits == 1) __clear_bit(start, map); + else if (small_const_nbits(start + nbits)) + *map &= ~GENMASK(start + nbits - 1, start); else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && __builtin_constant_p(nbits & BITMAP_MEM_MASK) && -- cgit v1.2.3 From 837ced3a1a5d8bb1a637dd584711f31ae6b54d93 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 28 Jun 2022 17:52:38 +0300 Subject: time64.h: consolidate uses of PSEC_PER_NSEC Time-sensitive networking code needs to work with PTP times expressed in nanoseconds, and with packet transmission times expressed in picoseconds, since those would be fractional at higher than gigabit speed when expressed in nanoseconds. Convert the existing uses in tc-taprio and the ocelot/felix DSA driver to a PSEC_PER_NSEC macro. This macro is placed in include/linux/time64.h as opposed to its relatives (PSEC_PER_SEC etc) from include/vdso/time64.h because the vDSO library does not (yet) need/use it. Cc: Andy Lutomirski Cc: Thomas Gleixner Signed-off-by: Vladimir Oltean Reviewed-by: Vincenzo Frascino # for the vDSO parts Signed-off-by: Jakub Kicinski --- include/linux/time64.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time64.h b/include/linux/time64.h index 81b9686a2079..2fb8232cff1d 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -20,6 +20,9 @@ struct itimerspec64 { struct timespec64 it_value; }; +/* Parameters used to convert the timespec values: */ +#define PSEC_PER_NSEC 1000L + /* Located here for timespec[64]_valid_strict */ #define TIME64_MAX ((s64)~((u64)1 << 63)) #define TIME64_MIN (-TIME64_MAX - 1) -- cgit v1.2.3 From 6708be40047789aa3587a3866b782d5cda7b2a31 Mon Sep 17 00:00:00 2001 From: Peter Chiu Date: Wed, 22 Jun 2022 09:08:20 +0800 Subject: wifi: ieee80211: s1g action frames are not robust S1g action frame with code 22 is not protected so update the robust action frame list. Signed-off-by: Peter Chiu Link: https://lore.kernel.org/r/20220622010820.17522-1-chui-hao.chiu@mediatek.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 870f659087d6..f386f9ed41f3 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4094,6 +4094,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) *category != WLAN_CATEGORY_SELF_PROTECTED && *category != WLAN_CATEGORY_UNPROT_DMG && *category != WLAN_CATEGORY_VHT && + *category != WLAN_CATEGORY_S1G && *category != WLAN_CATEGORY_VENDOR_SPECIFIC; } -- cgit v1.2.3 From 80fc671bcc0173836e9032b0c698ea74c13b9d7c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 1 Jul 2022 11:48:43 +0800 Subject: uacce: Handle parent device removal or parent driver module rmmod The uacce driver must deal with a possible removal of the parent device or parent driver module rmmod at any time. Although uacce_remove(), called on device removal and on driver unbind, prevents future use of the uacce fops by removing the cdev, fops that were called before that point may still be running. Serialize uacce_fops_open() and uacce_remove() with uacce->mutex. Serialize other fops against uacce_remove() with q->mutex. Since we need to protect uacce_fops_poll() which gets called on the fast path, replace uacce->queues_lock with q->mutex to improve scalability. The other fops are only used during setup. uacce_queue_is_valid(), checked under q->mutex or uacce->mutex, denotes whether uacce_remove() has disabled all queues. If that is the case, don't go any further since the parent device is being removed and uacce->ops should not be called anymore. Reported-by: Yang Shen Signed-off-by: Zhangfei Gao Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20220701034843.7502-1-zhangfei.gao@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/uacce.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uacce.h b/include/linux/uacce.h index 48e319f40275..9ce88c28b0a8 100644 --- a/include/linux/uacce.h +++ b/include/linux/uacce.h @@ -70,6 +70,7 @@ enum uacce_q_state { * @wait: wait queue head * @list: index into uacce queues list * @qfrs: pointer of qfr regions + * @mutex: protects queue state * @state: queue state machine * @pasid: pasid associated to the mm * @handle: iommu_sva handle returned by iommu_sva_bind_device() @@ -80,6 +81,7 @@ struct uacce_queue { wait_queue_head_t wait; struct list_head list; struct uacce_qfile_region *qfrs[UACCE_MAX_REGION]; + struct mutex mutex; enum uacce_q_state state; u32 pasid; struct iommu_sva *handle; @@ -97,9 +99,9 @@ struct uacce_queue { * @dev_id: id of the uacce device * @cdev: cdev of the uacce * @dev: dev of the uacce + * @mutex: protects uacce operation * @priv: private pointer of the uacce * @queues: list of queues - * @queues_lock: lock for queues list * @inode: core vfs */ struct uacce_device { @@ -113,9 +115,9 @@ struct uacce_device { u32 dev_id; struct cdev *cdev; struct device dev; + struct mutex mutex; void *priv; struct list_head queues; - struct mutex queues_lock; struct inode *inode; }; -- cgit v1.2.3 From c882716b6d411495f221bdd73e9137357c16a3ea Mon Sep 17 00:00:00 2001 From: Liang He Date: Tue, 28 Jun 2022 10:16:40 +0800 Subject: firmware: Hold a reference for of_find_compatible_node() In of_register_trusted_foundations(), we need to hold the reference returned by of_find_compatible_node() and then use it to call of_node_put() for refcount balance. Signed-off-by: Liang He Link: https://lore.kernel.org/r/20220628021640.4015-1-windhl@126.com Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/trusted_foundations.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/trusted_foundations.h b/include/linux/firmware/trusted_foundations.h index be5984bda592..931b6c5c72df 100644 --- a/include/linux/firmware/trusted_foundations.h +++ b/include/linux/firmware/trusted_foundations.h @@ -71,12 +71,16 @@ static inline void register_trusted_foundations( static inline void of_register_trusted_foundations(void) { + struct device_node *np = of_find_compatible_node(NULL, NULL, "tlm,trusted-foundations"); + + if (!np) + return; + of_node_put(np); /* * If we find the target should enable TF but does not support it, * fail as the system won't be able to do much anyway */ - if (of_find_compatible_node(NULL, NULL, "tlm,trusted-foundations")) - register_trusted_foundations(NULL); + register_trusted_foundations(NULL); } static inline bool trusted_foundations_registered(void) -- cgit v1.2.3 From 31a371e419c885e0f137ce70395356ba8639dc52 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 29 Jun 2022 17:42:08 +0300 Subject: fanotify: prepare for setting event flags in ignore mask Setting flags FAN_ONDIR FAN_EVENT_ON_CHILD in ignore mask has no effect. The FAN_EVENT_ON_CHILD flag in mask implicitly applies to ignore mask and ignore mask is always implicitly applied to events on directories. Define a mark flag that replaces this legacy behavior with logic of applying the ignore mask according to event flags in ignore mask. Implement the new logic to prepare for supporting an ignore mask that ignores events on children and ignore mask that does not ignore events on directories. To emphasize the change in terminology, also rename ignored_mask mark member to ignore_mask and use accessors to get only the effective ignored events or the ignored events and flags. This change in terminology finally aligns with the "ignore mask" language in man pages and in most of the comments. Link: https://lore.kernel.org/r/20220629144210.2983229-2-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 89 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 9560734759fa..d7d96c806bff 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -518,8 +518,8 @@ struct fsnotify_mark { struct hlist_node obj_list; /* Head of list of marks for an object [mark ref] */ struct fsnotify_mark_connector *connector; - /* Events types to ignore [mark->lock, group->mark_mutex] */ - __u32 ignored_mask; + /* Events types and flags to ignore [mark->lock, group->mark_mutex] */ + __u32 ignore_mask; /* General fsnotify mark flags */ #define FSNOTIFY_MARK_FLAG_ALIVE 0x0001 #define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002 @@ -529,6 +529,7 @@ struct fsnotify_mark { /* fanotify mark flags */ #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100 #define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200 +#define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400 unsigned int flags; /* flags [mark->lock] */ }; @@ -655,15 +656,91 @@ extern void fsnotify_remove_queued_event(struct fsnotify_group *group, /* functions used to manipulate the marks attached to inodes */ -/* Get mask for calculating object interest taking ignored mask into account */ +/* + * Canonical "ignore mask" including event flags. + * + * Note the subtle semantic difference from the legacy ->ignored_mask. + * ->ignored_mask traditionally only meant which events should be ignored, + * while ->ignore_mask also includes flags regarding the type of objects on + * which events should be ignored. + */ +static inline __u32 fsnotify_ignore_mask(struct fsnotify_mark *mark) +{ + __u32 ignore_mask = mark->ignore_mask; + + /* The event flags in ignore mask take effect */ + if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) + return ignore_mask; + + /* + * Legacy behavior: + * - Always ignore events on dir + * - Ignore events on child if parent is watching children + */ + ignore_mask |= FS_ISDIR; + ignore_mask &= ~FS_EVENT_ON_CHILD; + ignore_mask |= mark->mask & FS_EVENT_ON_CHILD; + + return ignore_mask; +} + +/* Legacy ignored_mask - only event types to ignore */ +static inline __u32 fsnotify_ignored_events(struct fsnotify_mark *mark) +{ + return mark->ignore_mask & ALL_FSNOTIFY_EVENTS; +} + +/* + * Check if mask (or ignore mask) should be applied depending if victim is a + * directory and whether it is reported to a watching parent. + */ +static inline bool fsnotify_mask_applicable(__u32 mask, bool is_dir, + int iter_type) +{ + /* Should mask be applied to a directory? */ + if (is_dir && !(mask & FS_ISDIR)) + return false; + + /* Should mask be applied to a child? */ + if (iter_type == FSNOTIFY_ITER_TYPE_PARENT && + !(mask & FS_EVENT_ON_CHILD)) + return false; + + return true; +} + +/* + * Effective ignore mask taking into account if event victim is a + * directory and whether it is reported to a watching parent. + */ +static inline __u32 fsnotify_effective_ignore_mask(struct fsnotify_mark *mark, + bool is_dir, int iter_type) +{ + __u32 ignore_mask = fsnotify_ignored_events(mark); + + if (!ignore_mask) + return 0; + + /* For non-dir and non-child, no need to consult the event flags */ + if (!is_dir && iter_type != FSNOTIFY_ITER_TYPE_PARENT) + return ignore_mask; + + ignore_mask = fsnotify_ignore_mask(mark); + if (!fsnotify_mask_applicable(ignore_mask, is_dir, iter_type)) + return 0; + + return ignore_mask & ALL_FSNOTIFY_EVENTS; +} + +/* Get mask for calculating object interest taking ignore mask into account */ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark) { __u32 mask = mark->mask; - if (!mark->ignored_mask) + if (!fsnotify_ignored_events(mark)) return mask; - /* Interest in FS_MODIFY may be needed for clearing ignored mask */ + /* Interest in FS_MODIFY may be needed for clearing ignore mask */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) mask |= FS_MODIFY; @@ -671,7 +748,7 @@ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark) * If mark is interested in ignoring events on children, the object must * show interest in those events for fsnotify_parent() to notice it. */ - return mask | (mark->ignored_mask & ALL_FSNOTIFY_EVENTS); + return mask | mark->ignore_mask; } /* Get mask of events for a list of marks */ -- cgit v1.2.3 From 8afd7215aa97f8868d033f6e1d01a276ab2d29c0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 29 Jun 2022 17:42:09 +0300 Subject: fanotify: cleanups for fanotify_mark() input validations Create helper fanotify_may_update_existing_mark() for checking for conflicts between existing mark flags and fanotify_mark() flags. Use variable mark_cmd to make the checks for mark command bits cleaner. Link: https://lore.kernel.org/r/20220629144210.2983229-3-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index e517dbcf74ed..a7207f092fd1 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -59,15 +59,16 @@ #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ FAN_MARK_FILESYSTEM) +#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \ + FAN_MARK_FLUSH) + #define FANOTIFY_MARK_FLAGS (FANOTIFY_MARK_TYPE_BITS | \ - FAN_MARK_ADD | \ - FAN_MARK_REMOVE | \ + FANOTIFY_MARK_CMD_BITS | \ FAN_MARK_DONT_FOLLOW | \ FAN_MARK_ONLYDIR | \ FAN_MARK_IGNORED_MASK | \ FAN_MARK_IGNORED_SURV_MODIFY | \ - FAN_MARK_EVICTABLE | \ - FAN_MARK_FLUSH) + FAN_MARK_EVICTABLE) /* * Events that can be reported with data type FSNOTIFY_EVENT_PATH. -- cgit v1.2.3 From e252f2ed1c8c6c3884ab5dd34e003ed21f1fe6e0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 29 Jun 2022 17:42:10 +0300 Subject: fanotify: introduce FAN_MARK_IGNORE This flag is a new way to configure ignore mask which allows adding and removing the event flags FAN_ONDIR and FAN_EVENT_ON_CHILD in ignore mask. The legacy FAN_MARK_IGNORED_MASK flag would always ignore events on directories and would ignore events on children depending on whether the FAN_EVENT_ON_CHILD flag was set in the (non ignored) mask. FAN_MARK_IGNORE can be used to ignore events on children without setting FAN_EVENT_ON_CHILD in the mark's mask and will not ignore events on directories unconditionally, only when FAN_ONDIR is set in ignore mask. The new behavior is non-downgradable. After calling fanotify_mark() with FAN_MARK_IGNORE once, calling fanotify_mark() with FAN_MARK_IGNORED_MASK on the same object will return EEXIST error. Setting the event flags with FAN_MARK_IGNORE on a non-dir inode mark has no meaning and will return ENOTDIR error. The meaning of FAN_MARK_IGNORED_SURV_MODIFY is preserved with the new FAN_MARK_IGNORE flag, but with a few semantic differences: 1. FAN_MARK_IGNORED_SURV_MODIFY is required for filesystem and mount marks and on an inode mark on a directory. Omitting this flag will return EINVAL or EISDIR error. 2. An ignore mask on a non-directory inode that survives modify could never be downgraded to an ignore mask that does not survive modify. With new FAN_MARK_IGNORE semantics we make that rule explicit - trying to update a surviving ignore mask without the flag FAN_MARK_IGNORED_SURV_MODIFY will return EEXIST error. The conveniene macro FAN_MARK_IGNORE_SURV is added for (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY), because the common case should use short constant names. Link: https://lore.kernel.org/r/20220629144210.2983229-4-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index a7207f092fd1..8ad743def6f3 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -62,11 +62,14 @@ #define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \ FAN_MARK_FLUSH) +#define FANOTIFY_MARK_IGNORE_BITS (FAN_MARK_IGNORED_MASK | \ + FAN_MARK_IGNORE) + #define FANOTIFY_MARK_FLAGS (FANOTIFY_MARK_TYPE_BITS | \ FANOTIFY_MARK_CMD_BITS | \ + FANOTIFY_MARK_IGNORE_BITS | \ FAN_MARK_DONT_FOLLOW | \ FAN_MARK_ONLYDIR | \ - FAN_MARK_IGNORED_MASK | \ FAN_MARK_IGNORED_SURV_MODIFY | \ FAN_MARK_EVICTABLE) -- cgit v1.2.3 From b69a2afd5afce9bf6d56e349d6ab592c916e20f2 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Thu, 30 Jun 2022 08:36:12 +0000 Subject: x86/kexec: Carry forward IMA measurement log on kexec On kexec file load, the Integrity Measurement Architecture (IMA) subsystem may verify the IMA signature of the kernel and initramfs, and measure it. The command line parameters passed to the kernel in the kexec call may also be measured by IMA. A remote attestation service can verify a TPM quote based on the TPM event log, the IMA measurement list and the TPM PCR data. This can be achieved only if the IMA measurement log is carried over from the current kernel to the next kernel across the kexec call. PowerPC and ARM64 both achieve this using device tree with a "linux,ima-kexec-buffer" node. x86 platforms generally don't make use of device tree, so use the setup_data mechanism to pass the IMA buffer to the new kernel. Signed-off-by: Jonathan McDowell Signed-off-by: Borislav Petkov Reviewed-by: Mimi Zohar # IMA function definitions Link: https://lore.kernel.org/r/YmKyvlF3my1yWTvK@noodles-fedora-PC23Y6EG --- include/linux/ima.h | 5 +++++ include/linux/of.h | 2 -- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 426b1744215e..81708ca0ebc7 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -140,6 +140,11 @@ static inline int ima_measure_critical_data(const char *event_label, #endif /* CONFIG_IMA */ +#ifdef CONFIG_HAVE_IMA_KEXEC +int __init ima_free_kexec_buffer(void); +int __init ima_get_kexec_buffer(void **addr, size_t *size); +#endif + #ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT extern bool arch_ima_get_secureboot(void); extern const char * const *arch_get_ima_policy(void); diff --git a/include/linux/of.h b/include/linux/of.h index f0a5d6b10c5a..20a4e7cb7afe 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -441,8 +441,6 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline, size_t extra_fdt_size); -int ima_get_kexec_buffer(void **addr, size_t *size); -int ima_free_kexec_buffer(void); #else /* CONFIG_OF */ static inline void of_core_init(void) -- cgit v1.2.3 From 2852ca7fba9f77b204f0fe953b31fadd0057c936 Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 1 Jul 2022 16:47:41 +0800 Subject: panic: Taint kernel if tests are run Most in-kernel tests (such as KUnit tests) are not supposed to run on production systems: they may do deliberately illegal things to trigger errors, and have security implications (for example, KUnit assertions will often deliberately leak kernel addresses). Add a new taint type, TAINT_TEST to signal that a test has been run. This will be printed as 'N' (originally for kuNit, as every other sensible letter was taken.) This should discourage people from running these tests on production systems, and to make it easier to tell if tests have been run accidentally (by loading the wrong configuration, etc.) Acked-by: Luis Chamberlain Reviewed-by: Brendan Higgins Signed-off-by: David Gow Signed-off-by: Shuah Khan --- include/linux/panic.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/panic.h b/include/linux/panic.h index e71161da69c4..c7759b3f2045 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -68,7 +68,8 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) #define TAINT_LIVEPATCH 15 #define TAINT_AUX 16 #define TAINT_RANDSTRUCT 17 -#define TAINT_FLAGS_COUNT 18 +#define TAINT_TEST 18 +#define TAINT_FLAGS_COUNT 19 #define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) struct taint_flag { -- cgit v1.2.3 From eb003bf3ba221bb3d21d1fdcddaa36c158fd2d8f Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 24 Jun 2022 20:36:39 +0200 Subject: platform/surface: aggregator: Add helper macros for requests with argument and return value Add helper macros for synchronous stack-allocated Surface Aggregator request with both argument and return value, similar to the current argument-only and return-value-only ones. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220624183642.910893-2-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/controller.h | 125 ++++++++++++++++++++++++++ include/linux/surface_aggregator/device.h | 36 ++++++++ 2 files changed, 161 insertions(+) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h index 50a2b4926c06..d11a1c6e3186 100644 --- a/include/linux/surface_aggregator/controller.h +++ b/include/linux/surface_aggregator/controller.h @@ -469,6 +469,67 @@ struct ssam_request_spec_md { return 0; \ } +/** + * SSAM_DEFINE_SYNC_REQUEST_WR() - Define synchronous SAM request function with + * both argument and return value. + * @name: Name of the generated function. + * @atype: Type of the request's argument. + * @rtype: Type of the request's return value. + * @spec: Specification (&struct ssam_request_spec) defining the request. + * + * Defines a function executing the synchronous SAM request specified by @spec, + * with the request taking an argument of type @atype and having a return value + * of type @rtype. The generated function takes care of setting up the request + * and response structs, buffer allocation, as well as execution of the request + * itself, returning once the request has been fully completed. The required + * transport buffer will be allocated on the stack. + * + * The generated function is defined as ``static int name(struct + * ssam_controller *ctrl, const atype *arg, rtype *ret)``, returning the status + * of the request, which is zero on success and negative on failure. The + * ``ctrl`` parameter is the controller via which the request is sent. The + * request argument is specified via the ``arg`` pointer. The request's return + * value is written to the memory pointed to by the ``ret`` parameter. + * + * Refer to ssam_request_sync_onstack() for more details on the behavior of + * the generated function. + */ +#define SSAM_DEFINE_SYNC_REQUEST_WR(name, atype, rtype, spec...) \ + static int name(struct ssam_controller *ctrl, const atype *arg, rtype *ret) \ + { \ + struct ssam_request_spec s = (struct ssam_request_spec)spec; \ + struct ssam_request rqst; \ + struct ssam_response rsp; \ + int status; \ + \ + rqst.target_category = s.target_category; \ + rqst.target_id = s.target_id; \ + rqst.command_id = s.command_id; \ + rqst.instance_id = s.instance_id; \ + rqst.flags = s.flags | SSAM_REQUEST_HAS_RESPONSE; \ + rqst.length = sizeof(atype); \ + rqst.payload = (u8 *)arg; \ + \ + rsp.capacity = sizeof(rtype); \ + rsp.length = 0; \ + rsp.pointer = (u8 *)ret; \ + \ + status = ssam_request_sync_onstack(ctrl, &rqst, &rsp, sizeof(atype)); \ + if (status) \ + return status; \ + \ + if (rsp.length != sizeof(rtype)) { \ + struct device *dev = ssam_controller_device(ctrl); \ + dev_err(dev, \ + "rqst: invalid response length, expected %zu, got %zu (tc: %#04x, cid: %#04x)", \ + sizeof(rtype), rsp.length, rqst.target_category,\ + rqst.command_id); \ + return -EIO; \ + } \ + \ + return 0; \ + } + /** * SSAM_DEFINE_SYNC_REQUEST_MD_N() - Define synchronous multi-device SAM * request function with neither argument nor return value. @@ -613,6 +674,70 @@ struct ssam_request_spec_md { return 0; \ } +/** + * SSAM_DEFINE_SYNC_REQUEST_MD_WR() - Define synchronous multi-device SAM + * request function with both argument and return value. + * @name: Name of the generated function. + * @atype: Type of the request's argument. + * @rtype: Type of the request's return value. + * @spec: Specification (&struct ssam_request_spec_md) defining the request. + * + * Defines a function executing the synchronous SAM request specified by @spec, + * with the request taking an argument of type @atype and having a return value + * of type @rtype. Device specifying parameters are not hard-coded, but instead + * must be provided to the function. The generated function takes care of + * setting up the request and response structs, buffer allocation, as well as + * execution of the request itself, returning once the request has been fully + * completed. The required transport buffer will be allocated on the stack. + * + * The generated function is defined as ``static int name(struct + * ssam_controller *ctrl, u8 tid, u8 iid, const atype *arg, rtype *ret)``, + * returning the status of the request, which is zero on success and negative + * on failure. The ``ctrl`` parameter is the controller via which the request + * is sent, ``tid`` the target ID for the request, and ``iid`` the instance ID. + * The request argument is specified via the ``arg`` pointer. The request's + * return value is written to the memory pointed to by the ``ret`` parameter. + * + * Refer to ssam_request_sync_onstack() for more details on the behavior of + * the generated function. + */ +#define SSAM_DEFINE_SYNC_REQUEST_MD_WR(name, atype, rtype, spec...) \ + static int name(struct ssam_controller *ctrl, u8 tid, u8 iid, \ + const atype *arg, rtype *ret) \ + { \ + struct ssam_request_spec_md s = (struct ssam_request_spec_md)spec; \ + struct ssam_request rqst; \ + struct ssam_response rsp; \ + int status; \ + \ + rqst.target_category = s.target_category; \ + rqst.target_id = tid; \ + rqst.command_id = s.command_id; \ + rqst.instance_id = iid; \ + rqst.flags = s.flags | SSAM_REQUEST_HAS_RESPONSE; \ + rqst.length = sizeof(atype); \ + rqst.payload = (u8 *)arg; \ + \ + rsp.capacity = sizeof(rtype); \ + rsp.length = 0; \ + rsp.pointer = (u8 *)ret; \ + \ + status = ssam_request_sync_onstack(ctrl, &rqst, &rsp, sizeof(atype)); \ + if (status) \ + return status; \ + \ + if (rsp.length != sizeof(rtype)) { \ + struct device *dev = ssam_controller_device(ctrl); \ + dev_err(dev, \ + "rqst: invalid response length, expected %zu, got %zu (tc: %#04x, cid: %#04x)", \ + sizeof(rtype), rsp.length, rqst.target_category,\ + rqst.command_id); \ + return -EIO; \ + } \ + \ + return 0; \ + } + /* -- Event notifier/callbacks. --------------------------------------------- */ diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index c418f7f2732d..6cf7e80312d5 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -483,6 +483,42 @@ static inline void ssam_remove_clients(struct device *dev) {} sdev->uid.instance, ret); \ } +/** + * SSAM_DEFINE_SYNC_REQUEST_CL_WR() - Define synchronous client-device SAM + * request function with argument and return value. + * @name: Name of the generated function. + * @atype: Type of the request's argument. + * @rtype: Type of the request's return value. + * @spec: Specification (&struct ssam_request_spec_md) defining the request. + * + * Defines a function executing the synchronous SAM request specified by @spec, + * with the request taking an argument of type @atype and having a return value + * of type @rtype. Device specifying parameters are not hard-coded, but instead + * are provided via the client device, specifically its UID, supplied when + * calling this function. The generated function takes care of setting up the + * request struct, buffer allocation, as well as execution of the request + * itself, returning once the request has been fully completed. The required + * transport buffer will be allocated on the stack. + * + * The generated function is defined as ``static int name(struct ssam_device + * *sdev, const atype *arg, rtype *ret)``, returning the status of the request, + * which is zero on success and negative on failure. The ``sdev`` parameter + * specifies both the target device of the request and by association the + * controller via which the request is sent. The request's argument is + * specified via the ``arg`` pointer. The request's return value is written to + * the memory pointed to by the ``ret`` parameter. + * + * Refer to ssam_request_sync_onstack() for more details on the behavior of + * the generated function. + */ +#define SSAM_DEFINE_SYNC_REQUEST_CL_WR(name, atype, rtype, spec...) \ + SSAM_DEFINE_SYNC_REQUEST_MD_WR(__raw_##name, atype, rtype, spec) \ + static int name(struct ssam_device *sdev, const atype *arg, rtype *ret) \ + { \ + return __raw_##name(sdev->ctrl, sdev->uid.target, \ + sdev->uid.instance, arg, ret); \ + } + /* -- Helpers for client-device notifiers. ---------------------------------- */ -- cgit v1.2.3 From 4a4ab610b8ae912c28a4fd28442ef24ed7a1a5bd Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 24 Jun 2022 22:57:58 +0200 Subject: platform/surface: aggregator: Move device registry helper functions to core module Move helper functions for client device registration to the core module. This simplifies addition of future DT/OF support and also allows us to split out the device hub drivers into their own module. At the same time, also improve device node validation a bit by not silently skipping devices with invalid device UID specifiers. Further, ensure proper lifetime management for the firmware/software nodes associated with the added devices. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20220624205800.1355621-2-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/device.h | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index 6cf7e80312d5..46c45d1b6368 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -375,11 +376,62 @@ void ssam_device_driver_unregister(struct ssam_device_driver *d); /* -- Helpers for controller and hub devices. ------------------------------- */ #ifdef CONFIG_SURFACE_AGGREGATOR_BUS + +int __ssam_register_clients(struct device *parent, struct ssam_controller *ctrl, + struct fwnode_handle *node); void ssam_remove_clients(struct device *dev); + #else /* CONFIG_SURFACE_AGGREGATOR_BUS */ + +static inline int __ssam_register_clients(struct device *parent, struct ssam_controller *ctrl, + struct fwnode_handle *node) +{ + return 0; +} + static inline void ssam_remove_clients(struct device *dev) {} + #endif /* CONFIG_SURFACE_AGGREGATOR_BUS */ +/** + * ssam_register_clients() - Register all client devices defined under the + * given parent device. + * @dev: The parent device under which clients should be registered. + * @ctrl: The controller with which client should be registered. + * + * Register all clients that have via firmware nodes been defined as children + * of the given (parent) device. The respective child firmware nodes will be + * associated with the correspondingly created child devices. + * + * The given controller will be used to instantiate the new devices. See + * ssam_device_add() for details. + * + * Return: Returns zero on success, nonzero on failure. + */ +static inline int ssam_register_clients(struct device *dev, struct ssam_controller *ctrl) +{ + return __ssam_register_clients(dev, ctrl, dev_fwnode(dev)); +} + +/** + * ssam_device_register_clients() - Register all client devices defined under + * the given SSAM parent device. + * @sdev: The parent device under which clients should be registered. + * + * Register all clients that have via firmware nodes been defined as children + * of the given (parent) device. The respective child firmware nodes will be + * associated with the correspondingly created child devices. + * + * The controller used by the parent device will be used to instantiate the new + * devices. See ssam_device_add() for details. + * + * Return: Returns zero on success, nonzero on failure. + */ +static inline int ssam_device_register_clients(struct ssam_device *sdev) +{ + return ssam_register_clients(&sdev->dev, sdev->ctrl); +} + /* -- Helpers for client-device requests. ----------------------------------- */ -- cgit v1.2.3 From 504148fedb854299972d164b001357b888a9193e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Jun 2022 15:07:50 +0000 Subject: net: add skb_[inner_]tcp_all_headers helpers Most drivers use "skb_transport_offset(skb) + tcp_hdrlen(skb)" to compute headers length for a TCP packet, but others use more convoluted (but equivalent) ways. Add skb_tcp_all_headers() and skb_inner_tcp_all_headers() helpers to harmonize this a bit. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1168302b7927..a9fbe22732c3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -46,6 +46,36 @@ static inline unsigned int inner_tcp_hdrlen(const struct sk_buff *skb) return inner_tcp_hdr(skb)->doff * 4; } +/** + * skb_tcp_all_headers - Returns size of all headers for a TCP packet + * @skb: buffer + * + * Used in TX path, for a packet known to be a TCP one. + * + * if (skb_is_gso(skb)) { + * int hlen = skb_tcp_all_headers(skb); + * ... + */ +static inline int skb_tcp_all_headers(const struct sk_buff *skb) +{ + return skb_transport_offset(skb) + tcp_hdrlen(skb); +} + +/** + * skb_inner_tcp_all_headers - Returns size of all headers for an encap TCP packet + * @skb: buffer + * + * Used in TX path, for a packet known to be a TCP one. + * + * if (skb_is_gso(skb) && skb->encapsulation) { + * int hlen = skb_inner_tcp_all_headers(skb); + * ... + */ +static inline int skb_inner_tcp_all_headers(const struct sk_buff *skb) +{ + return skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); +} + static inline unsigned int tcp_optlen(const struct sk_buff *skb) { return (tcp_hdr(skb)->doff - 5) * 4; -- cgit v1.2.3 From f019679ea5f2ab650c3348a79e7d9c3625f62899 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 30 May 2022 06:07:57 +0300 Subject: net/mlx5: E-switch, Remove dependency between sriov and eswitch mode Currently, there are three eswitch modes, none, legacy and switchdev. None is the default mode. Remove redundant none mode as eswitch mode should always be either legacy mode or switchdev mode. With this patch, there are two behavior changes: 1. Legacy becomes the default mode. When querying eswitch mode using devlink, a valid mode is always returned. 2. When disabling sriov, the eswitch mode will not change, only vfs are unloaded. Signed-off-by: Chris Mi Reviewed-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 8b18fe9771f9..e2701ed0200e 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -12,7 +12,6 @@ #define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) enum { - MLX5_ESWITCH_NONE, MLX5_ESWITCH_LEGACY, MLX5_ESWITCH_OFFLOADS }; @@ -153,7 +152,7 @@ struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw); static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) { - return MLX5_ESWITCH_NONE; + return MLX5_ESWITCH_LEGACY; } static inline enum devlink_eswitch_encap_mode @@ -198,6 +197,11 @@ static inline struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitc #endif /* CONFIG_MLX5_ESWITCH */ +static inline bool is_mdev_legacy_mode(struct mlx5_core_dev *dev) +{ + return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_LEGACY; +} + static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev) { return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS; -- cgit v1.2.3 From 036bff2800cbcf8217dd0bc93d8421b5b8f72476 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Tue, 28 Jun 2022 18:31:28 +0200 Subject: can: netlink: dump bitrate 0 if can_priv::bittiming.bitrate is -1U Upcoming changes on slcan driver will require you to specify a bitrate of value -1 to prevent the open_candev() from failing but at the same time highlighting that it is a fake value. In this case the command `ip --details -s -s link show' would print 4294967295 as the bitrate value. The patch change this value in 0. Link: https://lore.kernel.org/all/20220628163137.413025-5-dario.binacchi@amarulasolutions.com Suggested-by: Marc Kleine-Budde Signed-off-by: Dario Binacchi Tested-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 7ae21c0f7f23..ef0a77173e3c 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -11,6 +11,8 @@ #define CAN_SYNC_SEG 1 +#define CAN_BITRATE_UNSET 0 +#define CAN_BITRATE_UNKNOWN (-1U) #define CAN_CTRLMODE_TDC_MASK \ (CAN_CTRLMODE_TDC_AUTO | CAN_CTRLMODE_TDC_MANUAL) -- cgit v1.2.3 From cffe57bee62b155c08d71218fc0e9e84a0a90bbb Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 22 Jun 2022 15:45:46 +0700 Subject: Documentation: highmem: use literal block for code example in highmem.h comment When building htmldocs on Linus's tree, there are inline emphasis warnings on include/linux/highmem.h: Documentation/vm/highmem:166: ./include/linux/highmem.h:154: WARNING: Inline emphasis start-string without end-string. Documentation/vm/highmem:166: ./include/linux/highmem.h:157: WARNING: Inline emphasis start-string without end-string. These warnings above are due to comments in code example at the mentioned lines above are enclosed by double dash (--), which confuses Sphinx as inline markup delimiters instead. Fix these warnings by indenting the code example with literal block indentation and making the comments C comments. Link: https://lkml.kernel.org/r/20220622084546.17745-1-bagasdotme@gmail.com Fixes: 85a85e7601263f ("Documentation/vm: move "Using kmap-atomic" to highmem.h") Signed-off-by: Bagas Sanjaya Reviewed-by: Ira Weiny Tested-by: Ira Weiny Cc: "Matthew Wilcox (Oracle)" Cc: "Fabio M. De Francesco" Cc: Sebastian Andrzej Siewior Signed-off-by: Andrew Morton --- include/linux/highmem.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 3af34de54330..56d6a0196534 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -149,19 +149,19 @@ static inline void *kmap_local_folio(struct folio *folio, size_t offset); * It is used in atomic context when code wants to access the contents of a * page that might be allocated from high memory (see __GFP_HIGHMEM), for * example a page in the pagecache. The API has two functions, and they - * can be used in a manner similar to the following: + * can be used in a manner similar to the following:: * - * -- Find the page of interest. -- - * struct page *page = find_get_page(mapping, offset); + * // Find the page of interest. + * struct page *page = find_get_page(mapping, offset); * - * -- Gain access to the contents of that page. -- - * void *vaddr = kmap_atomic(page); + * // Gain access to the contents of that page. + * void *vaddr = kmap_atomic(page); * - * -- Do something to the contents of that page. -- - * memset(vaddr, 0, PAGE_SIZE); + * // Do something to the contents of that page. + * memset(vaddr, 0, PAGE_SIZE); * - * -- Unmap that page. -- - * kunmap_atomic(vaddr); + * // Unmap that page. + * kunmap_atomic(vaddr); * * Note that the kunmap_atomic() call takes the result of the kmap_atomic() * call, not the argument. -- cgit v1.2.3 From 507db7927cd181d409dd495c8384b8e14c21c600 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Sun, 3 Jul 2022 18:08:36 -0700 Subject: mm: rmap: use the correct parameter name for DEFINE_PAGE_VMA_WALK The parameter used by DEFINE_PAGE_VMA_WALK is _page not page, fix the parameter name. It didn't cause any build error, it is probably because the only caller is write_protect_page() from ksm.c, which pass in page. Link: https://lkml.kernel.org/r/20220512174551.81279-1-shy828301@gmail.com Fixes: 2aff7a4755be ("mm: Convert page_vma_mapped_walk to work on PFNs") Signed-off-by: Yang Shi Reviewed-by: Muchun Song Reviewed-by: Matthew Wilcox (Oracle) Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/rmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 9ec23138e410..bf80adca980b 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -325,8 +325,8 @@ struct page_vma_mapped_walk { #define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags) \ struct page_vma_mapped_walk name = { \ .pfn = page_to_pfn(_page), \ - .nr_pages = compound_nr(page), \ - .pgoff = page_to_pgoff(page), \ + .nr_pages = compound_nr(_page), \ + .pgoff = page_to_pgoff(_page), \ .vma = _vma, \ .address = _address, \ .flags = _flags, \ -- cgit v1.2.3 From c453d8c7d1384d7e1d7f26d3ec0d527092edf801 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 13 May 2022 12:17:05 -0700 Subject: mm/page_vma_mapped.c: check possible huge PMD map with transhuge_vma_suitable() IIUC page_vma_mapped_walk() checks if the vma is possibly huge PMD mapped with transparent_hugepage_active() and "pvmw->nr_pages >= HPAGE_PMD_NR". Actually pvmw->nr_pages is returned by compound_nr() or folio_nr_pages(), so the page should be THP as long as "pvmw->nr_pages >= HPAGE_PMD_NR". And it is guaranteed THP is allocated for valid VMA in the first place. But it may be not PMD mapped if the VMA is file VMA and it is not properly aligned. The transhuge_vma_suitable() is used to do such check, so replace transparent_hugepage_active() to it, which is too heavy and overkilling. Link: https://lkml.kernel.org/r/20220513191705.457775-1-shy828301@gmail.com Signed-off-by: Yang Shi Reviewed-by: Muchun Song Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index de29821231c9..648cb3ce7099 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -117,8 +117,10 @@ extern struct kobj_attribute shmem_enabled_attr; extern unsigned long transparent_hugepage_flags; static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, - unsigned long haddr) + unsigned long addr) { + unsigned long haddr; + /* Don't have to check pgoff for anonymous vma */ if (!vma_is_anonymous(vma)) { if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, @@ -126,6 +128,8 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, return false; } + haddr = addr & HPAGE_PMD_MASK; + if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) return false; return true; @@ -342,7 +346,7 @@ static inline bool transparent_hugepage_active(struct vm_area_struct *vma) } static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, - unsigned long haddr) + unsigned long addr) { return false; } -- cgit v1.2.3 From 7ce82f4c3f3ead13a9d9498768e3b1a79975c4d8 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 30 May 2022 19:30:15 +0800 Subject: mm/migration: return errno when isolate_huge_page failed We might fail to isolate huge page due to e.g. the page is under migration which cleared HPageMigratable. We should return errno in this case rather than always return 1 which could confuse the user, i.e. the caller might think all of the memory is migrated while the hugetlb page is left behind. We make the prototype of isolate_huge_page consistent with isolate_lru_page as suggested by Huang Ying and rename isolate_huge_page to isolate_hugetlb as suggested by Muchun to improve the readability. Link: https://lkml.kernel.org/r/20220530113016.16663-4-linmiaohe@huawei.com Fixes: e8db67eb0ded ("mm: migrate: move_pages() supports thp migration") Signed-off-by: Miaohe Lin Suggested-by: Huang Ying Reported-by: kernel test robot (build error) Cc: Alistair Popple Cc: Christoph Hellwig Cc: Christoph Lameter Cc: David Hildenbrand Cc: David Howells Cc: Mike Kravetz Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e4cff27d1198..756b66ff025e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -170,7 +170,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); -bool isolate_huge_page(struct page *page, struct list_head *list); +int isolate_hugetlb(struct page *page, struct list_head *list); int get_hwpoison_huge_page(struct page *page, bool *hugetlb); int get_huge_page_for_hwpoison(unsigned long pfn, int flags); void putback_active_hugepage(struct page *page); @@ -376,9 +376,9 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return NULL; } -static inline bool isolate_huge_page(struct page *page, struct list_head *list) +static inline int isolate_hugetlb(struct page *page, struct list_head *list) { - return false; + return -EBUSY; } static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb) -- cgit v1.2.3 From ad1ac596e8a8c4b06715dfbd89853eb73c9886b2 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 30 May 2022 19:30:16 +0800 Subject: mm/migration: fix potential pte_unmap on an not mapped pte __migration_entry_wait and migration_entry_wait_on_locked assume pte is always mapped from caller. But this is not the case when it's called from migration_entry_wait_huge and follow_huge_pmd. Add a hugetlbfs variant that calls hugetlb_migration_entry_wait(ptep == NULL) to fix this issue. Link: https://lkml.kernel.org/r/20220530113016.16663-5-linmiaohe@huawei.com Fixes: 30dad30922cc ("mm: migration: add migrate_entry_wait_huge()") Signed-off-by: Miaohe Lin Suggested-by: David Hildenbrand Reviewed-by: David Hildenbrand Cc: Alistair Popple Cc: Christoph Hellwig Cc: Christoph Lameter Cc: David Howells Cc: Huang Ying Cc: kernel test robot Cc: Mike Kravetz Cc: Muchun Song Cc: Oscar Salvador Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/swapops.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index f24775b41880..bb7afd03a324 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -244,8 +244,10 @@ extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, spinlock_t *ptl); extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); -extern void migration_entry_wait_huge(struct vm_area_struct *vma, - struct mm_struct *mm, pte_t *pte); +#ifdef CONFIG_HUGETLB_PAGE +extern void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl); +extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte); +#endif #else static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { @@ -271,8 +273,10 @@ static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, spinlock_t *ptl) { } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } -static inline void migration_entry_wait_huge(struct vm_area_struct *vma, - struct mm_struct *mm, pte_t *pte) { } +#ifdef CONFIG_HUGETLB_PAGE +static inline void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl) { } +static inline void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) { } +#endif static inline int is_writable_migration_entry(swp_entry_t entry) { return 0; -- cgit v1.2.3 From c9e124e0382d83d458db204f929002ea98daa6a8 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 6 Jun 2022 18:23:06 +0000 Subject: mm/damon/{dbgfs,sysfs}: move target_has_pid() from dbgfs to damon.h The function for knowing if given monitoring context's targets will have pid or not is defined and used in dbgfs only. However, the logic is also needed for sysfs. This commit moves the code to damon.h and makes both dbgfs and sysfs to use it. Link: https://lkml.kernel.org/r/20220606182310.48781-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/damon.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 2765c7d99beb..b9aae19fab3e 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -525,6 +525,12 @@ bool damon_is_registered_ops(enum damon_ops_id id); int damon_register_ops(struct damon_operations *ops); int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id); +static inline bool damon_target_has_pid(const struct damon_ctx *ctx) +{ + return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_FVADDR; +} + + int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); -- cgit v1.2.3 From d9da8f6cf55eeca642c021912af1890002464c64 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 9 Jun 2022 20:18:46 +0200 Subject: mm: introduce clear_highpage_kasan_tagged Add a clear_highpage_kasan_tagged() helper that does clear_highpage() on a page potentially tagged by KASAN. This helper is used by the following patch. Link: https://lkml.kernel.org/r/4471979b46b2c487787ddcd08b9dc5fedd1b6ffd.1654798516.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Marco Elver Signed-off-by: Andrew Morton --- include/linux/highmem.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index fee9835e3793..22379a63e293 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -243,6 +243,16 @@ static inline void clear_highpage(struct page *page) kunmap_local(kaddr); } +static inline void clear_highpage_kasan_tagged(struct page *page) +{ + u8 tag; + + tag = page_kasan_tag(page); + page_kasan_tag_reset(page); + clear_highpage(page); + page_kasan_tag_set(page, tag); +} + #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE static inline void tag_clear_highpage(struct page *page) -- cgit v1.2.3 From c15187a4a2d660bf490f7873afd0de5288f65c8f Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 31 May 2022 20:22:22 -0700 Subject: mm: memcontrol: introduce mem_cgroup_ino() and mem_cgroup_get_from_ino() Patch series "mm: introduce shrinker debugfs interface", v5. The only existing debugging mechanism is a couple of tracepoints in do_shrink_slab(): mm_shrink_slab_start and mm_shrink_slab_end. They aren't covering everything though: shrinkers which report 0 objects will never show up, there is no support for memcg-aware shrinkers. Shrinkers are identified by their scan function, which is not always enough (e.g. hard to guess which super block's shrinker it is having only "super_cache_scan"). To provide a better visibility and debug options for memory shrinkers this patchset introduces a /sys/kernel/debug/shrinker interface, to some extent similar to /sys/kernel/slab. For each shrinker registered in the system a directory is created. As now, the directory will contain only a "scan" file, which allows to get the number of managed objects for each memory cgroup (for memcg-aware shrinkers) and each numa node (for numa-aware shrinkers on a numa machine). Other interfaces might be added in the future. To make debugging more pleasant, the patchset also names all shrinkers, so that debugfs entries can have meaningful names. This patch (of 5): Shrinker debugfs requires a way to represent memory cgroups without using full paths, both for displaying information and getting input from a user. Cgroup inode number is a perfect way, already used by bpf. This commit adds a couple of helper functions which will be used to handle memcg-aware shrinkers. Link: https://lkml.kernel.org/r/20220601032227.4076670-1-roman.gushchin@linux.dev Link: https://lkml.kernel.org/r/20220601032227.4076670-2-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Muchun Song Cc: Dave Chinner Cc: Kent Overstreet Cc: Hillf Danton Cc: Christophe JAILLET Cc: Roman Gushchin Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 04f2f33607e9..4d31ce55b1c0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -837,6 +837,15 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) } struct mem_cgroup *mem_cgroup_from_id(unsigned short id); +#ifdef CONFIG_SHRINKER_DEBUG +static inline unsigned long mem_cgroup_ino(struct mem_cgroup *memcg) +{ + return memcg ? cgroup_ino(memcg->css.cgroup) : 0; +} + +struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino); +#endif + static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m) { return mem_cgroup_from_css(seq_css(m)); @@ -1343,6 +1352,18 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) return NULL; } +#ifdef CONFIG_SHRINKER_DEBUG +static inline unsigned long mem_cgroup_ino(struct mem_cgroup *memcg) +{ + return 0; +} + +static inline struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino) +{ + return NULL; +} +#endif + static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m) { return NULL; -- cgit v1.2.3 From 5035ebc644aec92d55d1bbfe042f35341e4bffb5 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 31 May 2022 20:22:23 -0700 Subject: mm: shrinkers: introduce debugfs interface for memory shrinkers This commit introduces the /sys/kernel/debug/shrinker debugfs interface which provides an ability to observe the state of individual kernel memory shrinkers. Because the feature adds some memory overhead (which shouldn't be large unless there is a huge amount of registered shrinkers), it's guarded by a config option (enabled by default). This commit introduces the "count" interface for each shrinker registered in the system. The output is in the following format: ... ... ... To reduce the size of output on machines with many thousands cgroups, if the total number of objects on all nodes is 0, the line is omitted. If the shrinker is not memcg-aware or CONFIG_MEMCG is off, 0 is printed as cgroup inode id. If the shrinker is not numa-aware, 0's are printed for all nodes except the first one. This commit gives debugfs entries simple numeric names, which are not very convenient. The following commit in the series will provide shrinkers with more meaningful names. [akpm@linux-foundation.org: remove WARN_ON_ONCE(), per Roman] Reported-by: syzbot+300d27c79fe6d4cbcc39@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/20220601032227.4076670-3-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Reviewed-by: Kent Overstreet Acked-by: Muchun Song Cc: Christophe JAILLET Cc: Dave Chinner Cc: Hillf Danton Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 76fbf92b04d9..2ced8149c513 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -72,6 +72,10 @@ struct shrinker { #ifdef CONFIG_MEMCG /* ID in shrinker_idr */ int id; +#endif +#ifdef CONFIG_SHRINKER_DEBUG + int debugfs_id; + struct dentry *debugfs_entry; #endif /* objs pending delete, per node */ atomic_long_t *nr_deferred; @@ -94,4 +98,17 @@ extern int register_shrinker(struct shrinker *shrinker); extern void unregister_shrinker(struct shrinker *shrinker); extern void free_prealloced_shrinker(struct shrinker *shrinker); extern void synchronize_shrinkers(void); -#endif + +#ifdef CONFIG_SHRINKER_DEBUG +extern int shrinker_debugfs_add(struct shrinker *shrinker); +extern void shrinker_debugfs_remove(struct shrinker *shrinker); +#else /* CONFIG_SHRINKER_DEBUG */ +static inline int shrinker_debugfs_add(struct shrinker *shrinker) +{ + return 0; +} +static inline void shrinker_debugfs_remove(struct shrinker *shrinker) +{ +} +#endif /* CONFIG_SHRINKER_DEBUG */ +#endif /* _LINUX_SHRINKER_H */ -- cgit v1.2.3 From e33c267ab70de4249d22d7eab1cc7d68a889bac2 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 31 May 2022 20:22:24 -0700 Subject: mm: shrinkers: provide shrinkers with names Currently shrinkers are anonymous objects. For debugging purposes they can be identified by count/scan function names, but it's not always useful: e.g. for superblock's shrinkers it's nice to have at least an idea of to which superblock the shrinker belongs. This commit adds names to shrinkers. register_shrinker() and prealloc_shrinker() functions are extended to take a format and arguments to master a name. In some cases it's not possible to determine a good name at the time when a shrinker is allocated. For such cases shrinker_debugfs_rename() is provided. The expected format is: -[:]- For some shrinkers an instance can be encoded as (MAJOR:MINOR) pair. After this change the shrinker debugfs directory looks like: $ cd /sys/kernel/debug/shrinker/ $ ls dquota-cache-16 sb-devpts-28 sb-proc-47 sb-tmpfs-42 mm-shadow-18 sb-devtmpfs-5 sb-proc-48 sb-tmpfs-43 mm-zspool:zram0-34 sb-hugetlbfs-17 sb-pstore-31 sb-tmpfs-44 rcu-kfree-0 sb-hugetlbfs-33 sb-rootfs-2 sb-tmpfs-49 sb-aio-20 sb-iomem-12 sb-securityfs-6 sb-tracefs-13 sb-anon_inodefs-15 sb-mqueue-21 sb-selinuxfs-22 sb-xfs:vda1-36 sb-bdev-3 sb-nsfs-4 sb-sockfs-8 sb-zsmalloc-19 sb-bpf-32 sb-pipefs-14 sb-sysfs-26 thp-deferred_split-10 sb-btrfs:vda2-24 sb-proc-25 sb-tmpfs-1 thp-zero-9 sb-cgroup2-30 sb-proc-39 sb-tmpfs-27 xfs-buf:vda1-37 sb-configfs-23 sb-proc-41 sb-tmpfs-29 xfs-inodegc:vda1-38 sb-dax-11 sb-proc-45 sb-tmpfs-35 sb-debugfs-7 sb-proc-46 sb-tmpfs-40 [roman.gushchin@linux.dev: fix build warnings] Link: https://lkml.kernel.org/r/Yr+ZTnLb9lJk6fJO@castle Reported-by: kernel test robot Link: https://lkml.kernel.org/r/20220601032227.4076670-4-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Cc: Christophe JAILLET Cc: Dave Chinner Cc: Hillf Danton Cc: Kent Overstreet Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 2ced8149c513..08e6054e061f 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -75,6 +75,7 @@ struct shrinker { #endif #ifdef CONFIG_SHRINKER_DEBUG int debugfs_id; + const char *name; struct dentry *debugfs_entry; #endif /* objs pending delete, per node */ @@ -92,9 +93,11 @@ struct shrinker { */ #define SHRINKER_NONSLAB (1 << 3) -extern int prealloc_shrinker(struct shrinker *shrinker); +extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker, + const char *fmt, ...); extern void register_shrinker_prepared(struct shrinker *shrinker); -extern int register_shrinker(struct shrinker *shrinker); +extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker, + const char *fmt, ...); extern void unregister_shrinker(struct shrinker *shrinker); extern void free_prealloced_shrinker(struct shrinker *shrinker); extern void synchronize_shrinkers(void); @@ -102,6 +105,8 @@ extern void synchronize_shrinkers(void); #ifdef CONFIG_SHRINKER_DEBUG extern int shrinker_debugfs_add(struct shrinker *shrinker); extern void shrinker_debugfs_remove(struct shrinker *shrinker); +extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, + const char *fmt, ...); #else /* CONFIG_SHRINKER_DEBUG */ static inline int shrinker_debugfs_add(struct shrinker *shrinker) { @@ -110,5 +115,10 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker) static inline void shrinker_debugfs_remove(struct shrinker *shrinker) { } +static inline __printf(2, 3) +int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) +{ + return 0; +} #endif /* CONFIG_SHRINKER_DEBUG */ #endif /* _LINUX_SHRINKER_H */ -- cgit v1.2.3 From 8cdcc532268df0893d9756f537cbce479f4c4831 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 13 Jun 2022 19:22:56 +0000 Subject: mm/damon/schemes: add 'LRU_PRIO' DAMOS action This commit adds a new DAMOS action called 'LRU_PRIO' for the physical address space. The action prioritizes pages in the memory regions of the user-specified target access pattern on their LRU lists. This is hence supposed to be used for frequently accessed (hot) memory regions so that hot pages could be more likely protected under memory pressure. Internally, it simply calls 'mark_page_accessed()'. Link: https://lkml.kernel.org/r/20220613192301.8817-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index b9aae19fab3e..4c64e03e94d8 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -86,6 +86,7 @@ struct damon_target { * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT. * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. + * @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists. * @DAMOS_STAT: Do nothing but count the stat. * @NR_DAMOS_ACTIONS: Total number of DAMOS actions */ @@ -95,6 +96,7 @@ enum damos_action { DAMOS_PAGEOUT, DAMOS_HUGEPAGE, DAMOS_NOHUGEPAGE, + DAMOS_LRU_PRIO, DAMOS_STAT, /* Do nothing but only record the stat */ NR_DAMOS_ACTIONS, }; -- cgit v1.2.3 From 99cdc2cd180a7adc87badc9ca92f8af803d8bf3b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 13 Jun 2022 19:22:58 +0000 Subject: mm/damon/schemes: add 'LRU_DEPRIO' action This commit adds a new DAMON-based operation scheme action called 'LRU_DEPRIO' for physical address space. The action deprioritizes pages in the memory area of the target access pattern on their LRU lists. This is hence supposed to be used for rarely accessed (cold) memory regions so that cold pages could be more likely reclaimed first under memory pressure. Internally, it simply calls 'lru_deactivate()'. Using this with 'LRU_PRIO' action for hot pages, users can proactively sort LRU lists based on the access pattern. That is, it can make the LRU lists somewhat more trustworthy source of access temperature. As a result, efficiency of LRU-lists based mechanisms including the reclamation target selection could be improved. Link: https://lkml.kernel.org/r/20220613192301.8817-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 4c64e03e94d8..7b1f4a488230 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -87,6 +87,7 @@ struct damon_target { * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. * @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists. + * @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists. * @DAMOS_STAT: Do nothing but count the stat. * @NR_DAMOS_ACTIONS: Total number of DAMOS actions */ @@ -97,6 +98,7 @@ enum damos_action { DAMOS_HUGEPAGE, DAMOS_NOHUGEPAGE, DAMOS_LRU_PRIO, + DAMOS_LRU_DEPRIO, DAMOS_STAT, /* Do nothing but only record the stat */ NR_DAMOS_ACTIONS, }; -- cgit v1.2.3 From 64fe24a3e05e5f3ac56fcd45afd2fd1d9cc8fcb6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 14 Jun 2022 11:36:29 +0200 Subject: mm/mprotect: try avoiding write faults for exclusive anonymous pages when changing protection Similar to our MM_CP_DIRTY_ACCT handling for shared, writable mappings, we can try mapping anonymous pages in a private writable mapping writable if they are exclusive, the PTE is already dirty, and no special handling applies. Mapping the anonymous page writable is essentially the same thing the write fault handler would do in this case. Special handling is required for uffd-wp and softdirty tracking, so take care of that properly. Also, leave PROT_NONE handling alone for now; in the future, we could similarly extend the logic in do_numa_page() or use pte_mk_savedwrite() here. While this improves mprotect(PROT_READ)+mprotect(PROT_READ|PROT_WRITE) performance, it should also be a valuable optimization for uffd-wp, when un-protecting. This has been previously suggested by Peter Collingbourne in [1], relevant in the context of the Scudo memory allocator, before we had PageAnonExclusive. This commit doesn't add the same handling for PMDs (i.e., anonymous THP, anonymous hugetlb); benchmark results from Andrea indicate that there are minor performance gains, so it's might still be valuable to streamline that logic for all anonymous pages in the future. As we now also set MM_CP_DIRTY_ACCT for private mappings, let's rename it to MM_CP_TRY_CHANGE_WRITABLE, to make it clearer what's actually happening. Micro-benchmark courtesy of Andrea: === #define _GNU_SOURCE #include #include #include #include #include #define SIZE (1024*1024*1024) int main(int argc, char *argv[]) { char *p; if (posix_memalign((void **)&p, sysconf(_SC_PAGESIZE)*512, SIZE)) perror("posix_memalign"), exit(1); if (madvise(p, SIZE, argc > 1 ? MADV_HUGEPAGE : MADV_NOHUGEPAGE)) perror("madvise"); explicit_bzero(p, SIZE); for (int loops = 0; loops < 40; loops++) { if (mprotect(p, SIZE, PROT_READ)) perror("mprotect"), exit(1); if (mprotect(p, SIZE, PROT_READ|PROT_WRITE)) perror("mprotect"), exit(1); explicit_bzero(p, SIZE); } } === Results on my Ryzen 9 3900X: Stock 10 runs (lower is better): AVG 6.398s, STDEV 0.043 Patched 10 runs (lower is better): AVG 3.780s, STDEV 0.026 === [1] https://lkml.kernel.org/r/20210429214801.2583336-1-pcc@google.com Link: https://lkml.kernel.org/r/20220614093629.76309-1-david@redhat.com Signed-off-by: David Hildenbrand Suggested-by: Peter Collingbourne Acked-by: Peter Xu Cc: Nadav Amit Cc: Dave Hansen Cc: Andrea Arcangeli Cc: Yang Shi Cc: Hugh Dickins Cc: Mel Gorman Signed-off-by: Andrew Morton --- include/linux/mm.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index cf3d0d673f6b..09ea26056e2f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1962,8 +1962,12 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, * for now all the callers are only use one of the flags at the same * time. */ -/* Whether we should allow dirty bit accounting */ -#define MM_CP_DIRTY_ACCT (1UL << 0) +/* + * Whether we should manually check if we can map individual PTEs writable, + * because something (e.g., COW, uffd-wp) blocks that from happening for all + * PTEs automatically in a writable mapping. + */ +#define MM_CP_TRY_CHANGE_WRITABLE (1UL << 0) /* Whether this protection change is for NUMA hints */ #define MM_CP_PROT_NUMA (1UL << 1) /* Whether this change is for write protecting */ -- cgit v1.2.3 From b8cecb9376b9d3031cf62b476a0db087b6b01072 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 17 Jun 2022 16:42:44 +0100 Subject: mm/vmscan: convert reclaim_clean_pages_from_list() to folios Patch series "nvert much of vmscan to folios" vmscan always operates on folios since it puts the pages on the LRU list. Switching all of these functions from pages to folios saves 1483 bytes of text from removing all the baggage around calling compound_page() and similar functions. This patch (of 5): This is a straightforward conversion which removes several hidden calls to compound_head, saving 330 bytes of kernel text. Link: https://lkml.kernel.org/r/20220617154248.700416-1-willy@infradead.org Link: https://lkml.kernel.org/r/20220617154248.700416-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e66f7aa3191d..f32aade2a6e0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -670,6 +670,12 @@ static __always_inline bool PageAnon(struct page *page) return folio_test_anon(page_folio(page)); } +static __always_inline bool __folio_test_movable(const struct folio *folio) +{ + return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == + PAGE_MAPPING_MOVABLE; +} + static __always_inline int __PageMovable(struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == -- cgit v1.2.3 From e3c4cebf3f9db8c9150eb1982da7e353d9938bed Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 17 Jun 2022 18:49:59 +0100 Subject: mm: add folios_put() Patch series "Convert the swap code to be more folio-based". There's still more to do with the swap code, but this reaps a lot of the folio benefit. More than 4kB of kernel text saved (with the UEK7 kernel config). I don't know how much that's going to translate into CPU savings, but some of those compound_head() calls are on every page free, so it should be noticable. It might even be noticable just from an I-cache consumption perspective. This patch (of 22): This is just a wrapper around release_pages() for now. Place the prototype in mm.h along with folio_put() and folio_put_refs(). Link: https://lkml.kernel.org/r/20220617175020.717127-1-willy@infradead.org Link: https://lkml.kernel.org/r/20220617175020.717127-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 19 +++++++++++++++++++ include/linux/pagemap.h | 2 -- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 09ea26056e2f..09670ccb94e7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1220,6 +1220,25 @@ static inline void folio_put_refs(struct folio *folio, int refs) __put_page(&folio->page); } +void release_pages(struct page **pages, int nr); + +/** + * folios_put - Decrement the reference count on an array of folios. + * @folios: The folios. + * @nr: How many folios there are. + * + * Like folio_put(), but for an array of folios. This is more efficient + * than writing the loop yourself as it will optimise the locks which + * need to be taken if the folios are freed. + * + * Context: May be called in process or interrupt context, but not in NMI + * context. May be called while holding a spinlock. + */ +static inline void folios_put(struct folio **folios, unsigned int nr) +{ + release_pages((struct page **)folios, nr); +} + static inline void put_page(struct page *page) { struct folio *folio = page_folio(page); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ce96866fbec4..c399a9c5da7d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -345,8 +345,6 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) #endif } -void release_pages(struct page **pages, int nr); - struct address_space *page_mapping(struct page *); struct address_space *folio_mapping(struct folio *); struct address_space *swapcache_mapping(struct folio *); -- cgit v1.2.3 From 7d80dd096f8f889128f67a2d452e4dadeed71e63 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 17 Jun 2022 18:50:01 +0100 Subject: mm/swap: make __pagevec_lru_add static __pagevec_lru_add has no callers outside swap.c, so make it static, and move it to a more logical position in the file. Link: https://lkml.kernel.org/r/20220617175020.717127-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/pagevec.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 67b1246f136b..b0e3540f3a4c 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -26,7 +26,6 @@ struct pagevec { }; void __pagevec_release(struct pagevec *pvec); -void __pagevec_lru_add(struct pagevec *pvec); unsigned pagevec_lookup_range(struct pagevec *pvec, struct address_space *mapping, pgoff_t *start, pgoff_t end); -- cgit v1.2.3 From 8d29c7036f5ff360ea1f51b9fed5d909be7c8094 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 17 Jun 2022 18:50:13 +0100 Subject: mm/swap: convert __put_page() to __folio_put() Saves 11 bytes of text by removing a check of PageTail. Link: https://lkml.kernel.org/r/20220617175020.717127-16-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 09670ccb94e7..3fb49aec13fd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -855,7 +855,7 @@ static inline struct folio *virt_to_folio(const void *x) return page_folio(page); } -void __put_page(struct page *page); +void __folio_put(struct folio *folio); void put_pages_list(struct list_head *pages); @@ -1197,7 +1197,7 @@ static inline __must_check bool try_get_page(struct page *page) static inline void folio_put(struct folio *folio) { if (folio_put_testzero(folio)) - __put_page(&folio->page); + __folio_put(folio); } /** @@ -1217,7 +1217,7 @@ static inline void folio_put(struct folio *folio) static inline void folio_put_refs(struct folio *folio, int refs) { if (folio_ref_sub_and_test(folio, refs)) - __put_page(&folio->page); + __folio_put(folio); } void release_pages(struct page **pages, int nr); -- cgit v1.2.3 From 5375336c8c42a343c3b440b6f1e21c65e7b174b9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 17 Jun 2022 18:50:17 +0100 Subject: mm: convert destroy_compound_page() to destroy_large_folio() All callers now have a folio, so push the folio->page conversion down to this function. [akpm@linux-foundation.org: uninline destroy_large_folio() to fix build issue] Link: https://lkml.kernel.org/r/20220617175020.717127-20-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3fb49aec13fd..9cc02a7e503b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -892,11 +892,7 @@ static inline void set_compound_page_dtor(struct page *page, page[1].compound_dtor = compound_dtor; } -static inline void destroy_compound_page(struct page *page) -{ - VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page); - compound_page_dtors[page[1].compound_dtor](page); -} +void destroy_large_folio(struct folio *folio); static inline int head_compound_pincount(struct page *head) { -- cgit v1.2.3 From ed7802dd48f7a507213cbb95bb4c6f1fe134eb5d Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 17 Jun 2022 21:56:49 +0800 Subject: mm: memory_hotplug: enumerate all supported section flags Patch series "make hugetlb_optimize_vmemmap compatible with memmap_on_memory", v3. This series makes hugetlb_optimize_vmemmap compatible with memmap_on_memory. This patch (of 2): We are almost running out of section flags, only one bit is available in the worst case (powerpc with 256k pages). However, there are still some free bits (in ->section_mem_map) on other architectures (e.g. x86_64 has 10 bits available, arm64 has 8 bits available with worst case of 64K pages). We have hard coded those numbers in code, it is inconvenient to use those bits on other architectures except powerpc. So transfer those section flags to enumeration to make it easy to add new section flags in the future. Also, move SECTION_TAINT_ZONE_DEVICE into the scope of CONFIG_ZONE_DEVICE to save a bit on non-zone-device case. [songmuchun@bytedance.com: replace enum with defines per David] Link: https://lkml.kernel.org/r/20220620110616.12056-2-songmuchun@bytedance.com Link: https://lkml.kernel.org/r/20220617135650.74901-1-songmuchun@bytedance.com Link: https://lkml.kernel.org/r/20220617135650.74901-2-songmuchun@bytedance.com Signed-off-by: Muchun Song Reviewed-by: David Hildenbrand Cc: Jonathan Corbet Cc: Mike Kravetz Cc: Oscar Salvador Cc: Paul E. McKenney Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index aab70355d64f..2b5757752333 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1418,16 +1418,32 @@ extern size_t mem_section_usage_size(void); * (equal SECTION_SIZE_BITS - PAGE_SHIFT), and the * worst combination is powerpc with 256k pages, * which results in PFN_SECTION_SHIFT equal 6. - * To sum it up, at least 6 bits are available. + * To sum it up, at least 6 bits are available on all architectures. + * However, we can exceed 6 bits on some other architectures except + * powerpc (e.g. 15 bits are available on x86_64, 13 bits are available + * with the worst case of 64K pages on arm64) if we make sure the + * exceeded bit is not applicable to powerpc. */ -#define SECTION_MARKED_PRESENT (1UL<<0) -#define SECTION_HAS_MEM_MAP (1UL<<1) -#define SECTION_IS_ONLINE (1UL<<2) -#define SECTION_IS_EARLY (1UL<<3) -#define SECTION_TAINT_ZONE_DEVICE (1UL<<4) -#define SECTION_MAP_LAST_BIT (1UL<<5) -#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) -#define SECTION_NID_SHIFT 6 +enum { + SECTION_MARKED_PRESENT_BIT, + SECTION_HAS_MEM_MAP_BIT, + SECTION_IS_ONLINE_BIT, + SECTION_IS_EARLY_BIT, +#ifdef CONFIG_ZONE_DEVICE + SECTION_TAINT_ZONE_DEVICE_BIT, +#endif + SECTION_MAP_LAST_BIT, +}; + +#define SECTION_MARKED_PRESENT BIT(SECTION_MARKED_PRESENT_BIT) +#define SECTION_HAS_MEM_MAP BIT(SECTION_HAS_MEM_MAP_BIT) +#define SECTION_IS_ONLINE BIT(SECTION_IS_ONLINE_BIT) +#define SECTION_IS_EARLY BIT(SECTION_IS_EARLY_BIT) +#ifdef CONFIG_ZONE_DEVICE +#define SECTION_TAINT_ZONE_DEVICE BIT(SECTION_TAINT_ZONE_DEVICE_BIT) +#endif +#define SECTION_MAP_MASK (~(BIT(SECTION_MAP_LAST_BIT) - 1)) +#define SECTION_NID_SHIFT SECTION_MAP_LAST_BIT static inline struct page *__section_mem_map_addr(struct mem_section *section) { @@ -1466,12 +1482,19 @@ static inline int online_section(struct mem_section *section) return (section && (section->section_mem_map & SECTION_IS_ONLINE)); } +#ifdef CONFIG_ZONE_DEVICE static inline int online_device_section(struct mem_section *section) { unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE; return section && ((section->section_mem_map & flags) == flags); } +#else +static inline int online_device_section(struct mem_section *section) +{ + return 0; +} +#endif static inline int online_section_nr(unsigned long nr) { -- cgit v1.2.3 From 66361095129b3b5d065e6c09cf0c085ef4a8c40f Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 17 Jun 2022 21:56:50 +0800 Subject: mm: memory_hotplug: make hugetlb_optimize_vmemmap compatible with memmap_on_memory For now, the feature of hugetlb_free_vmemmap is not compatible with the feature of memory_hotplug.memmap_on_memory, and hugetlb_free_vmemmap takes precedence over memory_hotplug.memmap_on_memory. However, someone wants to make memory_hotplug.memmap_on_memory takes precedence over hugetlb_free_vmemmap since memmap_on_memory makes it more likely to succeed memory hotplug in close-to-OOM situations. So the decision of making hugetlb_free_vmemmap take precedence is not wise and elegant. The proper approach is to have hugetlb_vmemmap.c do the check whether the section which the HugeTLB pages belong to can be optimized. If the section's vmemmap pages are allocated from the added memory block itself, hugetlb_free_vmemmap should refuse to optimize the vmemmap, otherwise, do the optimization. Then both kernel parameters are compatible. So this patch introduces VmemmapSelfHosted to mask any non-optimizable vmemmap pages. The hugetlb_vmemmap can use this flag to detect if a vmemmap page can be optimized. [songmuchun@bytedance.com: walk vmemmap page tables to avoid false-positive] Link: https://lkml.kernel.org/r/20220620110616.12056-3-songmuchun@bytedance.com Link: https://lkml.kernel.org/r/20220617135650.74901-3-songmuchun@bytedance.com Signed-off-by: Muchun Song Co-developed-by: Oscar Salvador Signed-off-by: Oscar Salvador Acked-by: David Hildenbrand Cc: Jonathan Corbet Cc: Mike Kravetz Cc: Paul E. McKenney Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- include/linux/memory_hotplug.h | 9 --------- include/linux/page-flags.h | 11 +++++++++++ 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 20d7edf62a6a..e0b2209ab71c 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -351,13 +351,4 @@ void arch_remove_linear_mapping(u64 start, u64 size); extern bool mhp_supports_memmap_on_memory(unsigned long size); #endif /* CONFIG_MEMORY_HOTPLUG */ -#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY -bool mhp_memmap_on_memory(void); -#else -static inline bool mhp_memmap_on_memory(void) -{ - return false; -} -#endif - #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f32aade2a6e0..82719d33c0f1 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -193,6 +193,11 @@ enum pageflags { /* Only valid for buddy pages. Used to track pages that are reported */ PG_reported = PG_uptodate, + +#ifdef CONFIG_MEMORY_HOTPLUG + /* For self-hosted memmap pages */ + PG_vmemmap_self_hosted = PG_owner_priv_1, +#endif }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) @@ -628,6 +633,12 @@ PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison) */ __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) +#ifdef CONFIG_MEMORY_HOTPLUG +PAGEFLAG(VmemmapSelfHosted, vmemmap_self_hosted, PF_ANY) +#else +PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) +#endif + /* * On an anonymous page mapped into a user virtual memory area, * page->mapping points to its anon_vma, not to a struct address_space; -- cgit v1.2.3 From e8da368a1e42a8056d1a6b419e1b91b6cf11d77e Mon Sep 17 00:00:00 2001 From: Yun-Ze Li Date: Mon, 20 Jun 2022 07:15:16 +0000 Subject: mm, docs: fix comments that mention mem_hotplug_end() Comments that mention mem_hotplug_end() are confusing as there is no function called mem_hotplug_end(). Fix them by replacing all the occurences of mem_hotplug_end() in the comments with mem_hotplug_done(). [akpm@linux-foundation.org: grammatical fixes] Link: https://lkml.kernel.org/r/20220620071516.1286101-1-p76091292@gs.ncku.edu.tw Signed-off-by: Yun-Ze Li Cc: Souptick Joarder Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2b5757752333..735bf5b37949 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -591,8 +591,8 @@ struct zone { * give them a chance of being in the same cacheline. * * Write access to present_pages at runtime should be protected by - * mem_hotplug_begin/end(). Any reader who can't tolerant drift of - * present_pages should get_online_mems() to get a stable value. + * mem_hotplug_begin/done(). Any reader who can't tolerant drift of + * present_pages should use get_online_mems() to get a stable value. */ atomic_long_t managed_pages; unsigned long spanned_pages; @@ -870,7 +870,7 @@ typedef struct pglist_data { unsigned long nr_reclaim_start; /* nr pages written while throttled * when throttling started. */ struct task_struct *kswapd; /* Protected by - mem_hotplug_begin/end() */ + mem_hotplug_begin/done() */ int kswapd_order; enum zone_type kswapd_highest_zoneidx; -- cgit v1.2.3 From 18f3962953e40401b7ed98e8524167282c3e626e Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Sun, 26 Jun 2022 22:57:17 +0800 Subject: mm: hugetlb: kill set_huge_swap_pte_at() Commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at() helper") add set_huge_swap_pte_at() to handle swap entries on architectures that support hugepages consisting of contiguous ptes. And currently the set_huge_swap_pte_at() is only overridden by arm64. set_huge_swap_pte_at() provide a sz parameter to help determine the number of entries to be updated. But in fact, all hugetlb swap entries contain pfn information, so we can find the corresponding folio through the pfn recorded in the swap entry, then the folio_size() is the number of entries that need to be updated. And considering that users will easily cause bugs by ignoring the difference between set_huge_swap_pte_at() and set_huge_pte_at(). Let's handle swap entries in set_huge_pte_at() and remove the set_huge_swap_pte_at(), then we can call set_huge_pte_at() anywhere, which simplifies our coding. Link: https://lkml.kernel.org/r/20220626145717.53572-1-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Acked-by: Muchun Song Cc: Mike Kravetz Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 756b66ff025e..c6cccfaf8708 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -903,14 +903,6 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm) atomic_long_sub(l, &mm->hugetlb_usage); } -#ifndef set_huge_swap_pte_at -static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte, unsigned long sz) -{ - set_huge_pte_at(mm, addr, ptep, pte); -} -#endif - #ifndef huge_ptep_modify_prot_start #define huge_ptep_modify_prot_start huge_ptep_modify_prot_start static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, @@ -1094,11 +1086,6 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm) { } -static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte, unsigned long sz) -{ -} - static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { -- cgit v1.2.3 From 1baec203b77cafa24610b5c9ae7a2aa380d74ef6 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 25 Jun 2022 17:28:16 +0800 Subject: mm/khugepaged: try to free transhuge swapcache when possible Transhuge swapcaches won't be freed in __collapse_huge_page_copy(). It's because release_pte_page() is not called for these pages and thus free_page_and_swap_cache can't grab the page lock. These pages won't be freed from swap cache even if we are the only user until next time reclaim. It shouldn't hurt indeed, but we could try to free these pages to save more memory for system. Link: https://lkml.kernel.org/r/20220625092816.4856-8-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Cc: Alistair Popple Cc: Andrea Arcangeli Cc: David Hildenbrand Cc: David Howells Cc: Matthew Wilcox (Oracle) Cc: NeilBrown Cc: Peter Xu Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Yang Shi Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- include/linux/swap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 95a5b7aa1ae9..6d11c51b2b62 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -455,6 +455,7 @@ static inline unsigned long total_swapcache_pages(void) return global_node_page_state(NR_SWAPCACHE); } +extern void free_swap_cache(struct page *page); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); /* linux/mm/swapfile.c */ @@ -539,6 +540,10 @@ static inline void put_swap_device(struct swap_info_struct *si) /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */ #define free_swap_and_cache(e) is_pfn_swap_entry(e) +static inline void free_swap_cache(struct page *page) +{ +} + static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) { return 0; -- cgit v1.2.3 From 1fcf54deb767d474181ad7cf33c92bb2a33607fb Mon Sep 17 00:00:00 2001 From: Josh Don Date: Wed, 29 Jun 2022 14:14:26 -0700 Subject: sched/core: add forced idle accounting for cgroups 4feee7d1260 previously added per-task forced idle accounting. This patch extends this to also include cgroups. rstat is used for cgroup accounting, except for the root, which uses kcpustat in order to bypass the need for doing an rstat flush when reading root stats. Only cgroup v2 is supported. Similar to the task accounting, the cgroup accounting requires that schedstats is enabled. Signed-off-by: Josh Don Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Link: https://lkml.kernel.org/r/20220629211426.3329954-1-joshdon@google.com --- include/linux/cgroup-defs.h | 4 ++++ include/linux/kernel_stat.h | 7 +++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1bfcfb1af352..025fd0e84a31 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -287,6 +287,10 @@ struct css_set { struct cgroup_base_stat { struct task_cputime cputime; + +#ifdef CONFIG_SCHED_CORE + u64 forceidle_sum; +#endif }; /* diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 69ae6b278464..ddb5a358fd82 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -28,6 +28,9 @@ enum cpu_usage_stat { CPUTIME_STEAL, CPUTIME_GUEST, CPUTIME_GUEST_NICE, +#ifdef CONFIG_SCHED_CORE + CPUTIME_FORCEIDLE, +#endif NR_STATS, }; @@ -115,4 +118,8 @@ extern void account_process_tick(struct task_struct *, int user); extern void account_idle_ticks(unsigned long ticks); +#ifdef CONFIG_SCHED_CORE +extern void __account_forceidle_time(struct task_struct *tsk, u64 delta); +#endif + #endif /* _LINUX_KERNEL_STAT_H */ -- cgit v1.2.3 From 39bfb3c12d792181de0cf3306be1ea03664a1b05 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Fri, 1 Jul 2022 19:56:06 +0200 Subject: net: phy: broadcom: Add support for BCM53128 internal PHYs Add support for BCM53128 internal PHYs. These support interrupts as well as statistics. Therefore, enable the Broadcom PHY driver for them. Tested on BCM53128 switch using the mainline b53 DSA driver. Signed-off-by: Kurt Kanzenbach Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 747fad264033..6ff567ece34a 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -16,6 +16,7 @@ #define PHY_ID_BCM5481 0x0143bca0 #define PHY_ID_BCM5395 0x0143bcf0 #define PHY_ID_BCM53125 0x03625f20 +#define PHY_ID_BCM53128 0x03625e10 #define PHY_ID_BCM54810 0x03625d00 #define PHY_ID_BCM54811 0x03625cc0 #define PHY_ID_BCM5482 0x0143bcb0 -- cgit v1.2.3 From df76234276e22136b2468825c18407fdfbb2076a Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 25 Jun 2022 13:36:15 +0200 Subject: mfd: bcm2835-pm: Add support for BCM2711 In BCM2711 the new RPiVid ASB took over V3D. The old ASB is still present with the ISP and H264 bits, and V3D is in the same place in the new ASB as the old one. As per the devicetree bindings, BCM2711 will provide both the old and new ASB resources, so get both of them and pass them into 'bcm2835-power,' which will take care of selecting which one to use accordingly. Since the RPiVid ASB's resources were being provided prior to formalizing the bindings[1], also support the old DT files that didn't use 'reg-names.' [1] See: 7dbe8c62ceeb ("ARM: dts: Add minimal Raspberry Pi 4 support") Signed-off-by: Stefan Wahren Reviewed-by: Peter Robinson Acked-by: Florian Fainelli Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220625113619.15944-8-stefan.wahren@i2se.com --- include/linux/mfd/bcm2835-pm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/bcm2835-pm.h b/include/linux/mfd/bcm2835-pm.h index ed37dc40e82a..f70a810c55f7 100644 --- a/include/linux/mfd/bcm2835-pm.h +++ b/include/linux/mfd/bcm2835-pm.h @@ -9,6 +9,7 @@ struct bcm2835_pm { struct device *dev; void __iomem *base; void __iomem *asb; + void __iomem *rpivid_asb; }; #endif /* BCM2835_MFD_PM_H */ -- cgit v1.2.3 From 2fcfa72fc13f0203bb676bd1ec30ec85e17855be Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 3 Jul 2022 17:11:25 +0800 Subject: interconnect: add device managed bulk API Add device managed bulk API to simplify driver. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20220703091132.1412063-4-peng.fan@oss.nxp.com Signed-off-by: Georgi Djakov --- include/linux/interconnect.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h index f685777b875e..2b0e784ba771 100644 --- a/include/linux/interconnect.h +++ b/include/linux/interconnect.h @@ -44,6 +44,7 @@ struct icc_path *icc_get(struct device *dev, const int src_id, const int dst_id); struct icc_path *of_icc_get(struct device *dev, const char *name); struct icc_path *devm_of_icc_get(struct device *dev, const char *name); +int devm_of_icc_bulk_get(struct device *dev, int num_paths, struct icc_bulk_data *paths); struct icc_path *of_icc_get_by_index(struct device *dev, int idx); void icc_put(struct icc_path *path); int icc_enable(struct icc_path *path); @@ -116,6 +117,12 @@ static inline int of_icc_bulk_get(struct device *dev, int num_paths, struct icc_ return 0; } +static inline int devm_of_icc_bulk_get(struct device *dev, int num_paths, + struct icc_bulk_data *paths) +{ + return 0; +} + static inline void icc_bulk_put(int num_paths, struct icc_bulk_data *paths) { } -- cgit v1.2.3 From 7097f29819bb70374dfae9f705e548a720f16f94 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 4 Jul 2022 11:19:31 +0100 Subject: firmware: arm_scmi: Add SCMI v3.1 System Power extensions Add support for SCMIv3.1 System Power optional timeout field while dispatching SYSTEM_POWER_STATE_NOTIFIER notification. Link: https://lore.kernel.org/r/20220704101933.2981635-3-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 704111f63993..311aa3c73aac 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -781,8 +781,10 @@ struct scmi_clock_rate_notif_report { struct scmi_system_power_state_notifier_report { ktime_t timestamp; unsigned int agent_id; +#define SCMI_SYSPOWER_IS_REQUEST_GRACEFUL(flags) ((flags) & BIT(0)) unsigned int flags; unsigned int system_state; + unsigned int timeout; }; struct scmi_perf_limits_report { -- cgit v1.2.3 From d91079995fa62720e11a39c08b932f2f9a8cbfae Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 4 Jul 2022 11:19:32 +0100 Subject: firmware: arm_scmi: Add devm_protocol_acquire helper Add a method to get hold of a protocol, causing it to be initialized and its resource accounting updated, without getting access to its operations and handle. Some protocols, like SCMI SystemPower, do not expose any protocol ops to the kernel OSPM agent but still need to be at least initialized. This helper avoids the need to invoke a full devm_get_protocol() only to get the protocol initialized while throwing away unused the protocol ops and handle. Link: https://lore.kernel.org/r/20220704101933.2981635-4-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 311aa3c73aac..898488c7f185 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -624,6 +624,9 @@ struct scmi_notify_ops { * * @dev: pointer to the SCMI device * @version: pointer to the structure containing SCMI version information + * @devm_protocol_acquire: devres managed method to get hold of a protocol, + * causing its initialization and related resource + * accounting * @devm_protocol_get: devres managed method to acquire a protocol and get specific * operations and a dedicated protocol handler * @devm_protocol_put: devres managed method to release a protocol @@ -642,6 +645,8 @@ struct scmi_handle { struct device *dev; struct scmi_revision_info *version; + int __must_check (*devm_protocol_acquire)(struct scmi_device *sdev, + u8 proto); const void __must_check * (*devm_protocol_get)(struct scmi_device *sdev, u8 proto, struct scmi_protocol_handle **ph); -- cgit v1.2.3 From 0316f99c4780b0a5fd60b7f136c64cb1af8d5fc3 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 4 Jul 2022 11:22:36 +0100 Subject: firmware: arm_scmi: Add SCMI v3.1 powercap protocol basic support Add support for SCMI v3.1 powercap protocol, with the exception of powercap fast channels, exposing all the new related powercap protocol operations as usual in include/linux/scmi_protocol.h. Link: https://lore.kernel.org/r/20220704102241.2988447-3-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 125 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 898488c7f185..95023dc8b3a3 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -560,6 +560,114 @@ struct scmi_voltage_proto_ops { s32 *volt_uV); }; +/** + * struct scmi_powercap_info - Describe one available Powercap domain + * + * @id: Domain ID as advertised by the platform. + * @notify_powercap_cap_change: CAP change notification support. + * @notify_powercap_measurement_change: MEASUREMENTS change notifications + * support. + * @async_powercap_cap_set: Asynchronous CAP set support. + * @powercap_cap_config: CAP configuration support. + * @powercap_monitoring: Monitoring (measurements) support. + * @powercap_pai_config: PAI configuration support. + * @powercap_scale_mw: Domain reports power data in milliwatt units. + * @powercap_scale_uw: Domain reports power data in microwatt units. + * Note that, when both @powercap_scale_mw and + * @powercap_scale_uw are set to false, the domain + * reports power data on an abstract linear scale. + * @name: name assigned to the Powercap Domain by platform. + * @min_pai: Minimum configurable PAI. + * @max_pai: Maximum configurable PAI. + * @pai_step: Step size between two consecutive PAI values. + * @min_power_cap: Minimum configurable CAP. + * @max_power_cap: Maximum configurable CAP. + * @power_cap_step: Step size between two consecutive CAP values. + * @sustainable_power: Maximum sustainable power consumption for this domain + * under normal conditions. + * @accuracy: The accuracy with which the power is measured and reported in + * integral multiples of 0.001 percent. + * @parent_id: Identifier of the containing parent power capping domain, or the + * value 0xFFFFFFFF if this powercap domain is a root domain not + * contained in any other domain. + */ +struct scmi_powercap_info { + unsigned int id; + bool notify_powercap_cap_change; + bool notify_powercap_measurement_change; + bool async_powercap_cap_set; + bool powercap_cap_config; + bool powercap_monitoring; + bool powercap_pai_config; + bool powercap_scale_mw; + bool powercap_scale_uw; + char name[SCMI_MAX_STR_SIZE]; + unsigned int min_pai; + unsigned int max_pai; + unsigned int pai_step; + unsigned int min_power_cap; + unsigned int max_power_cap; + unsigned int power_cap_step; + unsigned int sustainable_power; + unsigned int accuracy; +#define SCMI_POWERCAP_ROOT_ZONE_ID 0xFFFFFFFFUL + unsigned int parent_id; +}; + +/** + * struct scmi_powercap_proto_ops - represents the various operations provided + * by SCMI Powercap Protocol + * + * @num_domains_get: get the count of powercap domains provided by SCMI. + * @info_get: get the information for the specified domain. + * @cap_get: get the current CAP value for the specified domain. + * @cap_set: set the CAP value for the specified domain to the provided value; + * if the domain supports setting the CAP with an asynchronous command + * this request will finally trigger an asynchronous transfer, but, if + * @ignore_dresp here is set to true, this call will anyway return + * immediately without waiting for the related delayed response. + * @pai_get: get the current PAI value for the specified domain. + * @pai_set: set the PAI value for the specified domain to the provided value. + * @measurements_get: retrieve the current average power measurements for the + * specified domain and the related PAI upon which is + * calculated. + * @measurements_threshold_set: set the desired low and high power thresholds + * to be used when registering for notification + * of type POWERCAP_MEASUREMENTS_NOTIFY with this + * powercap domain. + * Note that this must be called at least once + * before registering any callback with the usual + * @scmi_notify_ops; moreover, in case this method + * is called with measurement notifications already + * enabled it will also trigger, transparently, a + * proper update of the power thresholds configured + * in the SCMI backend server. + * @measurements_threshold_get: get the currently configured low and high power + * thresholds used when registering callbacks for + * notification POWERCAP_MEASUREMENTS_NOTIFY. + */ +struct scmi_powercap_proto_ops { + int (*num_domains_get)(const struct scmi_protocol_handle *ph); + const struct scmi_powercap_info __must_check *(*info_get) + (const struct scmi_protocol_handle *ph, u32 domain_id); + int (*cap_get)(const struct scmi_protocol_handle *ph, u32 domain_id, + u32 *power_cap); + int (*cap_set)(const struct scmi_protocol_handle *ph, u32 domain_id, + u32 power_cap, bool ignore_dresp); + int (*pai_get)(const struct scmi_protocol_handle *ph, u32 domain_id, + u32 *pai); + int (*pai_set)(const struct scmi_protocol_handle *ph, u32 domain_id, + u32 pai); + int (*measurements_get)(const struct scmi_protocol_handle *ph, + u32 domain_id, u32 *average_power, u32 *pai); + int (*measurements_threshold_set)(const struct scmi_protocol_handle *ph, + u32 domain_id, u32 power_thresh_low, + u32 power_thresh_high); + int (*measurements_threshold_get)(const struct scmi_protocol_handle *ph, + u32 domain_id, u32 *power_thresh_low, + u32 *power_thresh_high); +}; + /** * struct scmi_notify_ops - represents notifications' operations provided by * SCMI core @@ -666,6 +774,7 @@ enum scmi_std_protocol { SCMI_PROTOCOL_SENSOR = 0x15, SCMI_PROTOCOL_RESET = 0x16, SCMI_PROTOCOL_VOLTAGE = 0x17, + SCMI_PROTOCOL_POWERCAP = 0x18, }; enum scmi_system_events { @@ -767,6 +876,8 @@ enum scmi_notification_events { SCMI_EVENT_RESET_ISSUED = 0x0, SCMI_EVENT_BASE_ERROR_EVENT = 0x0, SCMI_EVENT_SYSTEM_POWER_STATE_NOTIFIER = 0x0, + SCMI_EVENT_POWERCAP_CAP_CHANGED = 0x0, + SCMI_EVENT_POWERCAP_MEASUREMENTS_CHANGED = 0x1, }; struct scmi_power_state_changed_report { @@ -837,4 +948,18 @@ struct scmi_base_error_report { unsigned long long reports[]; }; +struct scmi_powercap_cap_changed_report { + ktime_t timestamp; + unsigned int agent_id; + unsigned int domain_id; + unsigned int power_cap; + unsigned int pai; +}; + +struct scmi_powercap_meas_changed_report { + ktime_t timestamp; + unsigned int agent_id; + unsigned int domain_id; + unsigned int power; +}; #endif /* _LINUX_SCMI_PROTOCOL_H */ -- cgit v1.2.3 From 855aa26e5f56d415b71d3f8d86ef0cc51b2166a3 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 4 Jul 2022 11:22:38 +0100 Subject: firmware: arm_scmi: Add SCMI v3.1 powercap fast channels support Add SCMIv3.1 powercap protocol fast channel support using common helpers provided by the SCMI core with scmi_proto_helpers_ops operations. Link: https://lore.kernel.org/r/20220704102241.2988447-5-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 95023dc8b3a3..7f4f9df1b20f 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -601,6 +601,7 @@ struct scmi_powercap_info { bool powercap_pai_config; bool powercap_scale_mw; bool powercap_scale_uw; + bool fastchannels; char name[SCMI_MAX_STR_SIZE]; unsigned int min_pai; unsigned int max_pai; @@ -612,6 +613,7 @@ struct scmi_powercap_info { unsigned int accuracy; #define SCMI_POWERCAP_ROOT_ZONE_ID 0xFFFFFFFFUL unsigned int parent_id; + struct scmi_fc_info *fc_info; }; /** -- cgit v1.2.3 From cc1cfc47ea47187a21ec1f079b3c53264157fe15 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:49 +0100 Subject: cacheinfo: Add support to check if last level cache(LLC) is valid or shared It is useful to have helper to check if the given two CPUs share last level cache. We can do that check by comparing fw_token or by comparing the cache ID. Currently we check just for fw_token as the cache ID is optional. This helper can be used to build the llc_sibling during arch specific topology parsing and feeding information to the sched_domains. This also helps to get rid of llc_id in the CPU topology as it is sort of duplicate information. Also add helper to check if the llc information in cacheinfo is valid or not. Link: https://lore.kernel.org/r/20220704101605.1318280-6-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- include/linux/cacheinfo.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 4ff37cb763ae..7e429bc5c1a4 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -82,6 +82,8 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu); int init_cache_level(unsigned int cpu); int populate_cache_leaves(unsigned int cpu); int cache_setup_acpi(unsigned int cpu); +bool last_level_cache_is_valid(unsigned int cpu); +bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y); #ifndef CONFIG_ACPI_PPTT /* * acpi_find_last_cache_level is only called on ACPI enabled -- cgit v1.2.3 From 36bbc5b4ffab33ccac0f4db27f619a6ba7a4fd32 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:50 +0100 Subject: cacheinfo: Allow early detection and population of cache attributes Some architecture/platforms may need to setup cache properties very early in the boot along with other cpu topologies so that all these information can be used to build sched_domains which is used by the scheduler. Allow detect_cache_attributes to be called quite early during the boot. Link: https://lore.kernel.org/r/20220704101605.1318280-7-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- include/linux/cacheinfo.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 7e429bc5c1a4..00b7a6ae8617 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -84,6 +84,7 @@ int populate_cache_leaves(unsigned int cpu); int cache_setup_acpi(unsigned int cpu); bool last_level_cache_is_valid(unsigned int cpu); bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y); +int detect_cache_attributes(unsigned int cpu); #ifndef CONFIG_ACPI_PPTT /* * acpi_find_last_cache_level is only called on ACPI enabled -- cgit v1.2.3 From 5b8dc787ce4a45c87254d1b0b22f161347ab7f81 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:15:56 +0100 Subject: arch_topology: Drop LLC identifier stash from the CPU topology Since the cacheinfo LLC information is used directly in arch_topology, there is no need to parse and store the LLC ID information only for ACPI systems in the CPU topology. Remove the redundant LLC ID from the generic CPU arch_topology information. Link: https://lore.kernel.org/r/20220704101605.1318280-13-sudeep.holla@arm.com Tested-by: Ionela Voinescu Tested-by: Conor Dooley Reviewed-by: Gavin Shan Signed-off-by: Sudeep Holla --- include/linux/arch_topology.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 58cbe18d825c..a07b510e7dc5 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -68,7 +68,6 @@ struct cpu_topology { int core_id; int cluster_id; int package_id; - int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; cpumask_t cluster_sibling; -- cgit v1.2.3 From 7128af87c7f1c30cd6cebe0b012cc25872c689e2 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Jul 2022 11:16:05 +0100 Subject: ACPI: Remove the unused find_acpi_cpu_cache_topology() The sole user of this find_acpi_cpu_cache_topology() was arm64 topology which is now consolidated into the generic arch_topology without the need of this function. Drop the unused function find_acpi_cpu_cache_topology(). Link: https://lore.kernel.org/r/20220704101605.1318280-22-sudeep.holla@arm.com Cc: Rafael J. Wysocki Reported-by: Ionela Voinescu Tested-by: Conor Dooley Acked-by: Rafael J. Wysocki Signed-off-by: Sudeep Holla --- include/linux/acpi.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4f82a5bc6d98..7b96a8bff6d2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1429,7 +1429,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level); int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); -int find_acpi_cpu_cache_topology(unsigned int cpu, int level); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1451,10 +1450,6 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) { return -EINVAL; } -static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level) -{ - return -EINVAL; -} #endif #ifdef CONFIG_ACPI_PCC -- cgit v1.2.3 From 50d6281ce9b8412f7ef02d1bc9d23aa62ae0cf98 Mon Sep 17 00:00:00 2001 From: Ren Zhijie Date: Thu, 30 Jun 2022 20:35:28 +0800 Subject: dma-mapping: Fix build error unused-value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_DMA_DECLARE_COHERENT is not set, make ARCH=x86_64 CROSS_COMPILE=x86_64-linux-gnu- will be failed, like this: drivers/remoteproc/remoteproc_core.c: In function ‘rproc_rvdev_release’: ./include/linux/dma-map-ops.h:182:42: error: statement with no effect [-Werror=unused-value] #define dma_release_coherent_memory(dev) (0) ^ drivers/remoteproc/remoteproc_core.c:464:2: note: in expansion of macro ‘dma_release_coherent_memory’ dma_release_coherent_memory(dev); ^~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors The return type of function dma_release_coherent_memory in CONFIG_DMA_DECLARE_COHERENT area is void, so in !CONFIG_DMA_DECLARE_COHERENT area it should neither return any value nor be defined as zero. Reported-by: Hulk Robot Fixes: e61c451476e6 ("dma-mapping: Add dma_release_coherent_memory to DMA API") Signed-off-by: Ren Zhijie Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220630123528.251181-1-renzhijie2@huawei.com Signed-off-by: Mathieu Poirier --- include/linux/dma-map-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 53db9655efe9..bfffe494356a 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -179,10 +179,10 @@ static inline int dma_declare_coherent_memory(struct device *dev, return -ENOSYS; } -#define dma_release_coherent_memory(dev) (0) #define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) #define dma_release_from_dev_coherent(dev, order, vaddr) (0) #define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) +static inline void dma_release_coherent_memory(struct device *dev) { } #endif /* CONFIG_DMA_DECLARE_COHERENT */ #ifdef CONFIG_DMA_GLOBAL_POOL -- cgit v1.2.3 From 3dcb861dbc6ab101838a1548b1efddd00ca3c3ec Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 30 Jun 2022 11:40:59 +0200 Subject: ACPI: VIOT: Fix ACS setup Currently acpi_viot_init() gets called after the pci device has been scanned and pci_enable_acs() has been called. So pci_request_acs() fails to be taken into account leading to wrong single iommu group topologies when dealing with multi-function root ports for instance. We cannot simply move the acpi_viot_init() earlier, similarly as the IORT init because the VIOT parsing relies on the pci scan. However we can detect VIOT is present earlier and in such a case, request ACS. Introduce a new acpi_viot_early_init() routine that allows to call pci_request_acs() before the scan. While at it, guard the call to pci_request_acs() with #ifdef CONFIG_PCI. Fixes: 3cf485540e7b ("ACPI: Add driver for the VIOT table") Signed-off-by: Eric Auger Reported-by: Jin Liu Reviewed-by: Jean-Philippe Brucker Tested-by: Jean-Philippe Brucker Signed-off-by: Rafael J. Wysocki --- include/linux/acpi_viot.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi_viot.h b/include/linux/acpi_viot.h index 1eb8ee5b0e5f..a5a122431563 100644 --- a/include/linux/acpi_viot.h +++ b/include/linux/acpi_viot.h @@ -6,9 +6,11 @@ #include #ifdef CONFIG_ACPI_VIOT +void __init acpi_viot_early_init(void); void __init acpi_viot_init(void); int viot_iommu_configure(struct device *dev); #else +static inline void acpi_viot_early_init(void) {} static inline void acpi_viot_init(void) {} static inline int viot_iommu_configure(struct device *dev) { -- cgit v1.2.3 From 09d3154a6f0f0bb5b604832095804780f3684b96 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 6 Jun 2022 22:51:58 -0500 Subject: PM: wakeup: Unify device_init_wakeup() for PM_SLEEP and !PM_SLEEP Previously the CONFIG_PM_SLEEP and !CONFIG_PM_SLEEP device_init_wakeup() implementations differed in confusing ways: - The PM_SLEEP version checked for a NULL device pointer and returned -EINVAL, while the !PM_SLEEP version did not and would simply dereference a NULL pointer. - When called with "false", the !PM_SLEEP version cleared "capable" and "enable" in the opposite order of the PM_SLEEP version. That was harmless because for !PM_SLEEP they're simple assignments, but it's unnecessary confusion. Use a simplified version of the PM_SLEEP implementation for both cases. Signed-off-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- include/linux/pm_wakeup.h | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 196a157456aa..77f4849e3418 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -109,7 +109,6 @@ extern struct wakeup_source *wakeup_sources_walk_next(struct wakeup_source *ws); extern int device_wakeup_enable(struct device *dev); extern int device_wakeup_disable(struct device *dev); extern void device_set_wakeup_capable(struct device *dev, bool capable); -extern int device_init_wakeup(struct device *dev, bool val); extern int device_set_wakeup_enable(struct device *dev, bool enable); extern void __pm_stay_awake(struct wakeup_source *ws); extern void pm_stay_awake(struct device *dev); @@ -167,13 +166,6 @@ static inline int device_set_wakeup_enable(struct device *dev, bool enable) return 0; } -static inline int device_init_wakeup(struct device *dev, bool val) -{ - device_set_wakeup_capable(dev, val); - device_set_wakeup_enable(dev, val); - return 0; -} - static inline bool device_may_wakeup(struct device *dev) { return dev->power.can_wakeup && dev->power.should_wakeup; @@ -217,4 +209,27 @@ static inline void pm_wakeup_hard_event(struct device *dev) return pm_wakeup_dev_event(dev, 0, true); } +/** + * device_init_wakeup - Device wakeup initialization. + * @dev: Device to handle. + * @enable: Whether or not to enable @dev as a wakeup device. + * + * By default, most devices should leave wakeup disabled. The exceptions are + * devices that everyone expects to be wakeup sources: keyboards, power buttons, + * possibly network interfaces, etc. Also, devices that don't generate their + * own wakeup requests but merely forward requests from one bus to another + * (like PCI bridges) should have wakeup enabled by default. + */ +static inline int device_init_wakeup(struct device *dev, bool enable) +{ + if (enable) { + device_set_wakeup_capable(dev, true); + return device_wakeup_enable(dev); + } else { + device_wakeup_disable(dev); + device_set_wakeup_capable(dev, false); + return 0; + } +} + #endif /* _LINUX_PM_WAKEUP_H */ -- cgit v1.2.3 From e67198cc05b8ecbb7b8e2d8ef9fb5c8d26821873 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:25 +0200 Subject: context_tracking: Take idle eqs entrypoints over RCU The RCU dynticks counter is going to be merged into the context tracking subsystem. Start with moving the idle extended quiescent states entrypoints to context tracking. For now those are dumb redirections to existing RCU calls. [ paulmck: Apply kernel test robot feedback. ] Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking.h | 8 ++++++++ include/linux/rcupdate.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index e35ae66b4794..01abadb2f993 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -119,4 +119,12 @@ extern void context_tracking_init(void); static inline void context_tracking_init(void) { } #endif /* CONFIG_CONTEXT_TRACKING_USER_FORCE */ +#ifdef CONFIG_CONTEXT_TRACKING_IDLE +extern void ct_idle_enter(void); +extern void ct_idle_exit(void); +#else +static inline void ct_idle_enter(void) { } +static inline void ct_idle_exit(void) { } +#endif /* !CONFIG_CONTEXT_TRACKING_IDLE */ + #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 1a32036c918c..6ebe754501c3 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -128,7 +128,7 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { } * @a: Code that RCU needs to pay attention to. * * RCU read-side critical sections are forbidden in the inner idle loop, - * that is, between the rcu_idle_enter() and the rcu_idle_exit() -- RCU + * that is, between the ct_idle_enter() and the ct_idle_exit() -- RCU * will happily ignore any such read-side critical sections. However, * things like powertop need tracepoints in the inner idle loop. * -- cgit v1.2.3 From 6f0e6c1598b1a3d19fc30db86b6e26d6f881b43d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:26 +0200 Subject: context_tracking: Take IRQ eqs entrypoints over RCU The RCU dynticks counter is going to be merged into the context tracking subsystem. Prepare with moving the IRQ extended quiescent states entrypoints to context tracking. For now those are dumb redirection to existing RCU calls. [ paulmck: Apply Stephen Rothwell feedback from -next. ] [ paulmck: Apply Nathan Chancellor feedback. ] Acked-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking_irq.h | 17 +++++++++++++++++ include/linux/context_tracking_state.h | 1 + include/linux/entry-common.h | 10 +++++----- include/linux/rcupdate.h | 5 +++-- include/linux/tracepoint.h | 4 ++-- 5 files changed, 28 insertions(+), 9 deletions(-) create mode 100644 include/linux/context_tracking_irq.h (limited to 'include/linux') diff --git a/include/linux/context_tracking_irq.h b/include/linux/context_tracking_irq.h new file mode 100644 index 000000000000..62f62bbd1a50 --- /dev/null +++ b/include/linux/context_tracking_irq.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CONTEXT_TRACKING_IRQ_H +#define _LINUX_CONTEXT_TRACKING_IRQ_H + +#ifdef CONFIG_CONTEXT_TRACKING_IDLE +void ct_irq_enter(void); +void ct_irq_exit(void); +void ct_irq_enter_irqson(void); +void ct_irq_exit_irqson(void); +#else +static inline void ct_irq_enter(void) { } +static inline void ct_irq_exit(void) { } +static inline void ct_irq_enter_irqson(void) { } +static inline void ct_irq_exit_irqson(void) { } +#endif + +#endif diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 2b46afe105a9..9c16a8b2c194 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -4,6 +4,7 @@ #include #include +#include struct context_tracking { /* diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index c92ac75d6556..84a466b176cf 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -357,7 +357,7 @@ void irqentry_exit_to_user_mode(struct pt_regs *regs); /** * struct irqentry_state - Opaque object for exception state storage * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the - * exit path has to invoke rcu_irq_exit(). + * exit path has to invoke ct_irq_exit(). * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that * lockdep state is restored correctly on exit from nmi. * @@ -395,12 +395,12 @@ typedef struct irqentry_state { * * For kernel mode entries RCU handling is done conditional. If RCU is * watching then the only RCU requirement is to check whether the tick has - * to be restarted. If RCU is not watching then rcu_irq_enter() has to be - * invoked on entry and rcu_irq_exit() on exit. + * to be restarted. If RCU is not watching then ct_irq_enter() has to be + * invoked on entry and ct_irq_exit() on exit. * - * Avoiding the rcu_irq_enter/exit() calls is an optimization but also + * Avoiding the ct_irq_enter/exit() calls is an optimization but also * solves the problem of kernel mode pagefaults which can schedule, which - * is not possible after invoking rcu_irq_enter() without undoing it. + * is not possible after invoking ct_irq_enter() without undoing it. * * For user mode entries irqentry_enter_from_user_mode() is invoked to * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6ebe754501c3..f1562d91c67d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -29,6 +29,7 @@ #include #include #include +#include #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) @@ -143,9 +144,9 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { } */ #define RCU_NONIDLE(a) \ do { \ - rcu_irq_enter_irqson(); \ + ct_irq_enter_irqson(); \ do { a; } while (0); \ - rcu_irq_exit_irqson(); \ + ct_irq_exit_irqson(); \ } while (0) /* diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 28031b15f878..55717a2eda08 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -200,13 +200,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) */ \ if (rcuidle) { \ __idx = srcu_read_lock_notrace(&tracepoint_srcu);\ - rcu_irq_enter_irqson(); \ + ct_irq_enter_irqson(); \ } \ \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ \ if (rcuidle) { \ - rcu_irq_exit_irqson(); \ + ct_irq_exit_irqson(); \ srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\ } \ \ -- cgit v1.2.3 From 493c1822825f00025d6754ec0632990a27edc6f8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:27 +0200 Subject: context_tracking: Take NMI eqs entrypoints over RCU The RCU dynticks counter is going to be merged into the context tracking subsystem. Prepare with moving the NMI extended quiescent states entrypoints to context tracking. For now those are dumb redirection to existing RCU calls. Acked-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking_irq.h | 4 ++++ include/linux/hardirq.h | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking_irq.h b/include/linux/context_tracking_irq.h index 62f62bbd1a50..c50b5670c4a5 100644 --- a/include/linux/context_tracking_irq.h +++ b/include/linux/context_tracking_irq.h @@ -7,11 +7,15 @@ void ct_irq_enter(void); void ct_irq_exit(void); void ct_irq_enter_irqson(void); void ct_irq_exit_irqson(void); +void ct_nmi_enter(void); +void ct_nmi_exit(void); #else static inline void ct_irq_enter(void) { } static inline void ct_irq_exit(void) { } static inline void ct_irq_enter_irqson(void) { } static inline void ct_irq_exit_irqson(void) { } +static inline void ct_nmi_enter(void) { } +static inline void ct_nmi_exit(void) { } #endif #endif diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 76878b357ffa..345cdbe9c1b7 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -124,7 +124,7 @@ extern void rcu_nmi_exit(void); do { \ __nmi_enter(); \ lockdep_hardirq_enter(); \ - rcu_nmi_enter(); \ + ct_nmi_enter(); \ instrumentation_begin(); \ ftrace_nmi_enter(); \ instrumentation_end(); \ @@ -143,7 +143,7 @@ extern void rcu_nmi_exit(void); instrumentation_begin(); \ ftrace_nmi_exit(); \ instrumentation_end(); \ - rcu_nmi_exit(); \ + ct_nmi_exit(); \ lockdep_hardirq_exit(); \ __nmi_exit(); \ } while (0) -- cgit v1.2.3 From 3864caafe7c66f01b188ffccb6a4215f3bf56292 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:28 +0200 Subject: rcu/context-tracking: Remove rcu_irq_enter/exit() Now rcu_irq_enter/exit() is an unnecessary middle call between ct_irq_enter/exit() and nmi_irq_enter/exit(). Take this opportunity to remove the former functions and move the comments above them to the new entrypoints. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/rcutiny.h | 4 ---- include/linux/rcutree.h | 4 ---- 2 files changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 5fed476f977f..591119413cf1 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -78,10 +78,6 @@ static inline void rcu_cpu_stall_reset(void) { } static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; } static inline void rcu_idle_enter(void) { } static inline void rcu_idle_exit(void) { } -static inline void rcu_irq_enter(void) { } -static inline void rcu_irq_exit_irqson(void) { } -static inline void rcu_irq_enter_irqson(void) { } -static inline void rcu_irq_exit(void) { } static inline void rcu_irq_exit_check_preempt(void) { } #define rcu_is_idle_cpu(cpu) \ (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 9c6cfb742504..4522b6a7cc42 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -47,10 +47,6 @@ void cond_synchronize_rcu(unsigned long oldstate); void rcu_idle_enter(void); void rcu_idle_exit(void); -void rcu_irq_enter(void); -void rcu_irq_exit(void); -void rcu_irq_enter_irqson(void); -void rcu_irq_exit_irqson(void); bool rcu_is_idle_cpu(int cpu); #ifdef CONFIG_PROVE_RCU -- cgit v1.2.3 From 62e2412df4b90ae6706ce1f1a9649b789b2e44ef Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:29 +0200 Subject: rcu/context_tracking: Move dynticks counter to context tracking In order to prepare for merging RCU dynticks counter into the context tracking state, move the rcu_data's dynticks field to the context tracking structure. It will later be mixed within the context tracking state itself. [ paulmck: Move enum ctx_state into global scope. ] Acked-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking_state.h | 45 ++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 9c16a8b2c194..5a8da2787287 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -6,7 +6,15 @@ #include #include +enum ctx_state { + CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */ + CONTEXT_KERNEL = 0, + CONTEXT_USER, + CONTEXT_GUEST, +}; + struct context_tracking { +#ifdef CONFIG_CONTEXT_TRACKING_USER /* * When active is false, probes are unset in order * to minimize overhead: TIF flags are cleared @@ -15,17 +23,40 @@ struct context_tracking { */ bool active; int recursion; - enum ctx_state { - CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */ - CONTEXT_KERNEL = 0, - CONTEXT_USER, - CONTEXT_GUEST, - } state; + enum ctx_state state; +#endif +#ifdef CONFIG_CONTEXT_TRACKING_IDLE + atomic_t dynticks; /* Even value for idle, else odd. */ +#endif }; +#ifdef CONFIG_CONTEXT_TRACKING +DECLARE_PER_CPU(struct context_tracking, context_tracking); +#endif + +#ifdef CONFIG_CONTEXT_TRACKING_IDLE +static __always_inline int ct_dynticks(void) +{ + return atomic_read(this_cpu_ptr(&context_tracking.dynticks)); +} + +static __always_inline int ct_dynticks_cpu(int cpu) +{ + struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); + + return atomic_read(&ct->dynticks); +} + +static __always_inline int ct_dynticks_cpu_acquire(int cpu) +{ + struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); + + return atomic_read_acquire(&ct->dynticks); +} +#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ + #ifdef CONFIG_CONTEXT_TRACKING_USER extern struct static_key_false context_tracking_key; -DECLARE_PER_CPU(struct context_tracking, context_tracking); static __always_inline bool context_tracking_enabled(void) { -- cgit v1.2.3 From 904e600e60f46f92eb4bcfb95788b1fedf7e8237 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:30 +0200 Subject: rcu/context_tracking: Move dynticks_nesting to context tracking The RCU eqs tracking is going to be performed by the context tracking subsystem. The related nesting counters thus need to be moved to the context tracking structure. Acked-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking_state.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 5a8da2787287..13a4a9d1ec7e 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -27,6 +27,7 @@ struct context_tracking { #endif #ifdef CONFIG_CONTEXT_TRACKING_IDLE atomic_t dynticks; /* Even value for idle, else odd. */ + long dynticks_nesting; /* Track process nesting level. */ #endif }; @@ -53,6 +54,18 @@ static __always_inline int ct_dynticks_cpu_acquire(int cpu) return atomic_read_acquire(&ct->dynticks); } + +static __always_inline long ct_dynticks_nesting(void) +{ + return __this_cpu_read(context_tracking.dynticks_nesting); +} + +static __always_inline long ct_dynticks_nesting_cpu(int cpu) +{ + struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); + + return ct->dynticks_nesting; +} #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ #ifdef CONFIG_CONTEXT_TRACKING_USER -- cgit v1.2.3 From 95e04f48ec0a634e2f221081f5fa1a904755f326 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:31 +0200 Subject: rcu/context_tracking: Move dynticks_nmi_nesting to context tracking The RCU eqs tracking is going to be performed by the context tracking subsystem. The related nesting counters thus need to be moved to the context tracking structure. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking_state.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 13a4a9d1ec7e..5f11e3d2d85a 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -13,6 +13,9 @@ enum ctx_state { CONTEXT_GUEST, }; +/* Offset to allow distinguishing irq vs. task-based idle entry/exit. */ +#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) + struct context_tracking { #ifdef CONFIG_CONTEXT_TRACKING_USER /* @@ -28,6 +31,7 @@ struct context_tracking { #ifdef CONFIG_CONTEXT_TRACKING_IDLE atomic_t dynticks; /* Even value for idle, else odd. */ long dynticks_nesting; /* Track process nesting level. */ + long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ #endif }; @@ -66,6 +70,18 @@ static __always_inline long ct_dynticks_nesting_cpu(int cpu) return ct->dynticks_nesting; } + +static __always_inline long ct_dynticks_nmi_nesting(void) +{ + return __this_cpu_read(context_tracking.dynticks_nmi_nesting); +} + +static __always_inline long ct_dynticks_nmi_nesting_cpu(int cpu) +{ + struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); + + return ct->dynticks_nmi_nesting; +} #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ #ifdef CONFIG_CONTEXT_TRACKING_USER -- cgit v1.2.3 From 564506495ca96a6e66d077d3d5b9f02d4b9b0f45 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:32 +0200 Subject: rcu/context-tracking: Move deferred nocb resched to context tracking To prepare for migrating the RCU eqs accounting code to context tracking, split the last-resort deferred nocb resched from rcu_user_enter() and move it into a separate call from context tracking. Acked-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/rcupdate.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f1562d91c67d..3717cad983a6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -112,6 +112,12 @@ static inline void rcu_user_enter(void) { } static inline void rcu_user_exit(void) { } #endif /* CONFIG_NO_HZ_FULL */ +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) +void rcu_irq_work_resched(void); +#else +static inline void rcu_irq_work_resched(void) { } +#endif + #ifdef CONFIG_RCU_NOCB_CPU void rcu_init_nohz(void); int rcu_nocb_cpu_offload(int cpu); -- cgit v1.2.3 From 172114552701b85d5c3b1a089a73ee85d0d7786b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:33 +0200 Subject: rcu/context-tracking: Move RCU-dynticks internal functions to context_tracking Move the core RCU eqs/dynticks functions to context tracking so that we can later merge all that code within context tracking. Acked-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking.h | 20 ++++++++++++++++++++ include/linux/rcutree.h | 3 +++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 01abadb2f993..1f568676bc1d 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -122,6 +122,26 @@ static inline void context_tracking_init(void) { } #ifdef CONFIG_CONTEXT_TRACKING_IDLE extern void ct_idle_enter(void); extern void ct_idle_exit(void); + +/* + * Is the current CPU in an extended quiescent state? + * + * No ordering, as we are sampling CPU-local information. + */ +static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) +{ + return !(arch_atomic_read(this_cpu_ptr(&context_tracking.dynticks)) & 0x1); +} + +/* + * Increment the current CPU's context_tracking structure's ->dynticks field + * with ordering. Return the new value. + */ +static __always_inline unsigned long rcu_dynticks_inc(int incby) +{ + return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.dynticks)); +} + #else static inline void ct_idle_enter(void) { } static inline void ct_idle_exit(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 4522b6a7cc42..24db1e41695c 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -55,6 +55,9 @@ void rcu_irq_exit_check_preempt(void); static inline void rcu_irq_exit_check_preempt(void) { } #endif +struct task_struct; +void rcu_preempt_deferred_qs(struct task_struct *t); + void exit_rcu(void); void rcu_scheduler_starting(void); -- cgit v1.2.3 From c33ef43a359001415032665dfcd433979c462b71 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:34 +0200 Subject: rcu/context-tracking: Remove unused and/or unecessary middle functions Some eqs functions are now only used internally by context tracking, so their public declarations can be removed. Also middle functions such as rcu_user_*() and rcu_idle_*() which now directly call to rcu_eqs_enter() and rcu_eqs_exit() can be wiped out as well. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/hardirq.h | 8 -------- include/linux/rcupdate.h | 8 -------- include/linux/rcutiny.h | 2 -- include/linux/rcutree.h | 2 -- 4 files changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 345cdbe9c1b7..d57cab4d4c06 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -92,14 +92,6 @@ void irq_exit_rcu(void); #define arch_nmi_exit() do { } while (0) #endif -#ifdef CONFIG_TINY_RCU -static inline void rcu_nmi_enter(void) { } -static inline void rcu_nmi_exit(void) { } -#else -extern void rcu_nmi_enter(void); -extern void rcu_nmi_exit(void); -#endif - /* * NMI vs Tracing * -------------- diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3717cad983a6..434da1eb88cd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -104,14 +104,6 @@ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ -#ifdef CONFIG_NO_HZ_FULL -void rcu_user_enter(void); -void rcu_user_exit(void); -#else -static inline void rcu_user_enter(void) { } -static inline void rcu_user_exit(void) { } -#endif /* CONFIG_NO_HZ_FULL */ - #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 591119413cf1..900ba35c3582 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -76,8 +76,6 @@ static inline int rcu_needs_cpu(void) static inline void rcu_virt_note_context_switch(int cpu) { } static inline void rcu_cpu_stall_reset(void) { } static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; } -static inline void rcu_idle_enter(void) { } -static inline void rcu_idle_exit(void) { } static inline void rcu_irq_exit_check_preempt(void) { } #define rcu_is_idle_cpu(cpu) \ (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 24db1e41695c..9cca00ed9bc9 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -45,8 +45,6 @@ unsigned long start_poll_synchronize_rcu(void); bool poll_state_synchronize_rcu(unsigned long oldstate); void cond_synchronize_rcu(unsigned long oldstate); -void rcu_idle_enter(void); -void rcu_idle_exit(void); bool rcu_is_idle_cpu(int cpu); #ifdef CONFIG_PROVE_RCU -- cgit v1.2.3 From 171476775d32a40bfebf83250136c19b2e842672 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Jun 2022 16:40:35 +0200 Subject: context_tracking: Convert state to atomic_t Context tracking's state and dynticks counter are going to be merged in a single field so that both updates can happen atomically and at the same time. Prepare for that with converting the state into an atomic_t. [ paulmck: Apply kernel test robot feedback. ] Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Joel Fernandes Cc: Boqun Feng Cc: Nicolas Saenz Julienne Cc: Marcelo Tosatti Cc: Xiongfeng Wang Cc: Yu Liao Cc: Phil Auld Cc: Paul Gortmaker Cc: Alex Belits Signed-off-by: Paul E. McKenney Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne --- include/linux/context_tracking.h | 32 ++++++------------- include/linux/context_tracking_state.h | 57 +++++++++++++++++++++++++++------- 2 files changed, 56 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 1f568676bc1d..dcef4a9e4d63 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -56,7 +56,7 @@ static inline enum ctx_state exception_enter(void) !context_tracking_enabled()) return 0; - prev_ctx = this_cpu_read(context_tracking.state); + prev_ctx = __ct_state(); if (prev_ctx != CONTEXT_KERNEL) ct_user_exit(prev_ctx); @@ -86,33 +86,21 @@ static __always_inline void context_tracking_guest_exit(void) __ct_user_exit(CONTEXT_GUEST); } -/** - * ct_state() - return the current context tracking state if known - * - * Returns the current cpu's context tracking state if context tracking - * is enabled. If context tracking is disabled, returns - * CONTEXT_DISABLED. This should be used primarily for debugging. - */ -static __always_inline enum ctx_state ct_state(void) -{ - return context_tracking_enabled() ? - this_cpu_read(context_tracking.state) : CONTEXT_DISABLED; -} +#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond)) + #else static inline void user_enter(void) { } static inline void user_exit(void) { } static inline void user_enter_irqoff(void) { } static inline void user_exit_irqoff(void) { } -static inline enum ctx_state exception_enter(void) { return 0; } +static inline int exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } -static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } +static inline int ct_state(void) { return -1; } static __always_inline bool context_tracking_guest_enter(void) { return false; } static inline void context_tracking_guest_exit(void) { } - +#define CT_WARN_ON(cond) do { } while (0) #endif /* !CONFIG_CONTEXT_TRACKING_USER */ -#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond)) - #ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE extern void context_tracking_init(void); #else @@ -130,16 +118,16 @@ extern void ct_idle_exit(void); */ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) { - return !(arch_atomic_read(this_cpu_ptr(&context_tracking.dynticks)) & 0x1); + return !(arch_atomic_read(this_cpu_ptr(&context_tracking.state)) & RCU_DYNTICKS_IDX); } /* - * Increment the current CPU's context_tracking structure's ->dynticks field + * Increment the current CPU's context_tracking structure's ->state field * with ordering. Return the new value. */ -static __always_inline unsigned long rcu_dynticks_inc(int incby) +static __always_inline unsigned long ct_state_inc(int incby) { - return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.dynticks)); + return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.state)); } #else diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 5f11e3d2d85a..e20a74bc0597 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -6,15 +6,23 @@ #include #include +/* Offset to allow distinguishing irq vs. task-based idle entry/exit. */ +#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) + enum ctx_state { - CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */ - CONTEXT_KERNEL = 0, - CONTEXT_USER, - CONTEXT_GUEST, + CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */ + CONTEXT_KERNEL = 0, + CONTEXT_IDLE = 1, + CONTEXT_USER = 2, + CONTEXT_GUEST = 3, + CONTEXT_MAX = 4, }; -/* Offset to allow distinguishing irq vs. task-based idle entry/exit. */ -#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) +/* Even value for idle, else odd. */ +#define RCU_DYNTICKS_IDX CONTEXT_MAX + +#define CT_STATE_MASK (CONTEXT_MAX - 1) +#define CT_DYNTICKS_MASK (~CT_STATE_MASK) struct context_tracking { #ifdef CONFIG_CONTEXT_TRACKING_USER @@ -26,10 +34,11 @@ struct context_tracking { */ bool active; int recursion; - enum ctx_state state; +#endif +#ifdef CONFIG_CONTEXT_TRACKING + atomic_t state; #endif #ifdef CONFIG_CONTEXT_TRACKING_IDLE - atomic_t dynticks; /* Even value for idle, else odd. */ long dynticks_nesting; /* Track process nesting level. */ long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ #endif @@ -37,26 +46,31 @@ struct context_tracking { #ifdef CONFIG_CONTEXT_TRACKING DECLARE_PER_CPU(struct context_tracking, context_tracking); + +static __always_inline int __ct_state(void) +{ + return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_STATE_MASK; +} #endif #ifdef CONFIG_CONTEXT_TRACKING_IDLE static __always_inline int ct_dynticks(void) { - return atomic_read(this_cpu_ptr(&context_tracking.dynticks)); + return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_DYNTICKS_MASK; } static __always_inline int ct_dynticks_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return atomic_read(&ct->dynticks); + return atomic_read(&ct->state) & CT_DYNTICKS_MASK; } static __always_inline int ct_dynticks_cpu_acquire(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return atomic_read_acquire(&ct->dynticks); + return atomic_read_acquire(&ct->state) & CT_DYNTICKS_MASK; } static __always_inline long ct_dynticks_nesting(void) @@ -102,6 +116,27 @@ static inline bool context_tracking_enabled_this_cpu(void) return context_tracking_enabled() && __this_cpu_read(context_tracking.active); } +/** + * ct_state() - return the current context tracking state if known + * + * Returns the current cpu's context tracking state if context tracking + * is enabled. If context tracking is disabled, returns + * CONTEXT_DISABLED. This should be used primarily for debugging. + */ +static __always_inline int ct_state(void) +{ + int ret; + + if (!context_tracking_enabled()) + return CONTEXT_DISABLED; + + preempt_disable(); + ret = __ct_state(); + preempt_enable(); + + return ret; +} + #else static __always_inline bool context_tracking_enabled(void) { return false; } static __always_inline bool context_tracking_enabled_cpu(int cpu) { return false; } -- cgit v1.2.3 From 1dcaa3b462265f688613163a1562a65ee53a3311 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 16 Jun 2022 09:30:37 -0700 Subject: context_tracking: Use arch_atomic_read() in __ct_state for KASAN Context tracking's __ct_state() function can be invoked from noinstr state where RCU is not watching. This means that its use of atomic_read() causes KASAN to invoke the non-noinstr __kasan_check_read() function from the noinstr function __ct_state(). This is problematic because someone tracing the __kasan_check_read() function could get a nasty surprise because of RCU not watching. This commit therefore replaces the __ct_state() function's use of atomic_read() with arch_atomic_read(), which KASAN does not attempt to add instrumention to. Reported-by: kernel test robot Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Cc: Marco Elver Reviewed-by: Marco Elver --- include/linux/context_tracking_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index e20a74bc0597..4a4d56f77180 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -49,7 +49,7 @@ DECLARE_PER_CPU(struct context_tracking, context_tracking); static __always_inline int __ct_state(void) { - return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_STATE_MASK; + return arch_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_STATE_MASK; } #endif -- cgit v1.2.3 From 6a4e9307cd3782f8e805fac970b9a240ab3078d6 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 May 2022 13:11:12 +0200 Subject: dmaengine: qcom: fix typo in comment Spelling mistake (triple letters) in comment. Detected with the help of Coccinelle. Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/20220521111145.81697-62-Julia.Lawall@inria.fr Signed-off-by: Vinod Koul --- include/linux/dma/qcom-gpi-dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h index f46dc3372f11..6680dd1a43c6 100644 --- a/include/linux/dma/qcom-gpi-dma.h +++ b/include/linux/dma/qcom-gpi-dma.h @@ -26,7 +26,7 @@ enum spi_transfer_cmd { * @clk_div: source clock divider * @clk_src: serial clock * @cmd: spi cmd - * @fragmentation: keep CS assserted at end of sequence + * @fragmentation: keep CS asserted at end of sequence * @cs: chip select toggle * @set_config: set peripheral config * @rx_len: receive length for buffer -- cgit v1.2.3 From bd29c00edd0a5dac8b6e7332bb470cd50f92e893 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 21 Jun 2022 17:56:38 -0500 Subject: soundwire: revisit driver bind/unbind and callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the SoundWire probe, we store a pointer from the driver ops into the 'slave' structure. This can lead to kernel oopses when unbinding codec drivers, e.g. with the following sequence to remove machine driver and codec driver. /sbin/modprobe -r snd_soc_sof_sdw /sbin/modprobe -r snd_soc_rt711 The full details can be found in the BugLink below, for reference the two following examples show different cases of driver ops/callbacks being invoked after the driver .remove(). kernel: BUG: kernel NULL pointer dereference, address: 0000000000000150 kernel: Workqueue: events cdns_update_slave_status_work [soundwire_cadence] kernel: RIP: 0010:mutex_lock+0x19/0x30 kernel: Call Trace: kernel: ? sdw_handle_slave_status+0x426/0xe00 [soundwire_bus 94ff184bf398570c3f8ff7efe9e32529f532e4ae] kernel: ? newidle_balance+0x26a/0x400 kernel: ? cdns_update_slave_status_work+0x1e9/0x200 [soundwire_cadence 1bcf98eebe5ba9833cd433323769ac923c9c6f82] kernel: BUG: unable to handle page fault for address: ffffffffc07654c8 kernel: Workqueue: pm pm_runtime_work kernel: RIP: 0010:sdw_bus_prep_clk_stop+0x6f/0x160 [soundwire_bus] kernel: Call Trace: kernel: kernel: sdw_cdns_clock_stop+0xb5/0x1b0 [soundwire_cadence 1bcf98eebe5ba9833cd433323769ac923c9c6f82] kernel: intel_suspend_runtime+0x5f/0x120 [soundwire_intel aca858f7c87048d3152a4a41bb68abb9b663a1dd] kernel: ? dpm_sysfs_remove+0x60/0x60 This was not detected earlier in Intel tests since the tests first remove the parent PCI device and shut down the bus. The sequence above is a corner case which keeps the bus operational but without a driver bound. While trying to solve this kernel oopses, it became clear that the existing SoundWire bus does not deal well with the unbind case. Commit 528be501b7d4a ("soundwire: sdw_slave: add probe_complete structure and new fields") added a 'probed' status variable and a 'probe_complete' struct completion. This status is however not reset on remove and likewise the 'probe complete' is not re-initialized, so the bind/unbind/bind test cases would fail. The timeout used before the 'update_status' callback was also a bad idea in hindsight, there should really be no timing assumption as to if and when a driver is bound to a device. An initial draft was based on device_lock() and device_unlock() was tested. This proved too complicated, with deadlocks created during the suspend-resume sequences, which also use the same device_lock/unlock() as the bind/unbind sequences. On a CometLake device, a bad DSDT/BIOS caused spurious resumes and the use of device_lock() caused hangs during suspend. After multiple weeks or testing and painful reverse-engineering of deadlocks on different devices, we looked for alternatives that did not interfere with the device core. A bus notifier was used successfully to keep track of DRIVER_BOUND and DRIVER_UNBIND events. This solved the bind-unbind-bind case in tests, but it can still be defeated with a theoretical corner case where the memory is freed by a .remove while the callback is in use. The notifier only helps make sure the driver callbacks are valid, but not that the memory allocated in probe remains valid while the callbacks are invoked. This patch suggests the introduction of a new 'sdw_dev_lock' mutex protecting probe/remove and all driver callbacks. Since this mutex is 'local' to SoundWire only, it does not interfere with existing locks and does not create deadlocks. In addition, this patch removes the 'probe_complete' completion, instead we directly invoke the 'update_status' from the probe routine. That removes any sort of timing dependency and a much better support for the device/driver model, the driver could be bound before the bus started, or eons after the bus started and the hardware would be properly initialized in all cases. BugLink: https://github.com/thesofproject/linux/issues/3531 Fixes: 56d4fe31af77 ("soundwire: Add MIPI DisCo property helpers") Fixes: 528be501b7d4a ("soundwire: sdw_slave: add probe_complete structure and new fields") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20220621225641.221170-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 76ce3f3ac0f2..bf6f0decb3f6 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -646,9 +646,6 @@ struct sdw_slave_ops { * @dev_num: Current Device Number, values can be 0 or dev_num_sticky * @dev_num_sticky: one-time static Device Number assigned by Bus * @probed: boolean tracking driver state - * @probe_complete: completion utility to control potential races - * on startup between driver probe/initialization and SoundWire - * Slave state changes/implementation-defined interrupts * @enumeration_complete: completion utility to control potential races * on startup between device enumeration and read/write access to the * Slave device @@ -663,6 +660,7 @@ struct sdw_slave_ops { * for a Slave happens for the first time after enumeration * @is_mockup_device: status flag used to squelch errors in the command/control * protocol for SoundWire mockup devices + * @sdw_dev_lock: mutex used to protect callbacks/remove races */ struct sdw_slave { struct sdw_slave_id id; @@ -680,12 +678,12 @@ struct sdw_slave { u16 dev_num; u16 dev_num_sticky; bool probed; - struct completion probe_complete; struct completion enumeration_complete; struct completion initialization_complete; u32 unattach_request; bool first_interrupt_done; bool is_mockup_device; + struct mutex sdw_dev_lock; /* protect callbacks/remove races */ }; #define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev) -- cgit v1.2.3 From 9a24bb35b0d81edb826f174d7752c2a54bc00abd Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 21 Jun 2022 17:56:39 -0500 Subject: soundwire: peripheral: remove useless ops pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we are using the ops structure directly from the driver, there are no users left of this ops pointer. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20220621225641.221170-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index bf6f0decb3f6..39058c841469 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -637,7 +637,6 @@ struct sdw_slave_ops { * @dev: Linux device * @status: Status reported by the Slave * @bus: Bus handle - * @ops: Slave callback ops * @prop: Slave properties * @debugfs: Slave debugfs * @node: node for bus list @@ -667,7 +666,6 @@ struct sdw_slave { struct device dev; enum sdw_slave_status status; struct sdw_bus *bus; - const struct sdw_slave_ops *ops; struct sdw_slave_prop prop; #ifdef CONFIG_DEBUG_FS struct dentry *debugfs; -- cgit v1.2.3 From e9a023f2b73ac35ff5cfbefe8524c64d8173d65f Mon Sep 17 00:00:00 2001 From: Eric Lin Date: Tue, 5 Jul 2022 17:19:20 +0800 Subject: drivers/perf: riscv_pmu: Add riscv pmu pm notifier Currently, when the CPU is doing suspend to ram, we don't save pmu counter register and its content will be lost. To ensure perf profiling is not affected by suspend to ram, this patch is based on arm_pmu CPU_PM notifier and implements riscv pmu pm notifier. In the pm notifier, we stop the counter and update the counter value before suspend and start the counter after resume. Signed-off-by: Eric Lin Link: https://lore.kernel.org/r/20220705091920.27432-1-eric.lin@sifive.com Signed-off-by: Will Deacon --- include/linux/perf/riscv_pmu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 46f9b6fe306e..bf66fe011fa8 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -56,9 +56,13 @@ struct riscv_pmu { struct cpu_hw_events __percpu *hw_events; struct hlist_node node; + struct notifier_block riscv_pm_nb; }; #define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu)) + +void riscv_pmu_start(struct perf_event *event, int flags); +void riscv_pmu_stop(struct perf_event *event, int flags); unsigned long riscv_pmu_ctr_read_csr(unsigned long csr); int riscv_pmu_event_set_period(struct perf_event *event); uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event); -- cgit v1.2.3 From 66637ab137b44914356a9dc7a9b3f8ebcf0b0695 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Tue, 28 Jun 2022 14:34:19 +0800 Subject: drivers/perf: hisi: add driver for HNS3 PMU HNS3(HiSilicon Network System 3) PMU is RCiEP device in HiSilicon SoC NIC, supports collection of performance statistics such as bandwidth, latency, packet rate and interrupt rate. NIC of each SICL has one PMU device for it. Driver registers each PMU device to perf, and exports information of supported events, filter mode of each event, bdf range, hardware clock frequency, identifier and so on via sysfs. Each PMU device has its own registers of control, counters and interrupt, and it supports 8 hardware events, each hardward event has its own registers for configuration, counters and interrupt. Filter options contains: config - select event port - select physical port of nic tc - select tc(must be used with port) func - select PF/VF queue - select queue of PF/VF(must be used with func) intr - select interrupt number(must be used with func) global - select all functions of IO DIE Signed-off-by: Guangbin Huang Reviewed-by: John Garry Reviewed-by: Shaokun Zhang Link: https://lore.kernel.org/r/20220628063419.38514-3-huangguangbin2@huawei.com Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 19f0dbfdd7fe..3e99fb4d3134 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -230,6 +230,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, + CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, -- cgit v1.2.3 From 3b7e2482f9a3f2e99628048b945c9c6cc53bd38e Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 15 Jun 2022 11:10:36 +0100 Subject: iommu: Introduce a callback to struct iommu_resv_region MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A callback is introduced to struct iommu_resv_region to free memory allocations associated with the reserved region. This will be useful when we introduce support for IORT RMR based reserved regions. Reviewed-by: Christoph Hellwig Tested-by: Steven Price Tested-by: Laurentiu Tudor Tested-by: Hanjun Guo Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-2-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5e1afe169549..b22ffa6bc4a9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -135,6 +135,7 @@ enum iommu_resv_type { * @length: Length of the region in bytes * @prot: IOMMU Protection flags (READ/WRITE/...) * @type: Type of the reserved region + * @free: Callback to free associated memory allocations */ struct iommu_resv_region { struct list_head list; @@ -142,6 +143,7 @@ struct iommu_resv_region { size_t length; int prot; enum iommu_resv_type type; + void (*free)(struct device *dev, struct iommu_resv_region *region); }; /** -- cgit v1.2.3 From 8778b1d48117055c710a01498f65fa730160fdfa Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 15 Jun 2022 11:10:37 +0100 Subject: ACPI/IORT: Make iort_iommu_msi_get_resv_regions() return void At present iort_iommu_msi_get_resv_regions() returns the number of MSI reserved regions on success and there are no users for this. The reserved region list will get populated anyway for platforms that require the HW MSI region reservation. Hence, change the function to return void instead. Reviewed-by: Christoph Hellwig Tested-by: Steven Price Tested-by: Laurentiu Tudor Reviewed-by: Hanjun Guo Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-3-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- include/linux/acpi_iort.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index f1f0842a2cb2..a8198b83753d 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -36,7 +36,7 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ int iort_dma_get_ranges(struct device *dev, u64 *size); int iort_iommu_configure_id(struct device *dev, const u32 *id_in); -int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); +void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); phys_addr_t acpi_iort_dma_get_max_cpu_address(void); #else static inline void acpi_iort_init(void) { } @@ -52,8 +52,8 @@ static inline int iort_dma_get_ranges(struct device *dev, u64 *size) static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in) { return -ENODEV; } static inline -int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) -{ return 0; } +void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) +{ } static inline phys_addr_t acpi_iort_dma_get_max_cpu_address(void) { return PHYS_ADDR_MAX; } -- cgit v1.2.3 From 55be25b8b5e4e2fd680cfb073b84a74a79c002fa Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 15 Jun 2022 11:10:38 +0100 Subject: ACPI/IORT: Provide a generic helper to retrieve reserve regions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently IORT provides a helper to retrieve HW MSI reserve regions. Change this to a generic helper to retrieve any IORT related reserve regions. This will be useful when we add support for RMR nodes in subsequent patches. [Lorenzo: For ACPI IORT] Reviewed-by: Lorenzo Pieralisi Reviewed-by: Christoph Hellwig Tested-by: Steven Price Tested-by: Laurentiu Tudor Tested-by: Hanjun Guo Reviewed-by: Hanjun Guo Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-4-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- include/linux/acpi_iort.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index a8198b83753d..e5d2de9caf7f 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -36,7 +36,7 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ int iort_dma_get_ranges(struct device *dev, u64 *size); int iort_iommu_configure_id(struct device *dev, const u32 *id_in); -void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); +void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head); phys_addr_t acpi_iort_dma_get_max_cpu_address(void); #else static inline void acpi_iort_init(void) { } @@ -52,7 +52,7 @@ static inline int iort_dma_get_ranges(struct device *dev, u64 *size) static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in) { return -ENODEV; } static inline -void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) +void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head) { } static inline phys_addr_t acpi_iort_dma_get_max_cpu_address(void) -- cgit v1.2.3 From 491cf4a6735a957cd0733365f8d17e0f4308f5a4 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 15 Jun 2022 11:10:39 +0100 Subject: ACPI/IORT: Add support to retrieve IORT RMR reserved regions Parse through the IORT RMR nodes and populate the reserve region list corresponding to a given IOMMU and device(optional). Also, go through the ID mappings of the RMR node and retrieve all the SIDs associated with it. Reviewed-by: Lorenzo Pieralisi Tested-by: Steven Price Tested-by: Laurentiu Tudor Tested-by: Hanjun Guo Reviewed-by: Hanjun Guo Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-5-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b22ffa6bc4a9..e6abd998dbe7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -146,6 +146,14 @@ struct iommu_resv_region { void (*free)(struct device *dev, struct iommu_resv_region *region); }; +struct iommu_iort_rmr_data { + struct iommu_resv_region rr; + + /* Stream IDs associated with IORT RMR entry */ + const u32 *sids; + u32 num_sids; +}; + /** * enum iommu_dev_features - Per device IOMMU features * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses -- cgit v1.2.3 From e302eea8f49717253ac64fd45b7cc719e87fa010 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 15 Jun 2022 11:10:40 +0100 Subject: ACPI/IORT: Add a helper to retrieve RMR info directly This will provide a way for SMMU drivers to retrieve StreamIDs associated with IORT RMR nodes and use that to set bypass settings for those IDs. Tested-by: Steven Price Tested-by: Laurentiu Tudor Tested-by: Hanjun Guo Reviewed-by: Hanjun Guo Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-6-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- include/linux/acpi_iort.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index e5d2de9caf7f..b43be0987b19 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -33,6 +33,10 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 id, enum irq_domain_bus_token bus_token); void acpi_configure_pmsi_domain(struct device *dev); int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); +void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode, + struct list_head *head); +void iort_put_rmr_sids(struct fwnode_handle *iommu_fwnode, + struct list_head *head); /* IOMMU interface */ int iort_dma_get_ranges(struct device *dev, u64 *size); int iort_iommu_configure_id(struct device *dev, const u32 *id_in); @@ -46,6 +50,10 @@ static inline struct irq_domain *iort_get_device_domain( struct device *dev, u32 id, enum irq_domain_bus_token bus_token) { return NULL; } static inline void acpi_configure_pmsi_domain(struct device *dev) { } +static inline +void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode, struct list_head *head) { } +static inline +void iort_put_rmr_sids(struct fwnode_handle *iommu_fwnode, struct list_head *head) { } /* IOMMU interface */ static inline int iort_dma_get_ranges(struct device *dev, u64 *size) { return -ENODEV; } -- cgit v1.2.3 From 88527790c079fb1ea41cbcfa4450ee37906a2fb0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Jul 2022 16:59:24 -0700 Subject: tls: rx: add sockopt for enabling optimistic decrypt with TLS 1.3 Since optimisitic decrypt may add extra load in case of retries require socket owner to explicitly opt-in. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/sockptr.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index ea193414298b..d45902fb4cad 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -102,4 +102,12 @@ static inline long strncpy_from_sockptr(char *dst, sockptr_t src, size_t count) return strncpy_from_user(dst, src.user, count); } +static inline int check_zeroed_sockptr(sockptr_t src, size_t offset, + size_t size) +{ + if (!sockptr_is_kernel(src)) + return check_zeroed_user(src.user + offset, size); + return memchr_inv(src.kernel + offset, 0, size) == NULL; +} + #endif /* _LINUX_SOCKPTR_H */ -- cgit v1.2.3 From 2fd26970cf66bd52dc42843c46968040caa8c9a1 Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Wed, 6 Jul 2022 06:10:26 +1000 Subject: Revert "kernfs: Change kernfs_notify_list to llist." This reverts commit b8f35fa1188b84035c59d4842826c4e93a1b1c9f. This is causing regression due to same kernfs_node getting added multiple times in kernfs_notify_list so revert it until safe way of using llist in this context is found. Reported-by: Nathan Chancellor Reported-by: Michael Walle Reported-by: Marek Szyprowski Signed-off-by: Imran Khan Cc: Tejun Heo Link: https://lore.kernel.org/r/20220705201026.2487665-1-imran.f.khan@oracle.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 13e703f615f7..367044d7708c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -173,7 +173,7 @@ struct kernfs_elem_attr { const struct kernfs_ops *ops; struct kernfs_open_node __rcu *open; loff_t size; - struct llist_node notify_next; /* for kernfs_notify() */ + struct kernfs_node *notify_next; /* for kernfs_notify() */ }; /* -- cgit v1.2.3 From 99e48cd6855e9535488e3c90d65edd46c6e6fc1b Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jul 2022 20:03:50 +0800 Subject: blk-mq: Add a flag for reserved requests Add a flag for reserved requests so that drivers may know this for any special handling. Signed-off-by: John Garry Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/1657109034-206040-3-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 43aad0da3305..7c62b7fabec7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -57,6 +57,7 @@ typedef __u32 __bitwise req_flags_t; #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) /* queue has elevator attached */ #define RQF_ELV ((__force req_flags_t)(1 << 22)) +#define RQF_RESV ((__force req_flags_t)(1 << 23)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ @@ -825,6 +826,11 @@ static inline bool blk_mq_need_time_stamp(struct request *rq) return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV)); } +static inline bool blk_mq_is_reserved_rq(struct request *rq) +{ + return rq->rq_flags & RQF_RESV; +} + /* * Batched completions only work when there is no I/O error and no special * ->end_io handler. -- cgit v1.2.3 From 9bdb4833dd399cbff82cc20893f52bdec66a9eca Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jul 2022 20:03:51 +0800 Subject: blk-mq: Drop blk_mq_ops.timeout 'reserved' arg With new API blk_mq_is_reserved_rq() we can tell if a request is from the reserved pool, so stop passing 'reserved' arg. There is actually only a single user of that arg for all the callback implementations, which can use blk_mq_is_reserved_rq() instead. This will also allow us to stop passing the same 'reserved' around the blk-mq iter functions next. Signed-off-by: John Garry Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Acked-by: Ulf Hansson # For MMC Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/1657109034-206040-4-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7c62b7fabec7..c84c56d296fe 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -575,7 +575,7 @@ struct blk_mq_ops { /** * @timeout: Called on request timeout. */ - enum blk_eh_timer_return (*timeout)(struct request *, bool); + enum blk_eh_timer_return (*timeout)(struct request *); /** * @poll: Called to poll for completion of a specific tag. -- cgit v1.2.3 From 2dd6532e9591f201e7571b30915db807603ab924 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jul 2022 20:03:53 +0800 Subject: blk-mq: Drop 'reserved' arg of busy_tag_iter_fn We no longer use the 'reserved' arg in busy_tag_iter_fn for any iter function so it may be dropped. Signed-off-by: John Garry Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Reviewed-by: Sagi Grimberg #nvme Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/1657109034-206040-6-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c84c56d296fe..810a24884f7e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -520,7 +520,7 @@ struct blk_mq_queue_data { bool last; }; -typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); +typedef bool (busy_tag_iter_fn)(struct request *, void *); /** * struct blk_mq_ops - Callback functions that implements block driver -- cgit v1.2.3 From f1a8bbd1100d9cd117bc8b7fc0903982bbaf474f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:35 +0200 Subject: block: remove a superflous ifdef in blkdev.h It doesn't hurt to always have the blk_zone_cond_str prototype, and the two inlines can also be defined unconditionally. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b9a94c53c6cd..270cd0c55292 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -899,8 +899,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) return bdev->bd_queue; /* this is never NULL */ } -#ifdef CONFIG_BLK_DEV_ZONED - /* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond); @@ -915,7 +913,6 @@ static inline unsigned int bio_zone_is_seq(struct bio *bio) return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev), bio->bi_iter.bi_sector); } -#endif /* CONFIG_BLK_DEV_ZONED */ /* * Return how much of the chunk is left to be used for I/O at a given offset. -- cgit v1.2.3 From 6b2bd274744e6454ba7bbbe6a09b44866f2f414a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:40 +0200 Subject: block: pass a gendisk to blk_queue_set_zoned Prepare for storing the zone related field in struct gendisk instead of struct request_queue. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 270cd0c55292..416faa013782 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -291,7 +291,7 @@ struct queue_limits { typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); -void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model); +void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From 1dc0172027b0aa09823b430e395b1116d2745f36 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:43 +0200 Subject: block: remove queue_max_open_zones and queue_max_active_zones Always use the bdev based helpers instead. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-10-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 416faa013782..7d4105d23b0a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -702,21 +702,22 @@ static inline void blk_queue_max_open_zones(struct request_queue *q, q->max_open_zones = max_open_zones; } -static inline unsigned int queue_max_open_zones(const struct request_queue *q) -{ - return q->max_open_zones; -} - static inline void blk_queue_max_active_zones(struct request_queue *q, unsigned int max_active_zones) { q->max_active_zones = max_active_zones; } -static inline unsigned int queue_max_active_zones(const struct request_queue *q) +static inline unsigned int bdev_max_open_zones(struct block_device *bdev) +{ + return bdev->bd_disk->queue->max_open_zones; +} + +static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { - return q->max_active_zones; + return bdev->bd_disk->queue->max_active_zones; } + #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { @@ -732,11 +733,11 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q, { return 0; } -static inline unsigned int queue_max_open_zones(const struct request_queue *q) +static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return 0; } -static inline unsigned int queue_max_active_zones(const struct request_queue *q) +static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { return 0; } @@ -1314,24 +1315,6 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) return 0; } -static inline unsigned int bdev_max_open_zones(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return queue_max_open_zones(q); - return 0; -} - -static inline unsigned int bdev_max_active_zones(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return queue_max_active_zones(q); - return 0; -} - static inline int queue_dma_alignment(const struct request_queue *q) { return q ? q->dma_alignment : 511; -- cgit v1.2.3 From 982977df48179c8c690868f398051074e68eef0f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:44 +0200 Subject: block: pass a gendisk to blk_queue_max_open_zones and blk_queue_max_active_zones Switch to a gendisk based API in preparation for moving all zone related fields from the request_queue to the gendisk. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-11-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7d4105d23b0a..c05e1cc05c26 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -696,16 +696,16 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q, return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap); } -static inline void blk_queue_max_open_zones(struct request_queue *q, +static inline void disk_set_max_open_zones(struct gendisk *disk, unsigned int max_open_zones) { - q->max_open_zones = max_open_zones; + disk->queue->max_open_zones = max_open_zones; } -static inline void blk_queue_max_active_zones(struct request_queue *q, +static inline void disk_set_max_active_zones(struct gendisk *disk, unsigned int max_active_zones) { - q->max_active_zones = max_active_zones; + disk->queue->max_active_zones = max_active_zones; } static inline unsigned int bdev_max_open_zones(struct block_device *bdev) -- cgit v1.2.3 From b623e347323f6464b20fb0d899a0a73522ed8f6c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:45 +0200 Subject: block: replace blkdev_nr_zones with bdev_nr_zones Pass a block_device instead of a request_queue as that is what most callers have at hand. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Acked-by: Damien Le Moal Link: https://lore.kernel.org/r/20220706070350.1703384-12-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c05e1cc05c26..fa2757ef4a84 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -298,7 +298,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); -unsigned int blkdev_nr_zones(struct gendisk *disk); +unsigned int bdev_nr_zones(struct block_device *bdev); extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); @@ -312,7 +312,7 @@ extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, #else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int blkdev_nr_zones(struct gendisk *disk) +static inline unsigned int bdev_nr_zones(struct block_device *bdev) { return 0; } -- cgit v1.2.3 From de71973c2951cb2ce4b46560f021f03b15906408 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:49 +0200 Subject: block: remove blk_queue_zone_sectors Always use bdev_zone_sectors instead. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-16-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fa2757ef4a84..21b97f7115dc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -667,11 +667,6 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } } -static inline sector_t blk_queue_zone_sectors(struct request_queue *q) -{ - return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; -} - #ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { @@ -1310,9 +1305,9 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - if (q) - return blk_queue_zone_sectors(q); - return 0; + if (!blk_queue_is_zoned(q)) + return 0; + return q->limits.chunk_sectors; } static inline int queue_dma_alignment(const struct request_queue *q) -- cgit v1.2.3 From d86e716aa40643e3eb8c69fab3a198146bf76dd6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:50 +0200 Subject: block: move zone related fields to struct gendisk Move the zone related fields that are currently stored in struct request_queue to struct gendisk as these are part of the highlevel block layer API and are only used for non-passthrough I/O that requires the gendisk. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-17-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 8 ++--- include/linux/blkdev.h | 91 +++++++++++++++++++++++--------------------------- 2 files changed, 46 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 810a24884f7e..d74f6a6b7e69 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -1129,12 +1129,12 @@ void blk_dump_rq_flags(struct request *, char *); #ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_rq_zone_no(struct request *rq) { - return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); + return disk_zone_no(rq->q->disk, blk_rq_pos(rq)); } static inline unsigned int blk_rq_zone_is_seq(struct request *rq) { - return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); + return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq)); } bool blk_req_needs_zone_write_lock(struct request *rq); @@ -1156,8 +1156,8 @@ static inline void blk_req_zone_write_unlock(struct request *rq) static inline bool blk_req_zone_is_write_locked(struct request *rq) { - return rq->q->seq_zones_wlock && - test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock); + return rq->q->disk->seq_zones_wlock && + test_bit(blk_rq_zone_no(rq), rq->q->disk->seq_zones_wlock); } static inline bool blk_req_can_dispatch_to_zone(struct request *rq) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 21b97f7115dc..22c477fadc0f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -164,6 +164,29 @@ struct gendisk { #ifdef CONFIG_BLK_DEV_INTEGRITY struct kobject integrity_kobj; #endif /* CONFIG_BLK_DEV_INTEGRITY */ + +#ifdef CONFIG_BLK_DEV_ZONED + /* + * Zoned block device information for request dispatch control. + * nr_zones is the total number of zones of the device. This is always + * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones + * bits which indicates if a zone is conventional (bit set) or + * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones + * bits which indicates if a zone is write locked, that is, if a write + * request targeting the zone was dispatched. + * + * Reads of this information must be protected with blk_queue_enter() / + * blk_queue_exit(). Modifying this information is only allowed while + * no requests are being processed. See also blk_mq_freeze_queue() and + * blk_mq_unfreeze_queue(). + */ + unsigned int nr_zones; + unsigned int max_open_zones; + unsigned int max_active_zones; + unsigned long *conv_zones_bitmap; + unsigned long *seq_zones_wlock; +#endif /* CONFIG_BLK_DEV_ZONED */ + #if IS_ENABLED(CONFIG_CDROM) struct cdrom_device_info *cdi; #endif @@ -467,31 +490,6 @@ struct request_queue { unsigned int required_elevator_features; -#ifdef CONFIG_BLK_DEV_ZONED - /* - * Zoned block device information for request dispatch control. - * nr_zones is the total number of zones of the device. This is always - * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones - * bits which indicates if a zone is conventional (bit set) or - * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones - * bits which indicates if a zone is write locked, that is, if a write - * request targeting the zone was dispatched. All three fields are - * initialized by the low level device driver (e.g. scsi/sd.c). - * Stacking drivers (device mappers) may or may not initialize - * these fields. - * - * Reads of this information must be protected with blk_queue_enter() / - * blk_queue_exit(). Modifying this information is only allowed while - * no requests are being processed. See also blk_mq_freeze_queue() and - * blk_mq_unfreeze_queue(). - */ - unsigned int nr_zones; - unsigned long *conv_zones_bitmap; - unsigned long *seq_zones_wlock; - unsigned int max_open_zones; - unsigned int max_active_zones; -#endif /* CONFIG_BLK_DEV_ZONED */ - int node; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; @@ -668,63 +666,59 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } #ifdef CONFIG_BLK_DEV_ZONED -static inline unsigned int blk_queue_nr_zones(struct request_queue *q) +static inline unsigned int disk_nr_zones(struct gendisk *disk) { - return blk_queue_is_zoned(q) ? q->nr_zones : 0; + return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0; } -static inline unsigned int blk_queue_zone_no(struct request_queue *q, - sector_t sector) +static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { - if (!blk_queue_is_zoned(q)) + if (!blk_queue_is_zoned(disk->queue)) return 0; - return sector >> ilog2(q->limits.chunk_sectors); + return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline bool blk_queue_zone_is_seq(struct request_queue *q, - sector_t sector) +static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) { - if (!blk_queue_is_zoned(q)) + if (!blk_queue_is_zoned(disk->queue)) return false; - if (!q->conv_zones_bitmap) + if (!disk->conv_zones_bitmap) return true; - return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap); + return !test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap); } static inline void disk_set_max_open_zones(struct gendisk *disk, unsigned int max_open_zones) { - disk->queue->max_open_zones = max_open_zones; + disk->max_open_zones = max_open_zones; } static inline void disk_set_max_active_zones(struct gendisk *disk, unsigned int max_active_zones) { - disk->queue->max_active_zones = max_active_zones; + disk->max_active_zones = max_active_zones; } static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { - return bdev->bd_disk->queue->max_open_zones; + return bdev->bd_disk->max_open_zones; } static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { - return bdev->bd_disk->queue->max_active_zones; + return bdev->bd_disk->max_active_zones; } #else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int blk_queue_nr_zones(struct request_queue *q) +static inline unsigned int disk_nr_zones(struct gendisk *disk) { return 0; } -static inline bool blk_queue_zone_is_seq(struct request_queue *q, - sector_t sector) +static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) { return false; } -static inline unsigned int blk_queue_zone_no(struct request_queue *q, - sector_t sector) +static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { return 0; } @@ -732,6 +726,7 @@ static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return 0; } + static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { return 0; @@ -900,14 +895,12 @@ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond); static inline unsigned int bio_zone_no(struct bio *bio) { - return blk_queue_zone_no(bdev_get_queue(bio->bi_bdev), - bio->bi_iter.bi_sector); + return disk_zone_no(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector); } static inline unsigned int bio_zone_is_seq(struct bio *bio) { - return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev), - bio->bi_iter.bi_sector); + return disk_zone_is_seq(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector); } /* -- cgit v1.2.3 From 2d693ed436a67db06d1473c22d4caee899a4e9d2 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 11 May 2022 15:45:58 +0100 Subject: coresight: Add config flag to enable branch broadcast When enabled, all taken branch addresses are output, even if the branch was because of a direct branch instruction. This enables reconstruction of the program flow without having access to the memory image of the code being executed. Use bit 8 for the config option which would be the correct bit for programming ETMv3. Although branch broadcast can't be enabled on ETMv3 because it's not in the define ETM3X_SUPPORTED_OPTIONS, using the correct bit might help prevent future collisions or allow it to be enabled if needed. Signed-off-by: James Clark Reviewed-by: Mike Leach Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20220511144601.2257870-2-james.clark@arm.com --- include/linux/coresight-pmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index 4ac5c081af93..6c2fd6cc5a98 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -18,6 +18,7 @@ * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and * directly use below macros as config bits. */ +#define ETM_OPT_BRANCH_BROADCAST 8 #define ETM_OPT_CYCACC 12 #define ETM_OPT_CTXTID 14 #define ETM_OPT_CTXTID2 15 @@ -25,6 +26,7 @@ #define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ +#define ETM4_CFG_BIT_BB 3 #define ETM4_CFG_BIT_CYCACC 4 #define ETM4_CFG_BIT_CTXTID 6 #define ETM4_CFG_BIT_VMID 7 -- cgit v1.2.3 From bfdd231374181254742c5e2faef0bef2d30c0ee4 Mon Sep 17 00:00:00 2001 From: Yunfei Wang Date: Thu, 30 Jun 2022 17:29:25 +0800 Subject: iommu/io-pgtable-arm-v7s: Add a quirk to allow pgtable PA up to 35bit Single memory zone feature will remove ZONE_DMA32 and ZONE_DMA and cause pgtable PA size larger than 32bit. Since Mediatek IOMMU hardware support at most 35bit PA in pgtable, so add a quirk to allow the PA of pgtables support up to bit35. Signed-off-by: Ning Li Signed-off-by: Yunfei Wang Reviewed-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/20220630092927.24925-2-yf.wang@mediatek.com Signed-off-by: Joerg Roedel --- include/linux/io-pgtable.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 86af6f0a00a2..ca98aeadcc80 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -74,17 +74,22 @@ struct io_pgtable_cfg { * to support up to 35 bits PA where the bit32, bit33 and bit34 are * encoded in the bit9, bit4 and bit5 of the PTE respectively. * + * IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT: (ARM v7s format) MediaTek IOMMUs + * extend the translation table base support up to 35 bits PA, the + * encoding format is same with IO_PGTABLE_QUIRK_ARM_MTK_EXT. + * * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table * for use in the upper half of a split address space. * * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability * attributes set in the TCR for a non-coherent page-table walker. */ - #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) - #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) - #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) - #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) - #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) + #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) + #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) + #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) + #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) + #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) + #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; -- cgit v1.2.3 From 961343d7822624d0e329ab4167c7e1d02bb53112 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 1 Jul 2022 15:00:53 -0500 Subject: genirq: Refactor accessors to use irq_data_get_affinity_mask A couple of functions directly reference the affinity mask. Route them through irq_data_get_affinity_mask so they will pick up any refactoring done there. Signed-off-by: Samuel Holland Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220701200056.46555-6-samuel@sholland.org --- include/linux/irq.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 505308253d23..69ee4e2f36ce 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -879,16 +879,16 @@ static inline int irq_data_get_node(struct irq_data *d) return irq_common_data_get_node(d->common); } -static inline struct cpumask *irq_get_affinity_mask(int irq) +static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) { - struct irq_data *d = irq_get_irq_data(irq); - - return d ? d->common->affinity : NULL; + return d->common->affinity; } -static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) +static inline struct cpumask *irq_get_affinity_mask(int irq) { - return d->common->affinity; + struct irq_data *d = irq_get_irq_data(irq); + + return d ? irq_data_get_affinity_mask(d) : NULL; } #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK @@ -910,7 +910,7 @@ static inline void irq_data_update_effective_affinity(struct irq_data *d, static inline struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) { - return d->common->affinity; + return irq_data_get_affinity_mask(d); } #endif -- cgit v1.2.3 From 073352e951f60946452da358d64841066c3142ff Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 1 Jul 2022 15:00:54 -0500 Subject: genirq: Add and use an irq_data_update_affinity helper Some architectures and irqchip drivers modify the cpumask returned by irq_data_get_affinity_mask, usually by copying in to it. This is problematic for uniprocessor configurations, where the affinity mask should be constant, as it is known at compile time. Add and use a setter for the affinity mask, following the pattern of irq_data_update_effective_affinity. This allows the getter function to return a const cpumask pointer. Signed-off-by: Samuel Holland Reviewed-by: Oleksandr Tyshchenko # Xen bits Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220701200056.46555-7-samuel@sholland.org --- include/linux/irq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 69ee4e2f36ce..adcfebceb777 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -884,6 +884,12 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) return d->common->affinity; } +static inline void irq_data_update_affinity(struct irq_data *d, + const struct cpumask *m) +{ + cpumask_copy(d->common->affinity, m); +} + static inline struct cpumask *irq_get_affinity_mask(int irq) { struct irq_data *d = irq_get_irq_data(irq); -- cgit v1.2.3 From 4d0b8298818b623f5fa51d5c49e1a142d3618ac9 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 1 Jul 2022 15:00:55 -0500 Subject: genirq: Return a const cpumask from irq_data_get_affinity_mask Now that the irq_data_update_affinity helper exists, enforce its use by returning a a const cpumask from irq_data_get_affinity_mask. Since the previous commit already updated places that needed to call irq_data_update_affinity, this commit updates the remaining code that either did not modify the cpumask or immediately passed the modified mask to irq_set_affinity. Signed-off-by: Samuel Holland Reviewed-by: Michael Kelley Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220701200056.46555-8-samuel@sholland.org --- include/linux/irq.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index adcfebceb777..02073f7a156e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -879,7 +879,8 @@ static inline int irq_data_get_node(struct irq_data *d) return irq_common_data_get_node(d->common); } -static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) +static inline +const struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) { return d->common->affinity; } @@ -890,7 +891,7 @@ static inline void irq_data_update_affinity(struct irq_data *d, cpumask_copy(d->common->affinity, m); } -static inline struct cpumask *irq_get_affinity_mask(int irq) +static inline const struct cpumask *irq_get_affinity_mask(int irq) { struct irq_data *d = irq_get_irq_data(irq); @@ -899,7 +900,7 @@ static inline struct cpumask *irq_get_affinity_mask(int irq) #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK static inline -struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) +const struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) { return d->common->effective_affinity; } @@ -914,13 +915,14 @@ static inline void irq_data_update_effective_affinity(struct irq_data *d, { } static inline -struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) +const struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) { return irq_data_get_affinity_mask(d); } #endif -static inline struct cpumask *irq_get_effective_affinity_mask(unsigned int irq) +static inline +const struct cpumask *irq_get_effective_affinity_mask(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); -- cgit v1.2.3 From aa0813581b8d37bdd91cd40b67ef79ffa45104b2 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 1 Jul 2022 15:00:56 -0500 Subject: genirq: Provide an IRQ affinity mask in non-SMP configs IRQ affinity masks are not allocated in uniprocessor configurations. This requires special case non-SMP code in drivers for irqchips which have per-CPU enable or mask registers. Since IRQ affinity is always the same in a uniprocessor configuration, we can provide a correct affinity mask without allocating one per IRQ. By returning a real cpumask from irq_data_get_affinity_mask even when SMP is disabled, irqchip drivers which iterate over that mask will automatically do the right thing. Signed-off-by: Samuel Holland Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220701200056.46555-9-samuel@sholland.org --- include/linux/irq.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 02073f7a156e..996e22744edd 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -151,7 +151,9 @@ struct irq_common_data { #endif void *handler_data; struct msi_desc *msi_desc; +#ifdef CONFIG_SMP cpumask_var_t affinity; +#endif #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK cpumask_var_t effective_affinity; #endif @@ -882,13 +884,19 @@ static inline int irq_data_get_node(struct irq_data *d) static inline const struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) { +#ifdef CONFIG_SMP return d->common->affinity; +#else + return cpumask_of(0); +#endif } static inline void irq_data_update_affinity(struct irq_data *d, const struct cpumask *m) { +#ifdef CONFIG_SMP cpumask_copy(d->common->affinity, m); +#endif } static inline const struct cpumask *irq_get_affinity_mask(int irq) -- cgit v1.2.3 From 70c248aca9e7efa85a6664d5ab56c17c326c958f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 10 Jun 2022 16:21:39 +0100 Subject: mm: kasan: Skip unpoisoning of user pages Commit c275c5c6d50a ("kasan: disable freed user page poisoning with HW tags") added __GFP_SKIP_KASAN_POISON to GFP_HIGHUSER_MOVABLE. A similar argument can be made about unpoisoning, so also add __GFP_SKIP_KASAN_UNPOISON to user pages. To ensure the user page is still accessible via page_address() without a kasan fault, reset the page->flags tag. With the above changes, there is no need for the arm64 tag_clear_highpage() to reset the page->flags tag. Signed-off-by: Catalin Marinas Cc: Andrey Ryabinin Cc: Andrey Konovalov Cc: Peter Collingbourne Cc: Vincenzo Frascino Reviewed-by: Vincenzo Frascino Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20220610152141.2148929-3-catalin.marinas@arm.com Signed-off-by: Will Deacon --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2d2ccae933c2..0ace7759acd2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -348,7 +348,7 @@ struct vm_area_struct; #define GFP_DMA32 __GFP_DMA32 #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \ - __GFP_SKIP_KASAN_POISON) + __GFP_SKIP_KASAN_POISON | __GFP_SKIP_KASAN_UNPOISON) #define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) #define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) -- cgit v1.2.3 From 2aec377a29250b942f14d3c16d49783da3e9df11 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 5 Jul 2022 14:00:36 -0400 Subject: dm table: remove dm_table_get_num_targets() wrapper More efficient and readable to just access table->num_targets directly. Suggested-by: Linus Torvalds Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 47a01c7cffdf..920085dd7f3b 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -561,7 +561,6 @@ void dm_sync_table(struct mapped_device *md); * Queries */ sector_t dm_table_get_size(struct dm_table *t); -unsigned int dm_table_get_num_targets(struct dm_table *t); fmode_t dm_table_get_mode(struct dm_table *t); struct mapped_device *dm_table_get_md(struct dm_table *t); const char *dm_table_device_name(struct dm_table *t); -- cgit v1.2.3 From 752f596371289cb3b45e99453b5c09d96ed36614 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 26 Jun 2022 10:10:48 +0100 Subject: docs: filesystems: update netfs-api.rst reference Changeset efc930fa1d84 ("docs: filesystems: caching/netfs-api.txt: convert it to ReST") renamed: Documentation/filesystems/caching/netfs-api.txt to: Documentation/filesystems/caching/netfs-api.rst. Update its cross-reference accordingly. Fixes: efc930fa1d84 ("docs: filesystems: caching/netfs-api.txt: convert it to ReST") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/5f867f01d42c3e65e111167739ed1a41a26623f9.1656234456.git.mchehab@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/fscache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 72585c9729a2..2f82ea31d4c1 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -378,7 +378,7 @@ void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data, * * Request that the size of an object be changed. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline -- cgit v1.2.3 From c02b872a7ca7842e4cdbbf621f77607d0a655f83 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 26 Jun 2022 10:10:56 +0100 Subject: Documentation: update watch_queue.rst references Changeset f5461124d59b ("Documentation: move watch_queue to core-api") renamed: Documentation/watch_queue.rst to: Documentation/core-api/watch_queue.rst. Update the cross-references accordingly. Fixes: f5461124d59b ("Documentation: move watch_queue to core-api") Reviewed-by: Randy Dunlap Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/1c220de9c58f35e815a3df9458ac2bea323c8bfb.1656234456.git.mchehab@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/watch_queue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index 3b9a40ae8bdb..fc6bba20273b 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -4,7 +4,7 @@ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * - * See Documentation/watch_queue.rst + * See Documentation/core-api/watch_queue.rst */ #ifndef _LINUX_WATCH_QUEUE_H -- cgit v1.2.3 From d6a21f2d73258f2a4cd2e7806f5755ee73fddced Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 26 Jun 2022 10:11:01 +0100 Subject: objtool: update objtool.txt references Changeset a8e35fece49b ("objtool: Update documentation") renamed: tools/objtool/Documentation/stack-validation.txt to: tools/objtool/Documentation/objtool.txt. Update the cross-references accordingly. Fixes: a8e35fece49b ("objtool: Update documentation") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/ec285ece6348a5be191aebe45f78d06b3319056b.1656234456.git.mchehab@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/objtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 6491fa8fba6d..6f89471005ee 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -62,7 +62,7 @@ struct unwind_hint { * It should only be used in special cases where you're 100% sure it won't * affect the reliability of frame pointers and kernel stack traces. * - * For more information, see tools/objtool/Documentation/stack-validation.txt. + * For more information, see tools/objtool/Documentation/objtool.txt. */ #define STACK_FRAME_NON_STANDARD(func) \ static void __used __section(".discard.func_stack_frame_non_standard") \ -- cgit v1.2.3 From 3566ee1d776c1393393564b2514f9cd52a49c16e Mon Sep 17 00:00:00 2001 From: Michael Kawano Date: Thu, 7 Jul 2022 15:57:27 +0200 Subject: vfio/ccw: Remove UUID from s390 debug log As vfio-ccw devices are created/destroyed, the uuid of the associated mdevs that are recorded in $S390DBF/vfio_ccw_msg/sprintf get lost. This is because a pointer to the UUID is stored instead of the UUID itself, and that memory may have been repurposed if/when the logs are examined. The result is usually garbage UUID data in the logs, though there is an outside chance of an oops happening here. Simply remove the UUID from the traces, as the subchannel number will provide useful configuration information for problem determination, and is stored directly into the log instead of a pointer. As we were the only consumer of mdev_uuid(), remove that too. Cc: Kirti Wankhede Signed-off-by: Michael Kawano Fixes: 60e05d1cf0875 ("vfio-ccw: add some logging") Fixes: b7701dfbf9832 ("vfio-ccw: Register a chp_event callback for vfio-ccw") [farman: reworded commit message, added Fixes: tags] Signed-off-by: Eric Farman Reviewed-by: Jason Gunthorpe Reviewed-by: Matthew Rosato Reviewed-by: Kirti Wankhede Link: https://lore.kernel.org/r/20220707135737.720765-2-farman@linux.ibm.com Signed-off-by: Alex Williamson --- include/linux/mdev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdev.h b/include/linux/mdev.h index bb539794f54a..47ad3b104d9e 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -65,11 +65,6 @@ struct mdev_driver { struct device_driver driver; }; -static inline const guid_t *mdev_uuid(struct mdev_device *mdev) -{ - return &mdev->uuid; -} - extern struct bus_type mdev_bus_type; int mdev_register_device(struct device *dev, struct mdev_driver *mdev_driver); -- cgit v1.2.3 From 87686cc845c3be7dea777f1dbf2de0767007cda8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 4 Jul 2022 16:10:39 +0530 Subject: OPP: Make dev_pm_opp_set_regulators() accept NULL terminated list Make dev_pm_opp_set_regulators() accept a NULL terminated list of names instead of making the callers keep the two parameters in sync, which creates an opportunity for bugs to get in. Suggested-by: Greg Kroah-Hartman Reviewed-by: Steven Price # panfrost Reviewed-by: Chanwoo Choi Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 6708b4ec244d..4c490865d574 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -159,9 +159,9 @@ void dev_pm_opp_put_supported_hw(struct opp_table *opp_table); int devm_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count); struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct opp_table *opp_table); -struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count); +struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[]); void dev_pm_opp_put_regulators(struct opp_table *opp_table); -int devm_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count); +int devm_pm_opp_set_regulators(struct device *dev, const char * const names[]); struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); int devm_pm_opp_set_clkname(struct device *dev, const char *name); @@ -379,7 +379,7 @@ static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, con static inline void dev_pm_opp_put_prop_name(struct opp_table *opp_table) {} -static inline struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count) +static inline struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[]) { return ERR_PTR(-EOPNOTSUPP); } @@ -387,8 +387,7 @@ static inline struct opp_table *dev_pm_opp_set_regulators(struct device *dev, co static inline void dev_pm_opp_put_regulators(struct opp_table *opp_table) {} static inline int devm_pm_opp_set_regulators(struct device *dev, - const char * const names[], - unsigned int count) + const char * const names[]) { return -EOPNOTSUPP; } -- cgit v1.2.3 From 11b9b663585c4f8b00846089ebbca4d1e3283e86 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 25 May 2022 15:23:16 +0530 Subject: OPP: Add dev_pm_opp_set_config() and friends The OPP core already have few configuration specific APIs and it is getting complex or messy for both the OPP core and its users. Lets introduce a new set of API which will be used for all kind of different configurations, and shall eventually be used by all the existing ones. The new API, returns a unique token instead of a pointer to the OPP table, which allows the OPP core to drop the resources selectively later on. Tested-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 4c490865d574..a08f9481efb3 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -90,6 +90,32 @@ struct dev_pm_set_opp_data { struct device *dev; }; +/** + * struct dev_pm_opp_config - Device OPP configuration values + * @clk_names: Clk names, NULL terminated array, max 1 clock for now. + * @prop_name: Name to postfix to properties. + * @set_opp: Custom set OPP helper. + * @supported_hw: Array of hierarchy of versions to match. + * @supported_hw_count: Number of elements in the array. + * @regulator_names: Array of pointers to the names of the regulator, NULL terminated. + * @genpd_names: Null terminated array of pointers containing names of genpd to + * attach. + * @virt_devs: Pointer to return the array of virtual devices. + * + * This structure contains platform specific OPP configurations for the device. + */ +struct dev_pm_opp_config { + /* NULL terminated */ + const char * const *clk_names; + const char *prop_name; + int (*set_opp)(struct dev_pm_set_opp_data *data); + const unsigned int *supported_hw; + unsigned int supported_hw_count; + const char * const *regulator_names; + const char * const *genpd_names; + struct device ***virt_devs; +}; + #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); @@ -154,6 +180,10 @@ int dev_pm_opp_disable(struct device *dev, unsigned long freq); int dev_pm_opp_register_notifier(struct device *dev, struct notifier_block *nb); int dev_pm_opp_unregister_notifier(struct device *dev, struct notifier_block *nb); +int dev_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config); +int devm_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config); +void dev_pm_opp_clear_config(int token); + struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count); void dev_pm_opp_put_supported_hw(struct opp_table *opp_table); int devm_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count); @@ -418,6 +448,18 @@ static inline int devm_pm_opp_attach_genpd(struct device *dev, return -EOPNOTSUPP; } +static inline int dev_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int devm_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline void dev_pm_opp_clear_config(int token) {} + static inline struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table, struct opp_table *dst_table, struct dev_pm_opp *src_opp) { -- cgit v1.2.3 From b0ec09428621daee5101130c307634a390b0213b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 4 Jul 2022 16:36:26 +0530 Subject: OPP: Migrate set-regulators API to use set-config helpers Now that we have a central API to handle all OPP table configurations, migrate the set-regulators family of helpers to use the new infrastructure. The return type and parameter to the APIs change a bit due to this, update the current users as well in the same commit in order to avoid breaking builds. Reviewed-by: Chanwoo Choi Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index a08f9481efb3..f014bd172c99 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -189,9 +189,6 @@ void dev_pm_opp_put_supported_hw(struct opp_table *opp_table); int devm_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count); struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct opp_table *opp_table); -struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[]); -void dev_pm_opp_put_regulators(struct opp_table *opp_table); -int devm_pm_opp_set_regulators(struct device *dev, const char * const names[]); struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); int devm_pm_opp_set_clkname(struct device *dev, const char *name); @@ -409,19 +406,6 @@ static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, con static inline void dev_pm_opp_put_prop_name(struct opp_table *opp_table) {} -static inline struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[]) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void dev_pm_opp_put_regulators(struct opp_table *opp_table) {} - -static inline int devm_pm_opp_set_regulators(struct device *dev, - const char * const names[]) -{ - return -EOPNOTSUPP; -} - static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name) { return ERR_PTR(-EOPNOTSUPP); @@ -606,4 +590,32 @@ static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_ta } #endif +/* OPP Configuration helpers */ + +/* Regulators helpers */ +static inline int dev_pm_opp_set_regulators(struct device *dev, + const char * const names[]) +{ + struct dev_pm_opp_config config = { + .regulator_names = names, + }; + + return dev_pm_opp_set_config(dev, &config); +} + +static inline void dev_pm_opp_put_regulators(int token) +{ + dev_pm_opp_clear_config(token); +} + +static inline int devm_pm_opp_set_regulators(struct device *dev, + const char * const names[]) +{ + struct dev_pm_opp_config config = { + .regulator_names = names, + }; + + return devm_pm_opp_set_config(dev, &config); +} + #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3 From 89f03984fa2abface1ffb1fe050b7c175651ffc7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 26 May 2022 09:36:27 +0530 Subject: OPP: Migrate set-supported-hw API to use set-config helpers Now that we have a central API to handle all OPP table configurations, migrate the set-supported-hw family of helpers to use the new infrastructure. The return type and parameter to the APIs change a bit due to this, update the current users as well in the same commit in order to avoid breaking builds. Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 49 ++++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index f014bd172c99..94d0101c254c 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -184,9 +184,6 @@ int dev_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config); int devm_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config); void dev_pm_opp_clear_config(int token); -struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count); -void dev_pm_opp_put_supported_hw(struct opp_table *opp_table); -int devm_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count); struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct opp_table *opp_table); struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name); @@ -369,22 +366,6 @@ static inline int dev_pm_opp_unregister_notifier(struct device *dev, struct noti return -EOPNOTSUPP; } -static inline struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, - const u32 *versions, - unsigned int count) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void dev_pm_opp_put_supported_hw(struct opp_table *opp_table) {} - -static inline int devm_pm_opp_set_supported_hw(struct device *dev, - const u32 *versions, - unsigned int count) -{ - return -EOPNOTSUPP; -} - static inline struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)) { @@ -618,4 +599,34 @@ static inline int devm_pm_opp_set_regulators(struct device *dev, return devm_pm_opp_set_config(dev, &config); } +/* Supported-hw helpers */ +static inline int dev_pm_opp_set_supported_hw(struct device *dev, + const u32 *versions, + unsigned int count) +{ + struct dev_pm_opp_config config = { + .supported_hw = versions, + .supported_hw_count = count, + }; + + return dev_pm_opp_set_config(dev, &config); +} + +static inline void dev_pm_opp_put_supported_hw(int token) +{ + dev_pm_opp_clear_config(token); +} + +static inline int devm_pm_opp_set_supported_hw(struct device *dev, + const u32 *versions, + unsigned int count) +{ + struct dev_pm_opp_config config = { + .supported_hw = versions, + .supported_hw_count = count, + }; + + return devm_pm_opp_set_config(dev, &config); +} + #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3 From 2368f57685768f9f9cd666eaa4194a359d89afb8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 26 May 2022 09:36:27 +0530 Subject: OPP: Migrate set-clk-name API to use set-config helpers Now that we have a central API to handle all OPP table configurations, migrate the set-clk-name family of helpers to use the new infrastructure. Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 94d0101c254c..ed1906bbe8bb 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -186,9 +186,6 @@ void dev_pm_opp_clear_config(int token); struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct opp_table *opp_table); -struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name); -void dev_pm_opp_put_clkname(struct opp_table *opp_table); -int devm_pm_opp_set_clkname(struct device *dev, const char *name); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); int devm_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); @@ -387,18 +384,6 @@ static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, con static inline void dev_pm_opp_put_prop_name(struct opp_table *opp_table) {} -static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void dev_pm_opp_put_clkname(struct opp_table *opp_table) {} - -static inline int devm_pm_opp_set_clkname(struct device *dev, const char *name) -{ - return -EOPNOTSUPP; -} - static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs) { return ERR_PTR(-EOPNOTSUPP); @@ -629,4 +614,30 @@ static inline int devm_pm_opp_set_supported_hw(struct device *dev, return devm_pm_opp_set_config(dev, &config); } +/* clkname helpers */ +static inline int dev_pm_opp_set_clkname(struct device *dev, const char *name) +{ + const char *names[] = { name, NULL }; + struct dev_pm_opp_config config = { + .clk_names = names, + }; + + return dev_pm_opp_set_config(dev, &config); +} + +static inline void dev_pm_opp_put_clkname(int token) +{ + dev_pm_opp_clear_config(token); +} + +static inline int devm_pm_opp_set_clkname(struct device *dev, const char *name) +{ + const char *names[] = { name, NULL }; + struct dev_pm_opp_config config = { + .clk_names = names, + }; + + return devm_pm_opp_set_config(dev, &config); +} + #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3 From 3c543b42a6df35feb3db6689e512fa167739451e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 26 May 2022 09:36:27 +0530 Subject: OPP: Migrate set-opp-helper API to use set-config helpers Now that we have a central API to handle all OPP table configurations, migrate the set-opp-helper family of helpers to use the new infrastructure. The return type and parameter to the APIs change a bit due to this, update the current users as well in the same commit in order to avoid breaking builds. Remove devm_pm_opp_register_set_opp_helper() as it has no users currently. Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index ed1906bbe8bb..85a4b7353979 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -186,9 +186,6 @@ void dev_pm_opp_clear_config(int token); struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct opp_table *opp_table); -struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); -void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); -int devm_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs); void dev_pm_opp_detach_genpd(struct opp_table *opp_table); int devm_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs); @@ -363,20 +360,6 @@ static inline int dev_pm_opp_unregister_notifier(struct device *dev, struct noti return -EOPNOTSUPP; } -static inline struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, - int (*set_opp)(struct dev_pm_set_opp_data *data)) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) {} - -static inline int devm_pm_opp_register_set_opp_helper(struct device *dev, - int (*set_opp)(struct dev_pm_set_opp_data *data)) -{ - return -EOPNOTSUPP; -} - static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) { return ERR_PTR(-EOPNOTSUPP); @@ -640,4 +623,20 @@ static inline int devm_pm_opp_set_clkname(struct device *dev, const char *name) return devm_pm_opp_set_config(dev, &config); } +/* set-opp helpers */ +static inline int dev_pm_opp_register_set_opp_helper(struct device *dev, + int (*set_opp)(struct dev_pm_set_opp_data *data)) +{ + struct dev_pm_opp_config config = { + .set_opp = set_opp, + }; + + return dev_pm_opp_set_config(dev, &config); +} + +static inline void dev_pm_opp_unregister_set_opp_helper(int token) +{ + dev_pm_opp_clear_config(token); +} + #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3 From 442e7a1786e628b38175314ec1805966b8d0c02c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 26 May 2022 09:36:27 +0530 Subject: OPP: Migrate attach-genpd API to use set-config helpers Now that we have a central API to handle all OPP table configurations, migrate the attach-genpd family of helpers to use the new infrastructure. Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 48 +++++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 85a4b7353979..20e1e5060a8a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -186,9 +186,6 @@ void dev_pm_opp_clear_config(int token); struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct opp_table *opp_table); -struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs); -void dev_pm_opp_detach_genpd(struct opp_table *opp_table); -int devm_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs); struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table, struct opp_table *dst_table, struct dev_pm_opp *src_opp); int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); @@ -367,20 +364,6 @@ static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, con static inline void dev_pm_opp_put_prop_name(struct opp_table *opp_table) {} -static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void dev_pm_opp_detach_genpd(struct opp_table *opp_table) {} - -static inline int devm_pm_opp_attach_genpd(struct device *dev, - const char * const *names, - struct device ***virt_devs) -{ - return -EOPNOTSUPP; -} - static inline int dev_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config) { return -EOPNOTSUPP; @@ -639,4 +622,35 @@ static inline void dev_pm_opp_unregister_set_opp_helper(int token) dev_pm_opp_clear_config(token); } +/* genpd helpers */ +static inline int dev_pm_opp_attach_genpd(struct device *dev, + const char * const *names, + struct device ***virt_devs) +{ + struct dev_pm_opp_config config = { + .genpd_names = names, + .virt_devs = virt_devs, + }; + + return dev_pm_opp_set_config(dev, &config); +} + +static inline void dev_pm_opp_detach_genpd(int token) +{ + dev_pm_opp_clear_config(token); +} + +static inline int devm_pm_opp_attach_genpd(struct device *dev, + const char * const *names, + struct device ***virt_devs) +{ + struct dev_pm_opp_config config = { + .genpd_names = names, + .virt_devs = virt_devs, + }; + + return devm_pm_opp_set_config(dev, &config); +} + + #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3 From 298098e55a6fcc176a5af52cd689f33577ead5ca Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 26 May 2022 09:36:27 +0530 Subject: OPP: Migrate set-prop-name helper API to use set-config helpers Now that we have a central API to handle all OPP table configurations, migrate the set-prop-name family of helpers to use the new infrastructure. The return type and parameter to the APIs change a bit due to this, update the current users as well in the same commit in order to avoid breaking builds. Acked-by: Samuel Holland # sun50i Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 20e1e5060a8a..f995ca1406e8 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -184,8 +184,6 @@ int dev_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config); int devm_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config); void dev_pm_opp_clear_config(int token); -struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name); -void dev_pm_opp_put_prop_name(struct opp_table *opp_table); struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table, struct opp_table *dst_table, struct dev_pm_opp *src_opp); int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); @@ -357,13 +355,6 @@ static inline int dev_pm_opp_unregister_notifier(struct device *dev, struct noti return -EOPNOTSUPP; } -static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void dev_pm_opp_put_prop_name(struct opp_table *opp_table) {} - static inline int dev_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config) { return -EOPNOTSUPP; @@ -652,5 +643,19 @@ static inline int devm_pm_opp_attach_genpd(struct device *dev, return devm_pm_opp_set_config(dev, &config); } +/* prop-name helpers */ +static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) +{ + struct dev_pm_opp_config config = { + .prop_name = name, + }; + + return dev_pm_opp_set_config(dev, &config); +} + +static inline void dev_pm_opp_put_prop_name(int token) +{ + dev_pm_opp_clear_config(token); +} #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3 From aee3352f6ecf8cfad1f1ee5838cfc4d37c6b8f75 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 4 Jul 2022 13:45:08 +0530 Subject: OPP: Add support for config_regulators() helper Extend the dev_pm_opp_set_config() interface to allow adding config_regulators() helpers. This helper will be called to set the voltages of the regulators from the regular path in _set_opp(), while we are trying to change the OPP. This will eventually replace the custom set_opp() helper. Tested-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index f995ca1406e8..9f2f9a792a19 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -90,11 +90,16 @@ struct dev_pm_set_opp_data { struct device *dev; }; +typedef int (*config_regulators_t)(struct device *dev, + struct dev_pm_opp *old_opp, struct dev_pm_opp *new_opp, + struct regulator **regulators, unsigned int count); + /** * struct dev_pm_opp_config - Device OPP configuration values * @clk_names: Clk names, NULL terminated array, max 1 clock for now. * @prop_name: Name to postfix to properties. * @set_opp: Custom set OPP helper. + * @config_regulators: Custom set regulator helper. * @supported_hw: Array of hierarchy of versions to match. * @supported_hw_count: Number of elements in the array. * @regulator_names: Array of pointers to the names of the regulator, NULL terminated. @@ -109,6 +114,7 @@ struct dev_pm_opp_config { const char * const *clk_names; const char *prop_name; int (*set_opp)(struct dev_pm_set_opp_data *data); + config_regulators_t config_regulators; const unsigned int *supported_hw; unsigned int supported_hw_count; const char * const *regulator_names; @@ -613,6 +619,22 @@ static inline void dev_pm_opp_unregister_set_opp_helper(int token) dev_pm_opp_clear_config(token); } +/* config-regulators helpers */ +static inline int dev_pm_opp_set_config_regulators(struct device *dev, + config_regulators_t helper) +{ + struct dev_pm_opp_config config = { + .config_regulators = helper, + }; + + return dev_pm_opp_set_config(dev, &config); +} + +static inline void dev_pm_opp_put_config_regulators(int token) +{ + dev_pm_opp_clear_config(token); +} + /* genpd helpers */ static inline int dev_pm_opp_attach_genpd(struct device *dev, const char * const *names, -- cgit v1.2.3 From 69b1af178a3a534da2f20fa0c7356fcbbe8cce1f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 31 May 2022 13:24:21 +0530 Subject: OPP: Add dev_pm_opp_get_supplies() We already have an API for getting voltage information for a single regulator, dev_pm_opp_get_voltage(), but there is nothing available for multiple regulator case. This patch adds a new API, dev_pm_opp_get_supplies(), to get all information related to the supplies for an OPP. Tested-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 9f2f9a792a19..1e2b33d79ba6 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -129,6 +129,8 @@ void dev_pm_opp_put_opp_table(struct opp_table *opp_table); unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); +int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *supplies); + unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp); unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp); @@ -217,6 +219,11 @@ static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) return 0; } +static inline int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *supplies) +{ + return -EOPNOTSUPP; +} + static inline unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp) { return 0; -- cgit v1.2.3 From 1f378c6ead5c69decf36e8e61de2e50377c76ab8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 4 Jul 2022 14:57:06 +0530 Subject: OPP: Remove custom OPP helper support The only user of the custom helper is migrated to use dev_pm_opp_set_config_regulators() interface. Remove the now unused custom OPP helper support. This cleans up _set_opp() and leaves a single code path to be used by all users. Tested-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 51 -------------------------------------------------- 1 file changed, 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 1e2b33d79ba6..9d59aedc2be3 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -57,39 +57,6 @@ struct dev_pm_opp_icc_bw { u32 peak; }; -/** - * struct dev_pm_opp_info - OPP freq/voltage/current values - * @rate: Target clk rate in hz - * @supplies: Array of voltage/current values for all power supplies - * - * This structure stores the freq/voltage/current values for a single OPP. - */ -struct dev_pm_opp_info { - unsigned long rate; - struct dev_pm_opp_supply *supplies; -}; - -/** - * struct dev_pm_set_opp_data - Set OPP data - * @old_opp: Old OPP info - * @new_opp: New OPP info - * @regulators: Array of regulator pointers - * @regulator_count: Number of regulators - * @clk: Pointer to clk - * @dev: Pointer to the struct device - * - * This structure contains all information required for setting an OPP. - */ -struct dev_pm_set_opp_data { - struct dev_pm_opp_info old_opp; - struct dev_pm_opp_info new_opp; - - struct regulator **regulators; - unsigned int regulator_count; - struct clk *clk; - struct device *dev; -}; - typedef int (*config_regulators_t)(struct device *dev, struct dev_pm_opp *old_opp, struct dev_pm_opp *new_opp, struct regulator **regulators, unsigned int count); @@ -98,7 +65,6 @@ typedef int (*config_regulators_t)(struct device *dev, * struct dev_pm_opp_config - Device OPP configuration values * @clk_names: Clk names, NULL terminated array, max 1 clock for now. * @prop_name: Name to postfix to properties. - * @set_opp: Custom set OPP helper. * @config_regulators: Custom set regulator helper. * @supported_hw: Array of hierarchy of versions to match. * @supported_hw_count: Number of elements in the array. @@ -113,7 +79,6 @@ struct dev_pm_opp_config { /* NULL terminated */ const char * const *clk_names; const char *prop_name; - int (*set_opp)(struct dev_pm_set_opp_data *data); config_regulators_t config_regulators; const unsigned int *supported_hw; unsigned int supported_hw_count; @@ -610,22 +575,6 @@ static inline int devm_pm_opp_set_clkname(struct device *dev, const char *name) return devm_pm_opp_set_config(dev, &config); } -/* set-opp helpers */ -static inline int dev_pm_opp_register_set_opp_helper(struct device *dev, - int (*set_opp)(struct dev_pm_set_opp_data *data)) -{ - struct dev_pm_opp_config config = { - .set_opp = set_opp, - }; - - return dev_pm_opp_set_config(dev, &config); -} - -static inline void dev_pm_opp_unregister_set_opp_helper(int token) -{ - dev_pm_opp_clear_config(token); -} - /* config-regulators helpers */ static inline int dev_pm_opp_set_config_regulators(struct device *dev, config_regulators_t helper) -- cgit v1.2.3 From 9fbb62605607d8f00c32a4765cd1ae67f022b640 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 2 Jun 2022 14:05:59 +0530 Subject: OPP: Remove dev_pm_opp_find_freq_ceil_by_volt() This was added few years back, but the code that was supposed to use it never got merged. Remove the unused helper. Tested-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 9d59aedc2be3..50cbc75bef71 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -118,8 +118,6 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, bool available); struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq); -struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, - unsigned long u_volt); struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, unsigned int level); @@ -265,12 +263,6 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } -static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, - unsigned long u_volt) -{ - return ERR_PTR(-EOPNOTSUPP); -} - static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq) { -- cgit v1.2.3 From 7963d4d710112bc457f99bdb56608211e561190e Mon Sep 17 00:00:00 2001 From: Xin Ji Date: Wed, 6 Jul 2022 16:34:31 +0800 Subject: usb: typec: tcpci: move tcpci.h to include/linux/usb/ USB PD controllers which consisting of a microcontroller (acting as the TCPM) and a port controller (TCPC) - may require that the driver for the PD controller accesses directly also the on-chip port controller in some cases. Move tcpci.h to include/linux/usb/ is convenience access TCPC registers. Reviewed-by: Heikki Krogerus Signed-off-by: Xin Ji Link: https://lore.kernel.org/r/20220706083433.2415524-1-xji@analogixsemi.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 210 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 include/linux/usb/tcpci.h (limited to 'include/linux') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h new file mode 100644 index 000000000000..20c0bedb8ec8 --- /dev/null +++ b/include/linux/usb/tcpci.h @@ -0,0 +1,210 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2015-2017 Google, Inc + * + * USB Type-C Port Controller Interface. + */ + +#ifndef __LINUX_USB_TCPCI_H +#define __LINUX_USB_TCPCI_H + +#include +#include + +#define TCPC_VENDOR_ID 0x0 +#define TCPC_PRODUCT_ID 0x2 +#define TCPC_BCD_DEV 0x4 +#define TCPC_TC_REV 0x6 +#define TCPC_PD_REV 0x8 +#define TCPC_PD_INT_REV 0xa + +#define TCPC_ALERT 0x10 +#define TCPC_ALERT_EXTND BIT(14) +#define TCPC_ALERT_EXTENDED_STATUS BIT(13) +#define TCPC_ALERT_VBUS_DISCNCT BIT(11) +#define TCPC_ALERT_RX_BUF_OVF BIT(10) +#define TCPC_ALERT_FAULT BIT(9) +#define TCPC_ALERT_V_ALARM_LO BIT(8) +#define TCPC_ALERT_V_ALARM_HI BIT(7) +#define TCPC_ALERT_TX_SUCCESS BIT(6) +#define TCPC_ALERT_TX_DISCARDED BIT(5) +#define TCPC_ALERT_TX_FAILED BIT(4) +#define TCPC_ALERT_RX_HARD_RST BIT(3) +#define TCPC_ALERT_RX_STATUS BIT(2) +#define TCPC_ALERT_POWER_STATUS BIT(1) +#define TCPC_ALERT_CC_STATUS BIT(0) + +#define TCPC_ALERT_MASK 0x12 +#define TCPC_POWER_STATUS_MASK 0x14 +#define TCPC_FAULT_STATUS_MASK 0x15 + +#define TCPC_EXTENDED_STATUS_MASK 0x16 +#define TCPC_EXTENDED_STATUS_MASK_VSAFE0V BIT(0) + +#define TCPC_ALERT_EXTENDED_MASK 0x17 +#define TCPC_SINK_FAST_ROLE_SWAP BIT(0) + +#define TCPC_CONFIG_STD_OUTPUT 0x18 + +#define TCPC_TCPC_CTRL 0x19 +#define TCPC_TCPC_CTRL_ORIENTATION BIT(0) +#define PLUG_ORNT_CC1 0 +#define PLUG_ORNT_CC2 1 +#define TCPC_TCPC_CTRL_BIST_TM BIT(1) +#define TCPC_TCPC_CTRL_EN_LK4CONN_ALRT BIT(6) + +#define TCPC_EXTENDED_STATUS 0x20 +#define TCPC_EXTENDED_STATUS_VSAFE0V BIT(0) + +#define TCPC_ROLE_CTRL 0x1a +#define TCPC_ROLE_CTRL_DRP BIT(6) +#define TCPC_ROLE_CTRL_RP_VAL_SHIFT 4 +#define TCPC_ROLE_CTRL_RP_VAL_MASK 0x3 +#define TCPC_ROLE_CTRL_RP_VAL_DEF 0x0 +#define TCPC_ROLE_CTRL_RP_VAL_1_5 0x1 +#define TCPC_ROLE_CTRL_RP_VAL_3_0 0x2 +#define TCPC_ROLE_CTRL_CC2_SHIFT 2 +#define TCPC_ROLE_CTRL_CC2_MASK 0x3 +#define TCPC_ROLE_CTRL_CC1_SHIFT 0 +#define TCPC_ROLE_CTRL_CC1_MASK 0x3 +#define TCPC_ROLE_CTRL_CC_RA 0x0 +#define TCPC_ROLE_CTRL_CC_RP 0x1 +#define TCPC_ROLE_CTRL_CC_RD 0x2 +#define TCPC_ROLE_CTRL_CC_OPEN 0x3 + +#define TCPC_FAULT_CTRL 0x1b + +#define TCPC_POWER_CTRL 0x1c +#define TCPC_POWER_CTRL_VCONN_ENABLE BIT(0) +#define TCPC_POWER_CTRL_BLEED_DISCHARGE BIT(3) +#define TCPC_POWER_CTRL_AUTO_DISCHARGE BIT(4) +#define TCPC_DIS_VOLT_ALRM BIT(5) +#define TCPC_POWER_CTRL_VBUS_VOLT_MON BIT(6) +#define TCPC_FAST_ROLE_SWAP_EN BIT(7) + +#define TCPC_CC_STATUS 0x1d +#define TCPC_CC_STATUS_TOGGLING BIT(5) +#define TCPC_CC_STATUS_TERM BIT(4) +#define TCPC_CC_STATUS_TERM_RP 0 +#define TCPC_CC_STATUS_TERM_RD 1 +#define TCPC_CC_STATE_SRC_OPEN 0 +#define TCPC_CC_STATUS_CC2_SHIFT 2 +#define TCPC_CC_STATUS_CC2_MASK 0x3 +#define TCPC_CC_STATUS_CC1_SHIFT 0 +#define TCPC_CC_STATUS_CC1_MASK 0x3 + +#define TCPC_POWER_STATUS 0x1e +#define TCPC_POWER_STATUS_DBG_ACC_CON BIT(7) +#define TCPC_POWER_STATUS_UNINIT BIT(6) +#define TCPC_POWER_STATUS_SOURCING_VBUS BIT(4) +#define TCPC_POWER_STATUS_VBUS_DET BIT(3) +#define TCPC_POWER_STATUS_VBUS_PRES BIT(2) +#define TCPC_POWER_STATUS_VCONN_PRES BIT(1) +#define TCPC_POWER_STATUS_SINKING_VBUS BIT(0) + +#define TCPC_FAULT_STATUS 0x1f + +#define TCPC_ALERT_EXTENDED 0x21 + +#define TCPC_COMMAND 0x23 +#define TCPC_CMD_WAKE_I2C 0x11 +#define TCPC_CMD_DISABLE_VBUS_DETECT 0x22 +#define TCPC_CMD_ENABLE_VBUS_DETECT 0x33 +#define TCPC_CMD_DISABLE_SINK_VBUS 0x44 +#define TCPC_CMD_SINK_VBUS 0x55 +#define TCPC_CMD_DISABLE_SRC_VBUS 0x66 +#define TCPC_CMD_SRC_VBUS_DEFAULT 0x77 +#define TCPC_CMD_SRC_VBUS_HIGH 0x88 +#define TCPC_CMD_LOOK4CONNECTION 0x99 +#define TCPC_CMD_RXONEMORE 0xAA +#define TCPC_CMD_I2C_IDLE 0xFF + +#define TCPC_DEV_CAP_1 0x24 +#define TCPC_DEV_CAP_2 0x26 +#define TCPC_STD_INPUT_CAP 0x28 +#define TCPC_STD_OUTPUT_CAP 0x29 + +#define TCPC_MSG_HDR_INFO 0x2e +#define TCPC_MSG_HDR_INFO_DATA_ROLE BIT(3) +#define TCPC_MSG_HDR_INFO_PWR_ROLE BIT(0) +#define TCPC_MSG_HDR_INFO_REV_SHIFT 1 +#define TCPC_MSG_HDR_INFO_REV_MASK 0x3 + +#define TCPC_RX_DETECT 0x2f +#define TCPC_RX_DETECT_HARD_RESET BIT(5) +#define TCPC_RX_DETECT_SOP BIT(0) +#define TCPC_RX_DETECT_SOP1 BIT(1) +#define TCPC_RX_DETECT_SOP2 BIT(2) +#define TCPC_RX_DETECT_DBG1 BIT(3) +#define TCPC_RX_DETECT_DBG2 BIT(4) + +#define TCPC_RX_BYTE_CNT 0x30 +#define TCPC_RX_BUF_FRAME_TYPE 0x31 +#define TCPC_RX_BUF_FRAME_TYPE_SOP 0 +#define TCPC_RX_HDR 0x32 +#define TCPC_RX_DATA 0x34 /* through 0x4f */ + +#define TCPC_TRANSMIT 0x50 +#define TCPC_TRANSMIT_RETRY_SHIFT 4 +#define TCPC_TRANSMIT_RETRY_MASK 0x3 +#define TCPC_TRANSMIT_TYPE_SHIFT 0 +#define TCPC_TRANSMIT_TYPE_MASK 0x7 + +#define TCPC_TX_BYTE_CNT 0x51 +#define TCPC_TX_HDR 0x52 +#define TCPC_TX_DATA 0x54 /* through 0x6f */ + +#define TCPC_VBUS_VOLTAGE 0x70 +#define TCPC_VBUS_VOLTAGE_MASK 0x3ff +#define TCPC_VBUS_VOLTAGE_LSB_MV 25 +#define TCPC_VBUS_SINK_DISCONNECT_THRESH 0x72 +#define TCPC_VBUS_SINK_DISCONNECT_THRESH_LSB_MV 25 +#define TCPC_VBUS_SINK_DISCONNECT_THRESH_MAX 0x3ff +#define TCPC_VBUS_STOP_DISCHARGE_THRESH 0x74 +#define TCPC_VBUS_VOLTAGE_ALARM_HI_CFG 0x76 +#define TCPC_VBUS_VOLTAGE_ALARM_LO_CFG 0x78 + +/* I2C_WRITE_BYTE_COUNT + 1 when TX_BUF_BYTE_x is only accessible I2C_WRITE_BYTE_COUNT */ +#define TCPC_TRANSMIT_BUFFER_MAX_LEN 31 + +struct tcpci; + +/* + * @TX_BUF_BYTE_x_hidden: + * optional; Set when TX_BUF_BYTE_x can only be accessed through I2C_WRITE_BYTE_COUNT. + * @frs_sourcing_vbus: + * Optional; Callback to perform chip specific operations when FRS + * is sourcing vbus. + * @auto_discharge_disconnect: + * Optional; Enables TCPC to autonously discharge vbus on disconnect. + * @vbus_vsafe0v: + * optional; Set when TCPC can detect whether vbus is at VSAFE0V. + * @set_partner_usb_comm_capable: + * Optional; The USB Communications Capable bit indicates if port + * partner is capable of communication over the USB data lines + * (e.g. D+/- or SS Tx/Rx). Called to notify the status of the bit. + */ +struct tcpci_data { + struct regmap *regmap; + unsigned char TX_BUF_BYTE_x_hidden:1; + unsigned char auto_discharge_disconnect:1; + unsigned char vbus_vsafe0v:1; + + int (*init)(struct tcpci *tcpci, struct tcpci_data *data); + int (*set_vconn)(struct tcpci *tcpci, struct tcpci_data *data, + bool enable); + int (*start_drp_toggling)(struct tcpci *tcpci, struct tcpci_data *data, + enum typec_cc_status cc); + int (*set_vbus)(struct tcpci *tcpci, struct tcpci_data *data, bool source, bool sink); + void (*frs_sourcing_vbus)(struct tcpci *tcpci, struct tcpci_data *data); + void (*set_partner_usb_comm_capable)(struct tcpci *tcpci, struct tcpci_data *data, + bool capable); +}; + +struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data); +void tcpci_unregister_port(struct tcpci *tcpci); +irqreturn_t tcpci_irq(struct tcpci *tcpci); + +struct tcpm_port; +struct tcpm_port *tcpci_get_tcpm_port(struct tcpci *tcpci); +#endif /* __LINUX_USB_TCPCI_H */ -- cgit v1.2.3 From 620e8e8ba6210003fa45081b63d90ef4f3a0637f Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Thu, 30 Jun 2022 12:35:27 -0700 Subject: of/platform: Add stubs for of_platform_device_create/destroy() Code for platform_device_create() and of_platform_device_destroy() is only generated if CONFIG_OF_ADDRESS=y. Add stubs to avoid unresolved symbols when CONFIG_OF_ADDRESS is not set. Reviewed-by: Stephen Boyd Reviewed-by: Douglas Anderson Acked-by: Rob Herring Signed-off-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20220630123445.v24.1.I08fd2e1c775af04f663730e9fb4d00e6bbb38541@changeid Signed-off-by: Greg Kroah-Hartman --- include/linux/of_platform.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index 84a966623e78..d15b6cd5e1c3 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -61,16 +61,18 @@ static inline struct platform_device *of_find_device_by_node(struct device_node } #endif +extern int of_platform_bus_probe(struct device_node *root, + const struct of_device_id *matches, + struct device *parent); + +#ifdef CONFIG_OF_ADDRESS /* Platform devices and busses creation */ extern struct platform_device *of_platform_device_create(struct device_node *np, const char *bus_id, struct device *parent); extern int of_platform_device_destroy(struct device *dev, void *data); -extern int of_platform_bus_probe(struct device_node *root, - const struct of_device_id *matches, - struct device *parent); -#ifdef CONFIG_OF_ADDRESS + extern int of_platform_populate(struct device_node *root, const struct of_device_id *matches, const struct of_dev_auxdata *lookup, @@ -84,6 +86,18 @@ extern int devm_of_platform_populate(struct device *dev); extern void devm_of_platform_depopulate(struct device *dev); #else +/* Platform devices and busses creation */ +static inline struct platform_device *of_platform_device_create(struct device_node *np, + const char *bus_id, + struct device *parent) +{ + return NULL; +} +static inline int of_platform_device_destroy(struct device *dev, void *data) +{ + return -ENODEV; +} + static inline int of_platform_populate(struct device_node *root, const struct of_device_id *matches, const struct of_dev_auxdata *lookup, -- cgit v1.2.3 From 8bc063641cebf9d555e41d135db2b5035b521768 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Thu, 30 Jun 2022 12:35:29 -0700 Subject: usb: misc: Add onboard_usb_hub driver The main issue this driver addresses is that a USB hub needs to be powered before it can be discovered. For discrete onboard hubs (an example for such a hub is the Realtek RTS5411) this is often solved by supplying the hub with an 'always-on' regulator, which is kind of a hack. Some onboard hubs may require further initialization steps, like changing the state of a GPIO or enabling a clock, which requires even more hacks. This driver creates a platform device representing the hub which performs the necessary initialization. Currently it only supports switching on a single regulator, support for multiple regulators or other actions can be added as needed. Different initialization sequences can be supported based on the compatible string. Besides performing the initialization the driver can be configured to power the hub off during system suspend. This can help to extend battery life on battery powered devices which have no requirements to keep the hub powered during suspend. The driver can also be configured to leave the hub powered when a wakeup capable USB device is connected when suspending, and power it off otherwise. Technically the driver consists of two drivers, the platform driver described above and a very thin USB driver that subclasses the generic driver. The purpose of this driver is to provide the platform driver with the USB devices corresponding to the hub(s) (a hub controller may provide multiple 'logical' hubs, e.g. one to support USB 2.0 and another for USB 3.x). Co-developed-by: Ravi Chandra Sadineni Reviewed-by: Douglas Anderson Signed-off-by: Ravi Chandra Sadineni Signed-off-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20220630123445.v24.3.I7c9a1f1d6ced41dd8310e8a03da666a32364e790@changeid Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/onboard_hub.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/usb/onboard_hub.h (limited to 'include/linux') diff --git a/include/linux/usb/onboard_hub.h b/include/linux/usb/onboard_hub.h new file mode 100644 index 000000000000..d9373230556e --- /dev/null +++ b/include/linux/usb/onboard_hub.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LINUX_USB_ONBOARD_HUB_H +#define __LINUX_USB_ONBOARD_HUB_H + +struct usb_device; +struct list_head; + +#if IS_ENABLED(CONFIG_USB_ONBOARD_HUB) +void onboard_hub_create_pdevs(struct usb_device *parent_hub, struct list_head *pdev_list); +void onboard_hub_destroy_pdevs(struct list_head *pdev_list); +#else +static inline void onboard_hub_create_pdevs(struct usb_device *parent_hub, + struct list_head *pdev_list) {} +static inline void onboard_hub_destroy_pdevs(struct list_head *pdev_list) {} +#endif + +#endif /* __LINUX_USB_ONBOARD_HUB_H */ -- cgit v1.2.3 From 0139da50dc53f0ce2804e83566d290c7e626fd17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 4 Jul 2022 12:45:14 +0300 Subject: serial: Embed rs485_supported to uart_port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Embed rs485_supported to uart_port to allow serial core to tweak it as needed. Reviewed-by: Lino Sanfilippo Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220704094515.6831-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index b7b86ee3cb12..a6fa7c40c330 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -255,7 +255,7 @@ struct uart_port { struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ struct serial_rs485 rs485; - const struct serial_rs485 *rs485_supported; /* Supported mask for serial_rs485 */ + struct serial_rs485 rs485_supported; /* Supported mask for serial_rs485 */ struct gpio_desc *rs485_term_gpio; /* enable RS485 bus termination */ struct serial_iso7816 iso7816; void *private_data; /* generic platform data pointer */ -- cgit v1.2.3 From 8aa5bcb61612060429223d1fbb7a1c30a579fc1f Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 27 Jun 2022 17:19:48 +0300 Subject: gpu: host1x: Add context device management code Add code to register context devices from device tree, allocate them out and manage their refcounts. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index c0bf4e581fe9..32a82da13fed 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -446,4 +446,38 @@ int tegra_mipi_disable(struct tegra_mipi_device *device); int tegra_mipi_start_calibration(struct tegra_mipi_device *device); int tegra_mipi_finish_calibration(struct tegra_mipi_device *device); +/* host1x memory contexts */ + +struct host1x_memory_context { + struct host1x *host; + + refcount_t ref; + struct pid *owner; + + struct device dev; + u64 dma_mask; + u32 stream_id; +}; + +#ifdef CONFIG_IOMMU_API +struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x, + struct pid *pid); +void host1x_memory_context_get(struct host1x_memory_context *cd); +void host1x_memory_context_put(struct host1x_memory_context *cd); +#else +static inline struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x, + struct pid *pid) +{ + return NULL; +} + +static inline void host1x_memory_context_get(struct host1x_memory_context *cd) +{ +} + +static inline void host1x_memory_context_put(struct host1x_memory_context *cd) +{ +} +#endif + #endif -- cgit v1.2.3 From 2486254781eab6f6119fabea78f11386c54460d2 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 27 Jun 2022 17:19:49 +0300 Subject: gpu: host1x: Program context stream ID on submission Add code to do stream ID switching at the beginning of a job. The stream ID is switched to the stream ID specified by the context passed in the job structure. Before switching the stream ID, an OP_DONE wait is done on the channel's engine to ensure that there is no residual ongoing work that might do DMA using the new stream ID. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 32a82da13fed..cb2100d9b0ff 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -327,6 +327,14 @@ struct host1x_job { /* Whether host1x-side firewall should be ran for this job or not */ bool enable_firewall; + + /* Options for configuring engine data stream ID */ + /* Context device to use for job */ + struct host1x_memory_context *memory_context; + /* Stream ID to use if context isolation is disabled (!memory_context) */ + u32 engine_fallback_streamid; + /* Engine offset to program stream ID to */ + u32 engine_streamid_offset; }; struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, -- cgit v1.2.3 From b6c1c5745ccc68ac5d57c7ffb51ea25a86d0e97b Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 27 Jun 2022 08:35:24 -0700 Subject: dm: Add verity helpers for LoadPin LoadPin limits loading of kernel modules, firmware and certain other files to a 'pinned' file system (typically a read-only rootfs). To provide more flexibility LoadPin is being extended to also allow loading these files from trusted dm-verity devices. For that purpose LoadPin can be provided with a list of verity root digests that it should consider as trusted. Add a bunch of helpers to allow LoadPin to check whether a DM device is a trusted verity device. The new functions broadly fall in two categories: those that need access to verity internals (like the root digest), and the 'glue' between LoadPin and verity. The new file dm-verity-loadpin.c contains the glue functions. Signed-off-by: Matthias Kaehlcke Acked-by: Mike Snitzer Link: https://lore.kernel.org/lkml/20220627083512.v7.1.I3e928575a23481121e73286874c4c2bdb403355d@changeid Signed-off-by: Kees Cook --- include/linux/dm-verity-loadpin.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/dm-verity-loadpin.h (limited to 'include/linux') diff --git a/include/linux/dm-verity-loadpin.h b/include/linux/dm-verity-loadpin.h new file mode 100644 index 000000000000..fb695ecaa5d5 --- /dev/null +++ b/include/linux/dm-verity-loadpin.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LINUX_DM_VERITY_LOADPIN_H +#define __LINUX_DM_VERITY_LOADPIN_H + +#include + +struct block_device; + +extern struct list_head dm_verity_loadpin_trusted_root_digests; + +struct dm_verity_loadpin_trusted_root_digest { + struct list_head node; + unsigned int len; + u8 data[]; +}; + +#if IS_ENABLED(CONFIG_SECURITY_LOADPIN) && IS_BUILTIN(CONFIG_DM_VERITY) +bool dm_verity_loadpin_is_bdev_trusted(struct block_device *bdev); +#else +static inline bool dm_verity_loadpin_is_bdev_trusted(struct block_device *bdev) +{ + return false; +} +#endif + +#endif /* __LINUX_DM_VERITY_LOADPIN_H */ -- cgit v1.2.3 From 231af4709018a8e4f20e511da4b6506346d662d3 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 27 Jun 2022 08:35:26 -0700 Subject: dm: verity-loadpin: Use CONFIG_SECURITY_LOADPIN_VERITY for conditional compilation The verity glue for LoadPin is only needed when CONFIG_SECURITY_LOADPIN_VERITY is set, use this option for conditional compilation instead of the combo of CONFIG_DM_VERITY and CONFIG_SECURITY_LOADPIN. Signed-off-by: Matthias Kaehlcke Acked-by: Mike Snitzer Link: https://lore.kernel.org/lkml/20220627083512.v7.3.I5aca2dcc3b06de4bf53696cd21329dce8272b8aa@changeid Signed-off-by: Kees Cook --- include/linux/dm-verity-loadpin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dm-verity-loadpin.h b/include/linux/dm-verity-loadpin.h index fb695ecaa5d5..552b817ab102 100644 --- a/include/linux/dm-verity-loadpin.h +++ b/include/linux/dm-verity-loadpin.h @@ -15,7 +15,7 @@ struct dm_verity_loadpin_trusted_root_digest { u8 data[]; }; -#if IS_ENABLED(CONFIG_SECURITY_LOADPIN) && IS_BUILTIN(CONFIG_DM_VERITY) +#if IS_ENABLED(CONFIG_SECURITY_LOADPIN_VERITY) bool dm_verity_loadpin_is_bdev_trusted(struct block_device *bdev); #else static inline bool dm_verity_loadpin_is_bdev_trusted(struct block_device *bdev) -- cgit v1.2.3 From 91a29af413def677495e447fb9a06957ebc8bed5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Jul 2022 19:23:09 +0100 Subject: gpio: Remove dynamic allocation from populate_parent_alloc_arg() The gpiolib is unique in the way it uses intermediate fwspecs when feeding an interrupt specifier to the parent domain, as it relies on the populate_parent_alloc_arg() callback to perform a dynamic allocation. This is pretty inefficient (we free the structure almost immediately), and the only reason this isn't a stack allocation is that our ThunderX friend uses MSIs rather than a FW-constructed structure. Let's solve it by providing a new type composed of the union of a struct irq_fwspec and a msi_info_t, which satisfies both requirements. This allows us to use a stack allocation, and we can move the handful of users to this new scheme. Also perform some additional cleanup, such as getting rid of the stub versions of the irq_domain_translate_*cell helpers, which are never used when CONFIG_IRQ_DOMAIN_HIERARCHY isn't selected. Tested on a Tegra186. Reviewed-by: Linus Walleij Signed-off-by: Marc Zyngier Cc: Daniel Palmer Cc: Romain Perier Cc: Bartosz Golaszewski Cc: Thierry Reding Cc: Jonathan Hunter Cc: Robert Richter Cc: Nobuhiro Iwamatsu Cc: Andy Gross Cc: Bjorn Andersson Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20220707182314.66610-2-prabhakar.mahadev-lad.rj@bp.renesas.com --- include/linux/gpio/driver.h | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index b1e0f1f8ee2e..ad5b92b74ccf 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -12,6 +12,8 @@ #include #include +#include + struct gpio_desc; struct of_phandle_args; struct device_node; @@ -23,6 +25,13 @@ enum gpio_lookup_flags; struct gpio_chip; +union gpio_irq_fwspec { + struct irq_fwspec fwspec; +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN + msi_alloc_info_t msiinfo; +#endif +}; + #define GPIO_LINE_DIRECTION_IN 1 #define GPIO_LINE_DIRECTION_OUT 0 @@ -103,9 +112,10 @@ struct gpio_irq_chip { * variant named &gpiochip_populate_parent_fwspec_fourcell is also * available. */ - void *(*populate_parent_alloc_arg)(struct gpio_chip *gc, - unsigned int parent_hwirq, - unsigned int parent_type); + int (*populate_parent_alloc_arg)(struct gpio_chip *gc, + union gpio_irq_fwspec *fwspec, + unsigned int parent_hwirq, + unsigned int parent_type); /** * @child_offset_to_irq: @@ -646,28 +656,14 @@ struct bgpio_pdata { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -void *gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, +int gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, + union gpio_irq_fwspec *gfwspec, + unsigned int parent_hwirq, + unsigned int parent_type); +int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, + union gpio_irq_fwspec *gfwspec, unsigned int parent_hwirq, unsigned int parent_type); -void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, - unsigned int parent_hwirq, - unsigned int parent_type); - -#else - -static inline void *gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, - unsigned int parent_hwirq, - unsigned int parent_type) -{ - return NULL; -} - -static inline void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, - unsigned int parent_hwirq, - unsigned int parent_type) -{ - return NULL; -} #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ -- cgit v1.2.3 From 1dd685c414a7b9fdb3d23aca3aedae84f0b998ae Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 6 Jul 2022 14:51:00 -0400 Subject: XArray: Add calls to might_alloc() Catch bogus GFP flags deterministically, instead of occasionally when we actually have to allocate memory. Reported-by: Nikolay Borisov Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index c29e11b2c073..44dd6d6e01bc 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -586,6 +587,7 @@ static inline void *xa_store_bh(struct xarray *xa, unsigned long index, { void *curr; + might_alloc(gfp); xa_lock_bh(xa); curr = __xa_store(xa, index, entry, gfp); xa_unlock_bh(xa); @@ -612,6 +614,7 @@ static inline void *xa_store_irq(struct xarray *xa, unsigned long index, { void *curr; + might_alloc(gfp); xa_lock_irq(xa); curr = __xa_store(xa, index, entry, gfp); xa_unlock_irq(xa); @@ -687,6 +690,7 @@ static inline void *xa_cmpxchg(struct xarray *xa, unsigned long index, { void *curr; + might_alloc(gfp); xa_lock(xa); curr = __xa_cmpxchg(xa, index, old, entry, gfp); xa_unlock(xa); @@ -714,6 +718,7 @@ static inline void *xa_cmpxchg_bh(struct xarray *xa, unsigned long index, { void *curr; + might_alloc(gfp); xa_lock_bh(xa); curr = __xa_cmpxchg(xa, index, old, entry, gfp); xa_unlock_bh(xa); @@ -741,6 +746,7 @@ static inline void *xa_cmpxchg_irq(struct xarray *xa, unsigned long index, { void *curr; + might_alloc(gfp); xa_lock_irq(xa); curr = __xa_cmpxchg(xa, index, old, entry, gfp); xa_unlock_irq(xa); @@ -770,6 +776,7 @@ static inline int __must_check xa_insert(struct xarray *xa, { int err; + might_alloc(gfp); xa_lock(xa); err = __xa_insert(xa, index, entry, gfp); xa_unlock(xa); @@ -799,6 +806,7 @@ static inline int __must_check xa_insert_bh(struct xarray *xa, { int err; + might_alloc(gfp); xa_lock_bh(xa); err = __xa_insert(xa, index, entry, gfp); xa_unlock_bh(xa); @@ -828,6 +836,7 @@ static inline int __must_check xa_insert_irq(struct xarray *xa, { int err; + might_alloc(gfp); xa_lock_irq(xa); err = __xa_insert(xa, index, entry, gfp); xa_unlock_irq(xa); @@ -857,6 +866,7 @@ static inline __must_check int xa_alloc(struct xarray *xa, u32 *id, { int err; + might_alloc(gfp); xa_lock(xa); err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock(xa); @@ -886,6 +896,7 @@ static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id, { int err; + might_alloc(gfp); xa_lock_bh(xa); err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock_bh(xa); @@ -915,6 +926,7 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, { int err; + might_alloc(gfp); xa_lock_irq(xa); err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock_irq(xa); @@ -948,6 +960,7 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, { int err; + might_alloc(gfp); xa_lock(xa); err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock(xa); @@ -981,6 +994,7 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, { int err; + might_alloc(gfp); xa_lock_bh(xa); err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock_bh(xa); @@ -1014,6 +1028,7 @@ static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry, { int err; + might_alloc(gfp); xa_lock_irq(xa); err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock_irq(xa); -- cgit v1.2.3 From c435c54639aa5513ab877c8b014dd83d4ce6b40e Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Mon, 6 Jun 2022 16:33:14 -0400 Subject: vfio/pci: introduce CONFIG_VFIO_PCI_ZDEV_KVM The current contents of vfio-pci-zdev are today only useful in a KVM environment; let's tie everything currently under vfio-pci-zdev to this Kconfig statement and require KVM in this case, reducing complexity (e.g. symbol lookups). Signed-off-by: Matthew Rosato Acked-by: Alex Williamson Reviewed-by: Pierre Morel Link: https://lore.kernel.org/r/20220606203325.110625-11-mjrosato@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/linux/vfio_pci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 23c176d4b073..63af2897939c 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -206,7 +206,7 @@ static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) } #endif -#ifdef CONFIG_S390 +#ifdef CONFIG_VFIO_PCI_ZDEV_KVM extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, struct vfio_info_cap *caps); #else -- cgit v1.2.3 From 3c5a1b6f0a18520a0edd0600fef6f1a8553b8fdc Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Mon, 6 Jun 2022 16:33:19 -0400 Subject: KVM: s390: pci: provide routines for enabling/disabling interrupt forwarding These routines will be wired into a kvm ioctl in order to respond to requests to enable / disable a device for Adapter Event Notifications / Adapter Interuption Forwarding. Reviewed-by: Christian Borntraeger Acked-by: Niklas Schnelle Signed-off-by: Matthew Rosato Link: https://lore.kernel.org/r/20220606203325.110625-16-mjrosato@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/linux/sched/user.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 00ed419dd464..f054d0360a75 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -24,7 +24,8 @@ struct user_struct { kuid_t uid; #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \ - defined(CONFIG_NET) || defined(CONFIG_IO_URING) + defined(CONFIG_NET) || defined(CONFIG_IO_URING) || \ + defined(CONFIG_VFIO_PCI_ZDEV_KVM) atomic_long_t locked_vm; #endif #ifdef CONFIG_WATCH_QUEUE -- cgit v1.2.3 From 8061d1c31f1a018281bc9877ecce44bfc779e21d Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Mon, 6 Jun 2022 16:33:21 -0400 Subject: vfio-pci/zdev: add open/close device hooks During vfio-pci open_device, pass the KVM associated with the vfio group (if one exists). This is needed in order to pass a special indicator (GISA) to firmware to allow zPCI interpretation facilities to be used for only the specific KVM associated with the vfio-pci device. During vfio-pci close_device, unregister the notifier. Signed-off-by: Matthew Rosato Acked-by: Alex Williamson Reviewed-by: Pierre Morel Link: https://lore.kernel.org/r/20220606203325.110625-18-mjrosato@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/linux/vfio_pci_core.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 63af2897939c..d5d9e17f0156 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -209,12 +209,22 @@ static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) #ifdef CONFIG_VFIO_PCI_ZDEV_KVM extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, struct vfio_info_cap *caps); +int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev); +void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev); #else static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, struct vfio_info_cap *caps) { return -ENODEV; } + +static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev) +{ + return 0; +} + +static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev) +{} #endif /* Will be exported for vfio pci drivers usage */ -- cgit v1.2.3 From ef6e5d61eb7a0a30f776a829274573094185d03d Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 6 Jul 2022 17:15:52 +0200 Subject: genirq: Allow irq_set_chip_handler_name_locked() to take a const irq_chip Similar to commit 393e1280f765 ("genirq: Allow irq_chip registration functions to take a const irq_chip"), allow the irq_set_chip_handler_name_locked() function to take a const irq_chip argument. Signed-off-by: Michael Walle Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220706151553.1580790-1-michael@walle.cc --- include/linux/irqdesc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index a77584593f7d..1cd4e36890fb 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -209,14 +209,15 @@ static inline void irq_set_handler_locked(struct irq_data *data, * Must be called with irq_desc locked and valid parameters. */ static inline void -irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip, +irq_set_chip_handler_name_locked(struct irq_data *data, + const struct irq_chip *chip, irq_flow_handler_t handler, const char *name) { struct irq_desc *desc = irq_data_to_desc(data); desc->handle_irq = handler; desc->name = name; - data->chip = chip; + data->chip = (struct irq_chip *)chip; } bool irq_check_status_bit(unsigned int irq, unsigned int bitmask); -- cgit v1.2.3 From 6976890e8998afd8abbbd9fe27ed71387b24f57f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Jun 2022 11:00:45 +0200 Subject: netfilter: nf_conntrack: add missing __rcu annotations Access to the hook pointers use correct helpers but the pointers lack the needed __rcu annotation. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_sip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index c620521c42bc..dbc614dfe0d5 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -164,7 +164,7 @@ struct nf_nat_sip_hooks { unsigned int medialen, union nf_inet_addr *rtp_addr); }; -extern const struct nf_nat_sip_hooks *nf_nat_sip_hooks; +extern const struct nf_nat_sip_hooks __rcu *nf_nat_sip_hooks; int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr, unsigned int datalen, unsigned int *matchoff, -- cgit v1.2.3 From d3f2d0a292c24fc624afb2b4f47f838e83775721 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Jun 2022 11:00:47 +0200 Subject: netfilter: h323: merge nat hook pointers into one sparse complains about incorrect rcu usage. Code uses the correct rcu access primitives, but the function pointers lack rcu annotations. Collapse all of them into a single structure, then annotate the pointer. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_h323.h | 109 ++++++++++++++-------------- 1 file changed, 56 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h index 4561ec0fcea4..9e937f64a1ad 100644 --- a/include/linux/netfilter/nf_conntrack_h323.h +++ b/include/linux/netfilter/nf_conntrack_h323.h @@ -38,60 +38,63 @@ void nf_conntrack_h245_expect(struct nf_conn *new, struct nf_conntrack_expect *this); void nf_conntrack_q931_expect(struct nf_conn *new, struct nf_conntrack_expect *this); -extern int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff, - unsigned char **data, int dataoff, - H245_TransportAddress *taddr, - union nf_inet_addr *addr, - __be16 port); -extern int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned int protoff, - unsigned char **data, int dataoff, - TransportAddress *taddr, - union nf_inet_addr *addr, - __be16 port); -extern int (*set_sig_addr_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned char **data, - TransportAddress *taddr, int count); -extern int (*set_ras_addr_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned char **data, - TransportAddress *taddr, int count); -extern int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned char **data, - int dataoff, - H245_TransportAddress *taddr, - __be16 port, __be16 rtp_port, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp); -extern int (*nat_t120_hook) (struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, + +struct nfct_h323_nat_hooks { + int (*set_h245_addr)(struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, - H245_TransportAddress *taddr, __be16 port, - struct nf_conntrack_expect *exp); -extern int (*nat_h245_hook) (struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, + H245_TransportAddress *taddr, + union nf_inet_addr *addr, __be16 port); + int (*set_h225_addr)(struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, - TransportAddress *taddr, __be16 port, - struct nf_conntrack_expect *exp); -extern int (*nat_callforwarding_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned char **data, int dataoff, - TransportAddress *taddr, - __be16 port, - struct nf_conntrack_expect *exp); -extern int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned char **data, TransportAddress *taddr, - int idx, __be16 port, - struct nf_conntrack_expect *exp); + TransportAddress *taddr, + union nf_inet_addr *addr, __be16 port); + int (*set_sig_addr)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int count); + int (*set_ras_addr)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int count); + int (*nat_rtp_rtcp)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, + __be16 port, __be16 rtp_port, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp); + int (*nat_t120)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp); + int (*nat_h245)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp); + int (*nat_callforwarding)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp); + int (*nat_q931)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, TransportAddress *taddr, int idx, + __be16 port, struct nf_conntrack_expect *exp); +}; +extern const struct nfct_h323_nat_hooks __rcu *nfct_h323_nat_hook; #endif -- cgit v1.2.3 From d5b36a4dbd06c5e8e36ca8ccc552f679069e2946 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 11 Jul 2022 18:16:25 +0200 Subject: fix race between exit_itimers() and /proc/pid/timers As Chris explains, the comment above exit_itimers() is not correct, we can race with proc_timers_seq_ops. Change exit_itimers() to clear signal->posix_timers with ->siglock held. Cc: Reported-by: chris@accessvector.net Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/sched/task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 505aaf9fe477..81cab4b01edc 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -85,7 +85,7 @@ static inline void exit_thread(struct task_struct *tsk) extern __noreturn void do_group_exit(int); extern void exit_files(struct task_struct *); -extern void exit_itimers(struct signal_struct *); +extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); -- cgit v1.2.3 From 3d6e44623841c8b82c2157f2f749019803fb238a Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Sat, 9 Jul 2022 11:19:57 +0800 Subject: kunit: unify module and builtin suite definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, KUnit runs built-in tests and tests loaded from modules differently. For built-in tests, the kunit_test_suite{,s}() macro adds a list of suites in the .kunit_test_suites linker section. However, for kernel modules, a module_init() function is used to run the test suites. This causes problems if tests are included in a module which already defines module_init/exit_module functions, as they'll conflict with the kunit-provided ones. This change removes the kunit-defined module inits, and instead parses the kunit tests from their own section in the module. After module init, we call __kunit_test_suites_init() on the contents of that section, which prepares and runs the suite. This essentially unifies the module- and non-module kunit init formats. Tested-by: Maíra Canal Reviewed-by: Brendan Higgins Signed-off-by: Jeremy Kerr Signed-off-by: Daniel Latypov Signed-off-by: David Gow Signed-off-by: Shuah Khan --- include/linux/module.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index abd9fa916b7d..2490223c975d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -505,6 +505,11 @@ struct module { int num_static_call_sites; struct static_call_site *static_call_sites; #endif +#if IS_ENABLED(CONFIG_KUNIT) + int num_kunit_suites; + struct kunit_suite ***kunit_suites; +#endif + #ifdef CONFIG_LIVEPATCH bool klp; /* Is this a livepatch module? */ -- cgit v1.2.3 From e5857d396f35e59e6fe96cf1178b0357cc3a1ea4 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Sat, 9 Jul 2022 11:19:58 +0800 Subject: kunit: flatten kunit_suite*** to kunit_suite** in .kunit_test_suites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We currently store kunit suites in the .kunit_test_suites ELF section as a `struct kunit_suite***` (modulo some `const`s). For every test file, we store a struct kunit_suite** NULL-terminated array. This adds quite a bit of complexity to the test filtering code in the executor. Instead, let's just make the .kunit_test_suites section contain a single giant array of struct kunit_suite pointers, which can then be directly manipulated. This array is not NULL-terminated, and so none of the test filtering code needs to NULL-terminate anything. Tested-by: Maíra Canal Reviewed-by: Brendan Higgins Signed-off-by: Daniel Latypov Co-developed-by: David Gow Signed-off-by: David Gow Signed-off-by: Shuah Khan --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 2490223c975d..518296ea7f73 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -507,7 +507,7 @@ struct module { #endif #if IS_ENABLED(CONFIG_KUNIT) int num_kunit_suites; - struct kunit_suite ***kunit_suites; + struct kunit_suite **kunit_suites; #endif -- cgit v1.2.3 From f16214c102f0f64b2f3546e989498525bd7b7708 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Mon, 11 Jul 2022 10:12:00 +0200 Subject: bpf: Fix 'dubious one-bit signed bitfield' warnings Our CI[1] reported these warnings when using Sparse: $ touch net/mptcp/bpf.c $ make C=1 net/mptcp/bpf.o net/mptcp/bpf.c: note: in included file: include/linux/bpf_verifier.h:348:26: error: dubious one-bit signed bitfield include/linux/bpf_verifier.h:349:29: error: dubious one-bit signed bitfield Set them as 'unsigned' to avoid warnings. [1] https://github.com/multipath-tcp/mptcp_net-next/actions/runs/2643588487 Fixes: 1ade23711971 ("bpf: Inline calls to bpf_loop when callback is known") Signed-off-by: Matthieu Baerts Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220711081200.2081262-1-matthieu.baerts@tessares.net --- include/linux/bpf_verifier.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 81b19669efba..2e3bad8640dc 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -345,10 +345,10 @@ struct bpf_verifier_state_list { }; struct bpf_loop_inline_state { - int initialized:1; /* set to true upon first entry */ - int fit_for_inline:1; /* true if callback function is the same - * at each call and flags are always zero - */ + unsigned int initialized:1; /* set to true upon first entry */ + unsigned int fit_for_inline:1; /* true if callback function is the same + * at each call and flags are always zero + */ u32 callback_subprogno; /* valid when fit_for_inline is true */ }; -- cgit v1.2.3 From 7b19119f4c7dc9412b556ea12fae6b32574e2810 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 11 Jul 2022 01:14:06 -0700 Subject: net/mlx5: Use devl_ API in mlx5e_devlink_port_register As part of the flows invoked by mlx5_devlink_eswitch_mode_set() get to mlx5_rescan_drivers_locked() which can call mlx5e_probe()/mlx5e_remove and register/unregister mlx5e driver ports accordingly. This can lead to deadlock once mlx5_devlink_eswitch_mode_set() will use devlink lock. Use devl_port_register/unregister() instead of devlink_port_register/unregister() and add devlink instance locks in the driver paths to this function to have it locked while calling devl_ API function. If remove or probe were called by module init or module cleanup flows, need to lock devlink just before calling devl_port_register(), otherwise it is called by attach/detach or register/unregister flow and we can have the flow locked. Added flag to distinguish between these cases. This will be used by the downstream patch to invoke mlx5_devlink_eswitch_mode_set() with devlink locked. Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Paolo Abeni --- include/linux/mlx5/driver.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 76d7661e3e63..bd882884b23c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -551,6 +551,10 @@ enum { * creation/deletion on drivers rescan. Unset during device attach. */ MLX5_PRIV_FLAGS_DETACH = 1 << 2, + /* Distinguish between mlx5e_probe/remove called by module init/cleanup + * and called by other flows which can already hold devlink lock + */ + MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW = 1 << 3, }; struct mlx5_adev { -- cgit v1.2.3 From 91f059c95c6a5dbc0907a5f871e7915a5e93c1f9 Mon Sep 17 00:00:00 2001 From: Shaik Sajida Bhanu Date: Fri, 27 May 2022 23:23:52 +0530 Subject: mmc: core: Capture eMMC and SD card errors Add changes to capture eMMC and SD card errors. This is useful for debug and testing. Signed-off-by: Liangliang Lu Signed-off-by: Sayali Lokhande Signed-off-by: Bao D. Nguyen Signed-off-by: Ram Prakash Gupta Signed-off-by: Shaik Sajida Bhanu Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1653674036-21829-2-git-send-email-quic_c_sbhanu@quicinc.com Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index c193c50ccd78..eb8bc5b9b0b7 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -93,6 +93,25 @@ struct mmc_clk_phase_map { struct mmc_host; +enum mmc_err_stat { + MMC_ERR_CMD_TIMEOUT, + MMC_ERR_CMD_CRC, + MMC_ERR_DAT_TIMEOUT, + MMC_ERR_DAT_CRC, + MMC_ERR_AUTO_CMD, + MMC_ERR_ADMA, + MMC_ERR_TUNING, + MMC_ERR_CMDQ_RED, + MMC_ERR_CMDQ_GCE, + MMC_ERR_CMDQ_ICCE, + MMC_ERR_REQ_TIMEOUT, + MMC_ERR_CMDQ_REQ_TIMEOUT, + MMC_ERR_ICE_CFG, + MMC_ERR_CTRL_TIMEOUT, + MMC_ERR_UNEXPECTED_IRQ, + MMC_ERR_MAX, +}; + struct mmc_host_ops { /* * It is optional for the host to implement pre_req and post_req in @@ -501,6 +520,7 @@ struct mmc_host { /* Host Software Queue support */ bool hsq_enabled; + u32 err_stats[MMC_ERR_MAX]; unsigned long private[] ____cacheline_aligned; }; @@ -635,6 +655,12 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data) return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE; } +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *host, + enum mmc_err_stat stat) +{ + host->err_stats[stat] += 1; +} + int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode); int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); -- cgit v1.2.3 From efe8f5c9b5e118070f424205078ababc46fd130a Mon Sep 17 00:00:00 2001 From: Shaik Sajida Bhanu Date: Fri, 27 May 2022 23:23:53 +0530 Subject: mmc: sdhci: Capture eMMC and SD card errors Add changes to capture eMMC and SD card errors. This is useful for debug and testing. Signed-off-by: Liangliang Lu Signed-off-by: Sayali Lokhande Signed-off-by: Bao D. Nguyen Signed-off-by: Shaik Sajida Bhanu Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1653674036-21829-3-git-send-email-quic_c_sbhanu@quicinc.com Signed-off-by: Ulf Hansson --- include/linux/mmc/mmc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index d9a65c6a8816..9c50bc40f8ff 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -99,6 +99,12 @@ static inline bool mmc_op_multi(u32 opcode) opcode == MMC_READ_MULTIPLE_BLOCK; } +static inline bool mmc_op_tuning(u32 opcode) +{ + return opcode == MMC_SEND_TUNING_BLOCK || + opcode == MMC_SEND_TUNING_BLOCK_HS200; +} + /* * MMC_SWITCH argument format: * -- cgit v1.2.3 From 2083da24eb56ce622332946800a67a7449d85fe5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 10 Jun 2022 12:02:04 +0530 Subject: OPP: Allow multiple clocks for a device This patch adds support to allow multiple clocks for a device. The design is pretty much similar to how this is done for regulators, and platforms can supply their own version of the config_clks() callback if they have multiple clocks for their device. The core manages the calls via opp_table->config_clks() eventually. We have kept both "clk" and "clks" fields in the OPP table structure and the reason is provided as a comment in _opp_set_clknames(). The same isn't done for "rates" though and we use rates[0] at most of the places now. Co-developed-by: Krzysztof Kozlowski Signed-off-by: Krzysztof Kozlowski Tested-by: Jon Hunter Tested-by: Dmitry Osipenko Tested-by: Manivannan Sadhasivam Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 50cbc75bef71..104151dfe46c 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -61,9 +61,13 @@ typedef int (*config_regulators_t)(struct device *dev, struct dev_pm_opp *old_opp, struct dev_pm_opp *new_opp, struct regulator **regulators, unsigned int count); +typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table, + struct dev_pm_opp *opp, void *data, bool scaling_down); + /** * struct dev_pm_opp_config - Device OPP configuration values - * @clk_names: Clk names, NULL terminated array, max 1 clock for now. + * @clk_names: Clk names, NULL terminated array. + * @config_clks: Custom set clk helper. * @prop_name: Name to postfix to properties. * @config_regulators: Custom set regulator helper. * @supported_hw: Array of hierarchy of versions to match. @@ -78,6 +82,7 @@ typedef int (*config_regulators_t)(struct device *dev, struct dev_pm_opp_config { /* NULL terminated */ const char * const *clk_names; + config_clks_t config_clks; const char *prop_name; config_regulators_t config_regulators; const unsigned int *supported_hw; -- cgit v1.2.3 From 8174a3a613af1a911ab19da812824f7180b261f9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 10 Jun 2022 12:37:25 +0530 Subject: OPP: Provide a simple implementation to configure multiple clocks This provides a simple implementation to configure multiple clocks for a device. Tested-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 104151dfe46c..683e6baf9618 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -159,6 +159,9 @@ int dev_pm_opp_unregister_notifier(struct device *dev, struct notifier_block *nb int dev_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config); int devm_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config); void dev_pm_opp_clear_config(int token); +int dev_pm_opp_config_clks_simple(struct device *dev, + struct opp_table *opp_table, struct dev_pm_opp *opp, void *data, + bool scaling_down); struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table, struct opp_table *dst_table, struct dev_pm_opp *src_opp); int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate); @@ -342,6 +345,13 @@ static inline int devm_pm_opp_set_config(struct device *dev, struct dev_pm_opp_c static inline void dev_pm_opp_clear_config(int token) {} +static inline int dev_pm_opp_config_clks_simple(struct device *dev, + struct opp_table *opp_table, struct dev_pm_opp *opp, void *data, + bool scaling_down) +{ + return -EOPNOTSUPP; +} + static inline struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table, struct opp_table *dst_table, struct dev_pm_opp *src_opp) { -- cgit v1.2.3 From 1e5fb38442ebaabb65057c2e58355ee36c2a178f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 5 Jul 2022 09:41:56 +0530 Subject: OPP: Remove dev{m}_pm_opp_of_add_table_noclk() Remove the now unused variants and the now unnecessary "getclk" parameter from few routines. Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 683e6baf9618..dc1fb5890792 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -402,8 +402,6 @@ static inline int dev_pm_opp_sync_regulators(struct device *dev) int dev_pm_opp_of_add_table(struct device *dev); int dev_pm_opp_of_add_table_indexed(struct device *dev, int index); int devm_pm_opp_of_add_table_indexed(struct device *dev, int index); -int dev_pm_opp_of_add_table_noclk(struct device *dev, int index); -int devm_pm_opp_of_add_table_noclk(struct device *dev, int index); void dev_pm_opp_of_remove_table(struct device *dev); int devm_pm_opp_of_add_table(struct device *dev); int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask); @@ -434,16 +432,6 @@ static inline int devm_pm_opp_of_add_table_indexed(struct device *dev, int index return -EOPNOTSUPP; } -static inline int dev_pm_opp_of_add_table_noclk(struct device *dev, int index) -{ - return -EOPNOTSUPP; -} - -static inline int devm_pm_opp_of_add_table_noclk(struct device *dev, int index) -{ - return -EOPNOTSUPP; -} - static inline void dev_pm_opp_of_remove_table(struct device *dev) { } -- cgit v1.2.3 From 79f772b9e8004c542cc88aaf680189c3f6e9a0f2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 14 Jun 2022 22:56:15 +0000 Subject: KVM: x86: Query vcpu->vcpu_idx directly and drop its accessor, again Read vcpu->vcpu_idx directly instead of bouncing through the one-line wrapper, kvm_vcpu_get_idx(), and drop the wrapper. The wrapper is a remnant of the original implementation and serves no purpose; remove it (again) before it gains more users. kvm_vcpu_get_idx() was removed in the not-too-distant past by commit 4eeef2424153 ("KVM: x86: Query vcpu->vcpu_idx directly and drop its accessor"), but was unintentionally re-introduced by commit a54d806688fe ("KVM: Keep memslots in tree-based structures instead of array-based ones"), likely due to a rebase goof. The wrapper then managed to gain users in KVM's Xen code. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Jim Mattson Link: https://lore.kernel.org/r/20220614225615.3843835-1-seanjc@google.com --- include/linux/kvm_host.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3b40f8d68fbb..cb8168cc9755 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -907,11 +907,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) return NULL; } -static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) -{ - return vcpu->vcpu_idx; -} - void kvm_destroy_vcpus(struct kvm *kvm); void vcpu_load(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 4201d9ab3e42d9e2a20320b751a931e6239c0df2 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 11 Jul 2022 09:28:27 -0700 Subject: bpf: reparent bpf maps on memcg offlining The memory consumed by a bpf map is always accounted to the memory cgroup of the process which created the map. The map can outlive the memory cgroup if it's used by processes in other cgroups or is pinned on bpffs. In this case the map pins the original cgroup in the dying state. For other types of objects (slab objects, non-slab kernel allocations, percpu objects and recently LRU pages) there is a reparenting process implemented: on cgroup offlining charged objects are getting reassigned to the parent cgroup. Because all charges and statistics are fully recursive it's a fairly cheap operation. For efficiency and consistency with other types of objects, let's do the same for bpf maps. Fortunately thanks to the objcg API, the required changes are minimal. Please, note that individual allocations (slabs, percpu and large kmallocs) already have the reparenting mechanism. This commit adds it to the saved map->memcg pointer by replacing it to map->objcg. Because dying cgroups are not visible for a user and all charges are recursive, this commit doesn't bring any behavior changes for a user. v2: added a missing const qualifier Signed-off-by: Roman Gushchin Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20220711162827.184743-1-roman.gushchin@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2b21f2a3452f..85a4db3e0536 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -221,7 +221,7 @@ struct bpf_map { u32 btf_vmlinux_value_type_id; struct btf *btf; #ifdef CONFIG_MEMCG_KMEM - struct mem_cgroup *memcg; + struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; struct bpf_map_off_arr *off_arr; -- cgit v1.2.3 From 1d5f82d9dd477d5c66e0214a68c3e4f308eadd6d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 5 Jul 2022 17:26:12 -0700 Subject: bpf, x86: fix freeing of not-finalized bpf_prog_pack syzbot reported a few issues with bpf_prog_pack [1], [2]. This only happens with multiple subprogs. In jit_subprogs(), we first call bpf_int_jit_compile() on each sub program. And then, we call it on each sub program again. jit_data is not freed in the first call of bpf_int_jit_compile(). Similarly we don't call bpf_jit_binary_pack_finalize() in the first call of bpf_int_jit_compile(). If bpf_int_jit_compile() failed for one sub program, we will call bpf_jit_binary_pack_finalize() for this sub program. However, we don't have a chance to call it for other sub programs. Then we will hit "goto out_free" in jit_subprogs(), and call bpf_jit_free on some subprograms that haven't got bpf_jit_binary_pack_finalize() yet. At this point, bpf_jit_binary_pack_free() is called and the whole 2MB page is freed erroneously. Fix this with a custom bpf_jit_free() for x86_64, which calls bpf_jit_binary_pack_finalize() if necessary. Also, with custom bpf_jit_free(), bpf_prog_aux->use_bpf_prog_pack is not needed any more, remove it. Fixes: 1022a5498f6f ("bpf, x86_64: Use bpf_jit_binary_pack_alloc") [1] https://syzkaller.appspot.com/bug?extid=2f649ec6d2eea1495a8f [2] https://syzkaller.appspot.com/bug?extid=87f65c75f4a72db05445 Reported-by: syzbot+2f649ec6d2eea1495a8f@syzkaller.appspotmail.com Reported-by: syzbot+87f65c75f4a72db05445@syzkaller.appspotmail.com Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20220706002612.4013790-1-song@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 - include/linux/filter.h | 8 ++++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 85a4db3e0536..a5bf00649995 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1044,7 +1044,6 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; - bool use_bpf_prog_pack; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 4c1a8b247545..a5f21dc3c432 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1027,6 +1027,14 @@ u64 bpf_jit_alloc_exec_limit(void); void *bpf_jit_alloc_exec(unsigned long size); void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); +struct bpf_binary_header * +bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); + +static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) +{ + return list_empty(&fp->aux->ksym.lnode) || + fp->aux->ksym.lnode.prev == LIST_POISON2; +} struct bpf_binary_header * bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, -- cgit v1.2.3 From 401e4963bf45c800e3e9ea0d3a0289d738005fd4 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Fri, 8 Jul 2022 17:27:02 +0100 Subject: sched/core: Always flush pending blk_plug With CONFIG_PREEMPT_RT, it is possible to hit a deadlock between two normal priority tasks (SCHED_OTHER, nice level zero): INFO: task kworker/u8:0:8 blocked for more than 491 seconds. Not tainted 5.15.49-rt46 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u8:0 state:D stack: 0 pid: 8 ppid: 2 flags:0x00000000 Workqueue: writeback wb_workfn (flush-7:0) [] (__schedule) from [] (schedule+0xdc/0x134) [] (schedule) from [] (rt_mutex_slowlock_block.constprop.0+0xb8/0x174) [] (rt_mutex_slowlock_block.constprop.0) from [] +(rt_mutex_slowlock.constprop.0+0xac/0x174) [] (rt_mutex_slowlock.constprop.0) from [] (fat_write_inode+0x34/0x54) [] (fat_write_inode) from [] (__writeback_single_inode+0x354/0x3ec) [] (__writeback_single_inode) from [] (writeback_sb_inodes+0x250/0x45c) [] (writeback_sb_inodes) from [] (__writeback_inodes_wb+0x7c/0xb8) [] (__writeback_inodes_wb) from [] (wb_writeback+0x2c8/0x2e4) [] (wb_writeback) from [] (wb_workfn+0x1a4/0x3e4) [] (wb_workfn) from [] (process_one_work+0x1fc/0x32c) [] (process_one_work) from [] (worker_thread+0x22c/0x2d8) [] (worker_thread) from [] (kthread+0x16c/0x178) [] (kthread) from [] (ret_from_fork+0x14/0x38) Exception stack(0xc10e3fb0 to 0xc10e3ff8) 3fa0: 00000000 00000000 00000000 00000000 3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 3fe0: 00000000 00000000 00000000 00000000 00000013 00000000 INFO: task tar:2083 blocked for more than 491 seconds. Not tainted 5.15.49-rt46 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:tar state:D stack: 0 pid: 2083 ppid: 2082 flags:0x00000000 [] (__schedule) from [] (schedule+0xdc/0x134) [] (schedule) from [] (io_schedule+0x14/0x24) [] (io_schedule) from [] (bit_wait_io+0xc/0x30) [] (bit_wait_io) from [] (__wait_on_bit_lock+0x54/0xa8) [] (__wait_on_bit_lock) from [] (out_of_line_wait_on_bit_lock+0x84/0xb0) [] (out_of_line_wait_on_bit_lock) from [] (fat_mirror_bhs+0xa0/0x144) [] (fat_mirror_bhs) from [] (fat_alloc_clusters+0x138/0x2a4) [] (fat_alloc_clusters) from [] (fat_alloc_new_dir+0x34/0x250) [] (fat_alloc_new_dir) from [] (vfat_mkdir+0x58/0x148) [] (vfat_mkdir) from [] (vfs_mkdir+0x68/0x98) [] (vfs_mkdir) from [] (do_mkdirat+0xb0/0xec) [] (do_mkdirat) from [] (ret_fast_syscall+0x0/0x1c) Exception stack(0xc2e1bfa8 to 0xc2e1bff0) bfa0: 01ee42f0 01ee4208 01ee42f0 000041ed 00000000 00004000 bfc0: 01ee42f0 01ee4208 00000000 00000027 01ee4302 00000004 000dcb00 01ee4190 bfe0: 000dc368 bed11924 0006d4b0 b6ebddfc Here the kworker is waiting on msdos_sb_info::s_lock which is held by tar which is in turn waiting for a buffer which is locked waiting to be flushed, but this operation is plugged in the kworker. The lock is a normal struct mutex, so tsk_is_pi_blocked() will always return false on !RT and thus the behaviour changes for RT. It seems that the intent here is to skip blk_flush_plug() in the case where a non-preemptible lock (such as a spinlock) has been converted to a rtmutex on RT, which is the case covered by the SM_RTLOCK_WAIT schedule flag. But sched_submit_work() is only called from schedule() which is never called in this scenario, so the check can simply be deleted. Looking at the history of the -rt patchset, in fact this change was present from v5.9.1-rt20 until being dropped in v5.13-rt1 as it was part of a larger patch [1] most of which was replaced by commit b4bfa3fcfe3b ("sched/core: Rework the __schedule() preempt argument"). As described in [1]: The schedule process must distinguish between blocking on a regular sleeping lock (rwsem and mutex) and a RT-only sleeping lock (spinlock and rwlock): - rwsem and mutex must flush block requests (blk_schedule_flush_plug()) even if blocked on a lock. This can not deadlock because this also happens for non-RT. There should be a warning if the scheduling point is within a RCU read section. - spinlock and rwlock must not flush block requests. This will deadlock if the callback attempts to acquire a lock which is already acquired. Similarly to being preempted, there should be no warning if the scheduling point is within a RCU read section. and with the tsk_is_pi_blocked() in the scheduler path, we hit the first issue. [1] https://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git/tree/patches/0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch?h=linux-5.10.y-rt-patches Signed-off-by: John Keeping Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (Google) Link: https://lkml.kernel.org/r/20220708162702.1758865-1-john@metanate.com --- include/linux/sched/rt.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index e5af028c08b4..994c25640e15 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -39,20 +39,12 @@ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) } extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); extern void rt_mutex_adjust_pi(struct task_struct *p); -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -{ - return tsk->pi_blocked_on != NULL; -} #else static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) { return NULL; } # define rt_mutex_adjust_pi(p) do { } while (0) -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -{ - return false; -} #endif extern void normalize_rt_tasks(void); -- cgit v1.2.3 From 3beb0ab5bffba625007ea5c9e5e0ee5eef05c1ea Mon Sep 17 00:00:00 2001 From: Seunghui Lee Date: Wed, 13 Jul 2022 12:36:34 +0900 Subject: mmc: core: Use mmc_card_* macro and add a new for the sd_combo type Add mmc_card_sd_combo() macro for sd combo type card and use the mmc_card_* macro to simplify code instead of comparing card->type. Signed-off-by: Seunghui Lee Link: https://lore.kernel.org/r/20220713033635.28432-2-sh043.lee@samsung.com Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 37f975875102..156a7b673a28 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -348,5 +348,6 @@ bool mmc_card_is_blockaddr(struct mmc_card *card); #define mmc_card_mmc(c) ((c)->type == MMC_TYPE_MMC) #define mmc_card_sd(c) ((c)->type == MMC_TYPE_SD) #define mmc_card_sdio(c) ((c)->type == MMC_TYPE_SDIO) +#define mmc_card_sd_combo(c) ((c)->type == MMC_TYPE_SD_COMBO) #endif /* LINUX_MMC_CARD_H */ -- cgit v1.2.3 From 20347fca71a387a3751f7bb270062616ddc5317a Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Fri, 8 Jul 2022 12:15:44 -0400 Subject: swiotlb: split up the global swiotlb lock Traditionally swiotlb was not performance critical because it was only used for slow devices. But in some setups, like TDX/SEV confidential guests, all IO has to go through swiotlb. Currently swiotlb only has a single lock. Under high IO load with multiple CPUs this can lead to significat lock contention on the swiotlb lock. This patch splits the swiotlb bounce buffer pool into individual areas which have their own lock. Each CPU tries to allocate in its own area first. Only if that fails does it search other areas. On freeing the allocation is freed into the area where the memory was originally allocated from. Area number can be set via swiotlb kernel parameter and is default to be possible cpu number. If possible cpu number is not power of 2, area number will be round up to the next power of 2. This idea from Andi Kleen patch(https://github.com/intel/tdx/commit/ 4529b5784c141782c72ec9bd9a92df2b68cb7d45). Based-on-idea-by: Andi Kleen Signed-off-by: Tianyu Lan Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index bdc58a0e20b1..f65ff1930120 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -88,6 +88,8 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * @late_alloc: %true if allocated using the page allocator * @force_bounce: %true if swiotlb bouncing is forced * @for_alloc: %true if the pool is used for memory allocation + * @nareas: The area number in the pool. + * @area_nslabs: The slot number in the area. */ struct io_tlb_mem { phys_addr_t start; @@ -101,6 +103,9 @@ struct io_tlb_mem { bool late_alloc; bool force_bounce; bool for_alloc; + unsigned int nareas; + unsigned int area_nslabs; + struct io_tlb_area *areas; struct io_tlb_slot { phys_addr_t orig_addr; size_t alloc_size; -- cgit v1.2.3 From 6d1c1a73e1126572de0a8b063fe62fe43786ed59 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Fri, 8 Jul 2022 14:13:11 +0800 Subject: soundwire: Intel: add trigger callback When a pipeline is split into FE and BE parts, the BE pipeline may need to be triggered separately in the BE trigger op. So add the trigger callback in the link_res ops that will be invoked during BE DAI trigger. Signed-off-by: Bard Liao Reviewed-by: Rander Wang Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Acked-by: Vinod Koul Link: https://lore.kernel.org/r/20220708061312.25878-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw_intel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index b5b489ea1aef..ec16ae49e6a4 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -121,6 +121,7 @@ struct sdw_intel_ops { struct sdw_intel_stream_params_data *params_data); int (*free_stream)(struct device *dev, struct sdw_intel_stream_free_data *free_data); + int (*trigger)(struct snd_soc_dai *dai, int cmd, int stream); }; /** -- cgit v1.2.3 From af16df54b89dee72df253abc5e7b5e8a6d16c11c Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Wed, 13 Jul 2022 15:21:11 +0800 Subject: ima: force signature verification when CONFIG_KEXEC_SIG is configured Currently, an unsigned kernel could be kexec'ed when IMA arch specific policy is configured unless lockdown is enabled. Enforce kernel signature verification check in the kexec_file_load syscall when IMA arch specific policy is configured. Fixes: 99d5cadfde2b ("kexec_file: split KEXEC_VERIFY_SIG into KEXEC_SIG and KEXEC_SIG_FORCE") Reported-and-suggested-by: Mimi Zohar Signed-off-by: Coiby Xu Signed-off-by: Mimi Zohar --- include/linux/kexec.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index ce6536f1d269..475683cd67f1 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -452,6 +452,12 @@ static inline int kexec_crash_loaded(void) { return 0; } #define kexec_in_progress false #endif /* CONFIG_KEXEC_CORE */ +#ifdef CONFIG_KEXEC_SIG +void set_kexec_sig_enforced(void); +#else +static inline void set_kexec_sig_enforced(void) {} +#endif + #endif /* !defined(__ASSEBMLY__) */ #endif /* LINUX_KEXEC_H */ -- cgit v1.2.3 From 0372c546eca575445331c0ad8902210b70be6d61 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 2 Jun 2022 12:41:00 +0300 Subject: net/mlx5: Introduce ifc bits for using software vhca id Introduce ifc related stuff to enable using software vhca id functionality. Signed-off-by: Yishai Hadas Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8e87eb47f9dc..254cc22f5eec 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1826,7 +1826,14 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 max_reformat_remove_size[0x8]; u8 max_reformat_remove_offset[0x8]; - u8 reserved_at_c0[0x740]; + u8 reserved_at_c0[0x160]; + + u8 reserved_at_220[0x1]; + u8 sw_vhca_id_valid[0x1]; + u8 sw_vhca_id[0xe]; + u8 reserved_at_230[0x10]; + + u8 reserved_at_240[0x5c0]; }; enum mlx5_ifc_flow_destination_type { @@ -3782,6 +3789,11 @@ struct mlx5_ifc_rmpc_bits { struct mlx5_ifc_wq_bits wq; }; +enum { + VHCA_ID_TYPE_HW = 0, + VHCA_ID_TYPE_SW = 1, +}; + struct mlx5_ifc_nic_vport_context_bits { u8 reserved_at_0[0x5]; u8 min_wqe_inline_mode[0x3]; @@ -3798,8 +3810,8 @@ struct mlx5_ifc_nic_vport_context_bits { u8 event_on_mc_address_change[0x1]; u8 event_on_uc_address_change[0x1]; - u8 reserved_at_40[0xc]; - + u8 vhca_id_type[0x1]; + u8 reserved_at_41[0xb]; u8 affiliation_criteria[0x4]; u8 affiliated_vhca_id[0x10]; @@ -7259,7 +7271,12 @@ struct mlx5_ifc_init_hca_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x2]; + u8 sw_vhca_id[0xe]; + u8 reserved_at_70[0x10]; + u8 sw_owner_id[4][0x20]; }; -- cgit v1.2.3 From dc402ccc0d7b55922a79505df3000da7deb77a2b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 2 Jun 2022 12:47:34 +0300 Subject: net/mlx5: Use software VHCA id when it's supported Use software VHCA id when it's supported by the firmware. A unique id is allocated upon mlx5_mdev_init() and freed upon mlx5_mdev_uninit(), as such it stays the same during the full life cycle of the device including upon health recovery if occurred. The conjunction of sw_vhca_id with sw_owner_id will be a global unique id per function which uses mlx5_core. The sw_vhca_id is set upon init_hca command and is used to specify the VHCA that the NIC vport is affiliated with. This functionality is needed upon migration of VM which is MPV based. (i.e. multi port device). Signed-off-by: Yishai Hadas Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bd882884b23c..ecda6e63d5f2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -610,6 +610,7 @@ struct mlx5_priv { spinlock_t ctx_lock; struct mlx5_adev **adev; int adev_idx; + int sw_vhca_id; struct mlx5_events *events; struct mlx5_flow_steering *steering; -- cgit v1.2.3 From fac47b43c760ea90e64b895dba60df0327be7775 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 11 Jul 2022 12:11:21 +0800 Subject: netfs: do not unlock and put the folio twice check_write_begin() will unlock and put the folio when return non-zero. So we should avoid unlocking and putting it twice in netfs layer. Change the way ->check_write_begin() works in the following two ways: (1) Pass it a pointer to the folio pointer, allowing it to unlock and put the folio prior to doing the stuff it wants to do, provided it clears the folio pointer. (2) Change the return values such that 0 with folio pointer set means continue, 0 with folio pointer cleared means re-get and all error codes indicating an error (no special treatment for -EAGAIN). [ bagasdotme: use Sphinx code text syntax for *foliop pointer ] Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/56423 Link: https://lore.kernel.org/r/cf169f43-8ee7-8697-25da-0204d1b4343e@redhat.com Co-developed-by: David Howells Signed-off-by: Xiubo Li Signed-off-by: David Howells Signed-off-by: Bagas Sanjaya Signed-off-by: Ilya Dryomov --- include/linux/netfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 1773e5df8e65..1b18dfa52e48 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -214,7 +214,7 @@ struct netfs_request_ops { void (*issue_read)(struct netfs_io_subrequest *subreq); bool (*is_still_valid)(struct netfs_io_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, - struct folio *folio, void **_fsdata); + struct folio **foliop, void **_fsdata); void (*done)(struct netfs_io_request *rreq); }; -- cgit v1.2.3 From 9745fb07474f4501eff62130a78a42a8b8c18b05 Mon Sep 17 00:00:00 2001 From: Jonathan Yong Date: Mon, 6 Jun 2022 19:41:27 +0300 Subject: platform/x86/intel: Add Primary to Sideband (P2SB) bridge support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SoC features such as GPIO are accessed via a reserved MMIO area, we don't know its address but can obtain it from the BAR of the P2SB device, that device is normally hidden so we have to temporarily unhide it, read address and hide it back. There are already a few users and at least one more is coming which require an access to Primary to Sideband (P2SB) bridge in order to get IO or MMIO BAR hidden by BIOS. Create a library to access P2SB for x86 devices in a unified way. Background information ====================== Note, the term "bridge" is used in the documentation and it has nothing to do with a PCI (host) bridge as per the PCI specifications. The P2SB is an interesting device by its nature and hardware design. First of all, it has several devices in the hardware behind it. These devices may or may not be represented as ACPI devices by a firmware. It also has a hardwired (to 0s) the least significant bits of the base address register which is represented by the only 64-bit BAR0. It means that OS mustn't reallocate the BAR. On top of that in some cases P2SB is represented by function 0 on PCI slot (in terms of B:D.F) and according to the PCI specification any other function can't be seen until function 0 is present and visible. In the PCI configuration space of P2SB device the full 32-bit register is allocated for the only purpose of hiding the entire P2SB device. As per [3]: 3.1.39 P2SB Control (P2SBC)—Offset E0h Hide Device (HIDE): When this bit is set, the P2SB will return 1s on any PCI Configuration Read on IOSF-P. All other transactions including PCI Configuration Writes on IOSF-P are unaffected by this. This does not affect reads performed on the IOSF-SB interface. This doesn't prevent MMIO accesses, although preventing the OS from assigning these addresses. The firmware on the affected platforms marks the region as unusable (by cutting it off from the PCI host bridge resources) as depicted in the Apollo Lake example below: PCI host bridge to bus 0000:00 pci_bus 0000:00: root bus resource [io 0x0070-0x0077] pci_bus 0000:00: root bus resource [io 0x0000-0x006f window] pci_bus 0000:00: root bus resource [io 0x0078-0x0cf7 window] pci_bus 0000:00: root bus resource [io 0x0d00-0xffff window] pci_bus 0000:00: root bus resource [mem 0x7c000001-0x7fffffff window] pci_bus 0000:00: root bus resource [mem 0x7b800001-0x7bffffff window] pci_bus 0000:00: root bus resource [mem 0x80000000-0xcfffffff window] pci_bus 0000:00: root bus resource [mem 0xe0000000-0xefffffff window] pci_bus 0000:00: root bus resource [bus 00-ff] The P2SB 16MB BAR is located at 0xd0000000-0xd0ffffff memory window. The generic solution ==================== The generic solution for all cases when we need to access to the information behind P2SB device is a library code where users ask for necessary resources by demand and hence those users take care of not being run on the systems where this access is not required. The library provides the p2sb_bar() API to retrieve the MMIO of the BAR0 of the device from P2SB device slot. P2SB unconditional unhiding awareness ===================================== Technically it's possible to unhide the P2SB device and devices on the same PCI slot and access them at any time as needed. But there are several potential issues with that: - the systems were never tested against such configuration and hence nobody knows what kind of bugs it may bring, especially when we talk about SPI NOR case which contains Intel FirmWare Image (IFWI) code (including BIOS) and already known to be problematic in the past for end users - the PCI by its nature is a hotpluggable bus and in case somebody attaches a driver to the functions of a P2SB slot device(s) the end user experience and system behaviour can be unpredictable - the kernel code would need some ugly hacks (or code looking as an ugly hack) under arch/x86/pci in order to enable these devices on only selected platforms (which may include CPU ID table followed by a potentially growing number of DMI strings The future improvements ======================= The future improvements with this code may go in order to gain some kind of cache, if it's possible at all, to prevent unhiding and hiding many times to take static information that may be saved once per boot. Links ===== [1]: https://lab.whitequark.org/notes/2017-11-08/accessing-intel-ich-pch-gpios/ [2]: https://cdrdv2.intel.com/v1/dl/getContent/332690?wapkw=332690 [3]: https://cdrdv2.intel.com/v1/dl/getContent/332691?wapkw=332691 [4]: https://medium.com/@jacksonchen_43335/bios-gpio-p2sb-70e9b829b403 Signed-off-by: Jonathan Yong Co-developed-by: Andy Shevchenko Signed-off-by: Andy Shevchenko Tested-by: Henning Schild Acked-by: Hans de Goede Acked-by: Linus Walleij Signed-off-by: Lee Jones --- include/linux/platform_data/x86/p2sb.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/linux/platform_data/x86/p2sb.h (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/p2sb.h b/include/linux/platform_data/x86/p2sb.h new file mode 100644 index 000000000000..a1d5fddc8f13 --- /dev/null +++ b/include/linux/platform_data/x86/p2sb.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Primary to Sideband (P2SB) bridge access support + */ + +#ifndef _PLATFORM_DATA_X86_P2SB_H +#define _PLATFORM_DATA_X86_P2SB_H + +#include +#include + +struct pci_bus; +struct resource; + +#if IS_BUILTIN(CONFIG_P2SB) + +int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem); + +#else /* CONFIG_P2SB */ + +static inline int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +{ + return -ENODEV; +} + +#endif /* CONFIG_P2SB is not set */ + +#endif /* _PLATFORM_DATA_X86_P2SB_H */ -- cgit v1.2.3 From 446f0cf9e08b483d7dc6f61eee0ee846b22f6386 Mon Sep 17 00:00:00 2001 From: Henning Schild Date: Mon, 6 Jun 2022 19:41:37 +0300 Subject: platform/x86: simatic-ipc: drop custom P2SB bar code The two drivers that used to use this have been switched over to the common P2SB accessor, so this code is not needed any longer. Signed-off-by: Henning Schild Signed-off-by: Andy Shevchenko Reviewed-by: Hans de Goede Signed-off-by: Lee Jones --- include/linux/platform_data/x86/simatic-ipc-base.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/simatic-ipc-base.h b/include/linux/platform_data/x86/simatic-ipc-base.h index 62d2bc774067..39fefd48cf4d 100644 --- a/include/linux/platform_data/x86/simatic-ipc-base.h +++ b/include/linux/platform_data/x86/simatic-ipc-base.h @@ -24,6 +24,4 @@ struct simatic_ipc_platform { u8 devmode; }; -u32 simatic_ipc_get_membase0(unsigned int p2sb); - #endif /* __PLATFORM_DATA_X86_SIMATIC_IPC_BASE_H */ -- cgit v1.2.3 From 1b870fa5573e260bc74d19f381ab0dd971a8d8e7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Jul 2022 07:27:31 -0400 Subject: kvm: stats: tell userspace which values are boolean Some of the statistics values exported by KVM are always only 0 or 1. It can be useful to export this fact to userspace so that it can track them specially (for example by polling the value every now and then to compute a % of time spent in a specific state). Therefore, add "boolean value" as a new "unit". While it is not exactly a unit, it walks and quacks like one. In particular, using the type would be wrong because boolean values could be instantaneous or peak values (e.g. "is the rmap allocated?") or even two-bucket histograms (e.g. "number of posted vs. non-posted interrupt injections"). Suggested-by: Amneesh Singh Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 83cf7fd842e0..90a45ef7203b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1822,6 +1822,15 @@ struct _kvm_stats_desc { STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \ KVM_STATS_BASE_POW10, 0) +/* Instantaneous boolean value, read only */ +#define STATS_DESC_IBOOLEAN(SCOPE, name) \ + STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ + KVM_STATS_BASE_POW10, 0) +/* Peak (sticky) boolean value, read/write */ +#define STATS_DESC_PBOOLEAN(SCOPE, name) \ + STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ + KVM_STATS_BASE_POW10, 0) + /* Cumulative time in nanosecond */ #define STATS_DESC_TIME_NSEC(SCOPE, name) \ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \ @@ -1853,7 +1862,7 @@ struct _kvm_stats_desc { HALT_POLL_HIST_COUNT), \ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \ HALT_POLL_HIST_COUNT), \ - STATS_DESC_ICOUNTER(VCPU_GENERIC, blocking) + STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) extern struct dentry *kvm_debugfs_dir; -- cgit v1.2.3 From 4fa05a67b558d2cb3acd2bb299b91220d405ca5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 11 Jul 2022 16:48:01 +0200 Subject: dma-buf: revert "return only unsignaled fences in dma_fence_unwrap_for_each v3" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 8f61973718485f3e89bc4f408f929048b7b47c83. It turned out that this is not correct. Especially the sync_file info IOCTL needs to see even signaled fences to correctly report back their status to userspace. Instead add the filter in the merge function again where it makes sense. Signed-off-by: Christian König Tested-by: Karolina Drobnik Reviewed-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20220712102849.1562-1-christian.koenig@amd.com --- include/linux/dma-fence-unwrap.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h index 390de1ee9d35..66b1e56fbb81 100644 --- a/include/linux/dma-fence-unwrap.h +++ b/include/linux/dma-fence-unwrap.h @@ -43,14 +43,10 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor); * Unwrap dma_fence_chain and dma_fence_array containers and deep dive into all * potential fences in them. If @head is just a normal fence only that one is * returned. - * - * Note that signalled fences are opportunistically filtered out, which - * means the iteration is potentially over no fence at all. */ #define dma_fence_unwrap_for_each(fence, cursor, head) \ for (fence = dma_fence_unwrap_first(head, cursor); fence; \ - fence = dma_fence_unwrap_next(cursor)) \ - if (!dma_fence_is_signaled(fence)) + fence = dma_fence_unwrap_next(cursor)) struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence **fences, -- cgit v1.2.3 From ddaf8d96f93bccb3f2b1f4f156c098b272440004 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Mon, 11 Jul 2022 07:22:55 +0000 Subject: usb: typec: Add support for retimers Introduce a retimer device class and associated functions that register and use retimer "switch" devices. These operate in a manner similar to the "mode-switch" and help configure retimers that exist between the Type-C connector and host controller(s). Type C ports can be linked to retimers using firmware node device references (again, in a manner similar to "mode-switch"). There are no new sysfs files being created; there is the new retimer class directory, but there are no class-specific files being created there. Signed-off-by: Prashant Malani Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20220711072333.2064341-2-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_retimer.h | 45 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 include/linux/usb/typec_retimer.h (limited to 'include/linux') diff --git a/include/linux/usb/typec_retimer.h b/include/linux/usb/typec_retimer.h new file mode 100644 index 000000000000..5e036b3360e2 --- /dev/null +++ b/include/linux/usb/typec_retimer.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __USB_TYPEC_RETIMER +#define __USB_TYPEC_RETIMER + +#include +#include + +struct device; +struct typec_retimer; +struct typec_altmode; +struct fwnode_handle; + +struct typec_retimer_state { + struct typec_altmode *alt; + unsigned long mode; + void *data; +}; + +typedef int (*typec_retimer_set_fn_t)(struct typec_retimer *retimer, + struct typec_retimer_state *state); + +struct typec_retimer_desc { + struct fwnode_handle *fwnode; + typec_retimer_set_fn_t set; + const char *name; + void *drvdata; +}; + +struct typec_retimer *fwnode_typec_retimer_get(struct fwnode_handle *fwnode); +void typec_retimer_put(struct typec_retimer *retimer); +int typec_retimer_set(struct typec_retimer *retimer, struct typec_retimer_state *state); + +static inline struct typec_retimer *typec_retimer_get(struct device *dev) +{ + return fwnode_typec_retimer_get(dev_fwnode(dev)); +} + +struct typec_retimer * +typec_retimer_register(struct device *parent, const struct typec_retimer_desc *desc); +void typec_retimer_unregister(struct typec_retimer *retimer); + +void *typec_retimer_get_drvdata(struct typec_retimer *retimer); + +#endif /* __USB_TYPEC_RETIMER */ -- cgit v1.2.3 From e6281c26674e037798bf674f26a7593a324cdf39 Mon Sep 17 00:00:00 2001 From: Ang Tien Sung Date: Mon, 11 Jul 2022 17:31:35 -0500 Subject: firmware: stratix10-svc: Add support for FCS Extend Intel service layer driver to support FPGA Crypto service(FCS) features on Intel Soc platforms. Adding an additional channel and FCS platform driver ("intel_fcs") as part of the probe method. FCS driver uses the driver to send crypto operations' commands to the secure device manager(SDM) on Intel Soc platforms Stratix10 and Agilex. Signed-off-by: Ang Tien Sung Signed-off-by: Dinh Nguyen Link: https://lore.kernel.org/r/20220711223140.2307945-1-dinguyen@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/intel/stratix10-svc-client.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index 18c1841fdb1f..24121f8e0d7c 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -14,6 +14,7 @@ */ #define SVC_CLIENT_FPGA "fpga" #define SVC_CLIENT_RSU "rsu" +#define SVC_CLIENT_FCS "fcs" /* * Status of the sent command, in bit number -- cgit v1.2.3 From 79b936254aa0eb4e3bc73fecaacf049145613c0a Mon Sep 17 00:00:00 2001 From: Ang Tien Sung Date: Mon, 11 Jul 2022 17:31:36 -0500 Subject: firmware: stratix10-svc: add FCS polling command Introduce a new SMC command INTEL_SIP_SMC_FUNCID_SERVICE_COMPLETED that polls if a previous asynchronous command was completed. This SMC command is used by the new FPGA Crypto Service (FCS). A basic example is that the FCS sends an AES data encryption call to the secure device manager(SDM) and waits for the completion of the operation by continuously polling the results with the new command. Signed-off-by: Ang Tien Sung Signed-off-by: Dinh Nguyen Link: https://lore.kernel.org/r/20220711223140.2307945-2-dinguyen@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/intel/stratix10-smc.h | 25 ++++++++++++++++++++++ .../linux/firmware/intel/stratix10-svc-client.h | 5 +++++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h index aad497a9ad8b..0de104009566 100644 --- a/include/linux/firmware/intel/stratix10-smc.h +++ b/include/linux/firmware/intel/stratix10-smc.h @@ -403,6 +403,31 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE) #define INTEL_SIP_SMC_RSU_MAX_RETRY \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_RSU_MAX_RETRY) +/** + * Request INTEL_SIP_SMC_SERVICE_COMPLETED + * Sync call to check if the secure world have completed service request + * or not. + * + * Call register usage: + * a0: INTEL_SIP_SMC_SERVICE_COMPLETED + * a1: this register is optional. If used, it is the physical address for + * secure firmware to put output data + * a2: this register is optional. If used, it is the size of output data + * a3-a7: not used + * + * Return status: + * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_STATUS_ERROR, + * INTEL_SIP_SMC_REJECTED or INTEL_SIP_SMC_STATUS_BUSY + * a1: mailbox error if a0 is INTEL_SIP_SMC_STATUS_ERROR + * a2: physical address containing the process info + * for FCS certificate -- the data contains the certificate status + * for FCS cryption -- the data contains the actual data size FW processes + * a3: output data size + */ +#define INTEL_SIP_SMC_FUNCID_SERVICE_COMPLETED 30 +#define INTEL_SIP_SMC_SERVICE_COMPLETED \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_SERVICE_COMPLETED) + /** * Request INTEL_SIP_SMC_FIRMWARE_VERSION * diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index 24121f8e0d7c..5d0e814e0c41 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -106,6 +106,9 @@ struct stratix10_svc_chan; * @COMMAND_RSU_DCMF_VERSION: query firmware for the DCMF version, return status * is SVC_STATUS_OK or SVC_STATUS_ERROR * + * @COMMAND_POLL_SERVICE_STATUS: poll if the service request is complete, + * return statis is SVC_STATUS_OK, SVC_STATUS_ERROR or SVC_STATUS_BUSY + * * @COMMAND_FIRMWARE_VERSION: query running firmware version, return status * is SVC_STATUS_OK or SVC_STATUS_ERROR */ @@ -122,6 +125,8 @@ enum stratix10_svc_command_code { COMMAND_RSU_MAX_RETRY, COMMAND_RSU_DCMF_VERSION, COMMAND_FIRMWARE_VERSION, + /* for general status poll */ + COMMAND_POLL_SERVICE_STATUS = 40, }; /** -- cgit v1.2.3 From 4a4709d470e624e65a879b5c430dee5e27e9ac83 Mon Sep 17 00:00:00 2001 From: Ang Tien Sung Date: Mon, 11 Jul 2022 17:31:37 -0500 Subject: firmware: stratix10-svc: add new FCS commands Extending the fpga svc driver to support 6 new FPGA Crypto Service(FCS) commands. We are adding FCS SDOS data encryption and decryption, random number generator, image validation request, reading the data provision and certificate validation. Signed-off-by: Ang Tien Sung Signed-off-by: Dinh Nguyen Link: https://lore.kernel.org/r/20220711223140.2307945-3-dinguyen@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/intel/stratix10-smc.h | 111 +++++++++++++++++++++ .../linux/firmware/intel/stratix10-svc-client.h | 41 +++++++- 2 files changed, 148 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h index 0de104009566..8c198984a47c 100644 --- a/include/linux/firmware/intel/stratix10-smc.h +++ b/include/linux/firmware/intel/stratix10-smc.h @@ -445,4 +445,115 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE) #define INTEL_SIP_SMC_FIRMWARE_VERSION \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FIRMWARE_VERSION) +/** + * SMC call protocol for FPGA Crypto Service (FCS) + * FUNCID starts from 90 + */ + +/** + * Request INTEL_SIP_SMC_FCS_RANDOM_NUMBER + * + * Sync call used to query the random number generated by the firmware + * + * Call register usage: + * a0 INTEL_SIP_SMC_FCS_RANDOM_NUMBER + * a1 the physical address for firmware to write generated random data + * a2-a7 not used + * + * Return status: + * a0 INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_FCS_ERROR or + * INTEL_SIP_SMC_FCS_REJECTED + * a1 mailbox error + * a2 the physical address of generated random number + * a3 size + */ +#define INTEL_SIP_SMC_FUNCID_FCS_RANDOM_NUMBER 90 +#define INTEL_SIP_SMC_FCS_RANDOM_NUMBER \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FCS_RANDOM_NUMBER) + +/** + * Request INTEL_SIP_SMC_FCS_CRYPTION + * Async call for data encryption and HMAC signature generation, or for + * data decryption and HMAC verification. + * + * Call INTEL_SIP_SMC_SERVICE_COMPLETED to get the output encrypted or + * decrypted data + * + * Call register usage: + * a0 INTEL_SIP_SMC_FCS_CRYPTION + * a1 cryption mode (1 for encryption and 0 for decryption) + * a2 physical address which stores to be encrypted or decrypted data + * a3 input data size + * a4 physical address which will hold the encrypted or decrypted output data + * a5 output data size + * a6-a7 not used + * + * Return status: + * a0 INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_STATUS_ERROR or + * INTEL_SIP_SMC_STATUS_REJECTED + * a1-3 not used + */ +#define INTEL_SIP_SMC_FUNCID_FCS_CRYPTION 91 +#define INTEL_SIP_SMC_FCS_CRYPTION \ + INTEL_SIP_SMC_STD_CALL_VAL(INTEL_SIP_SMC_FUNCID_FCS_CRYPTION) + +/** + * Request INTEL_SIP_SMC_FCS_SERVICE_REQUEST + * Async call for authentication service of HPS software + * + * Call register usage: + * a0 INTEL_SIP_SMC_FCS_SERVICE_REQUEST + * a1 the physical address of data block + * a2 size of data block + * a3-a7 not used + * + * Return status: + * a0 INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_ERROR or + * INTEL_SIP_SMC_REJECTED + * a1-a3 not used + */ +#define INTEL_SIP_SMC_FUNCID_FCS_SERVICE_REQUEST 92 +#define INTEL_SIP_SMC_FCS_SERVICE_REQUEST \ + INTEL_SIP_SMC_STD_CALL_VAL(INTEL_SIP_SMC_FUNCID_FCS_SERVICE_REQUEST) + +/** + * Request INTEL_SIP_SMC_FUNCID_FCS_SEND_CERTIFICATE + * Sync call to send a signed certificate + * + * Call register usage: + * a0 INTEL_SIP_SMC_FCS_SEND_CERTIFICATE + * a1 the physical address of CERTIFICATE block + * a2 size of data block + * a3-a7 not used + * + * Return status: + * a0 INTEL_SIP_SMC_STATUS_OK or INTEL_SIP_SMC_FCS_REJECTED + * a1-a3 not used + */ +#define INTEL_SIP_SMC_FUNCID_FCS_SEND_CERTIFICATE 93 +#define INTEL_SIP_SMC_FCS_SEND_CERTIFICATE \ + INTEL_SIP_SMC_STD_CALL_VAL(INTEL_SIP_SMC_FUNCID_FCS_SEND_CERTIFICATE) + +/** + * Request INTEL_SIP_SMC_FCS_GET_PROVISION_DATA + * Sync call to dump all the fuses and key hashes + * + * Call register usage: + * a0 INTEL_SIP_SMC_FCS_GET_PROVISION_DATA + * a1 the physical address for firmware to write structure of fuse and + * key hashes + * a2-a7 not used + * + * Return status: + * a0 INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_FCS_ERROR or + * INTEL_SIP_SMC_FCS_REJECTED + * a1 mailbox error + * a2 physical address for the structure of fuse and key hashes + * a3 the size of structure + * + */ +#define INTEL_SIP_SMC_FUNCID_FCS_GET_PROVISION_DATA 94 +#define INTEL_SIP_SMC_FCS_GET_PROVISION_DATA \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FCS_GET_PROVISION_DATA) + #endif diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index 5d0e814e0c41..677720792259 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -50,8 +50,8 @@ #define SVC_STATUS_BUSY 4 #define SVC_STATUS_ERROR 5 #define SVC_STATUS_NO_SUPPORT 6 - -/* +#define SVC_STATUS_INVALID_PARAM 7 +/** * Flag bit for COMMAND_RECONFIG * * COMMAND_RECONFIG_FLAG_PARTIAL: @@ -67,6 +67,8 @@ #define SVC_RECONFIG_REQUEST_TIMEOUT_MS 300 #define SVC_RECONFIG_BUFFER_TIMEOUT_MS 720 #define SVC_RSU_REQUEST_TIMEOUT_MS 300 +#define SVC_FCS_REQUEST_TIMEOUT_MS 2000 +#define SVC_COMPLETED_TIMEOUT_MS 30000 struct stratix10_svc_chan; @@ -111,20 +113,47 @@ struct stratix10_svc_chan; * * @COMMAND_FIRMWARE_VERSION: query running firmware version, return status * is SVC_STATUS_OK or SVC_STATUS_ERROR + * + * @COMMAND_FCS_REQUEST_SERVICE: request validation of image from firmware, + * return status is SVC_STATUS_OK, SVC_STATUS_INVALID_PARAM + * + * @COMMAND_FCS_SEND_CERTIFICATE: send a certificate, return status is + * SVC_STATUS_OK, SVC_STATUS_INVALID_PARAM, SVC_STATUS_ERROR + * + * @COMMAND_FCS_GET_PROVISION_DATA: read the provisioning data, return status is + * SVC_STATUS_OK, SVC_STATUS_INVALID_PARAM, SVC_STATUS_ERROR + * + * @COMMAND_FCS_DATA_ENCRYPTION: encrypt the data, return status is + * SVC_STATUS_OK, SVC_STATUS_INVALID_PARAM, SVC_STATUS_ERROR + * + * @COMMAND_FCS_DATA_DECRYPTION: decrypt the data, return status is + * SVC_STATUS_OK, SVC_STATUS_INVALID_PARAM, SVC_STATUS_ERROR + * + * @COMMAND_FCS_RANDOM_NUMBER_GEN: generate a random number, return status + * is SVC_STATUS_OK, SVC_STATUS_ERROR */ enum stratix10_svc_command_code { + /* for FPGA */ COMMAND_NOOP = 0, COMMAND_RECONFIG, COMMAND_RECONFIG_DATA_SUBMIT, COMMAND_RECONFIG_DATA_CLAIM, COMMAND_RECONFIG_STATUS, - COMMAND_RSU_STATUS, + /* for RSU */ + COMMAND_RSU_STATUS = 10, COMMAND_RSU_UPDATE, COMMAND_RSU_NOTIFY, COMMAND_RSU_RETRY, COMMAND_RSU_MAX_RETRY, COMMAND_RSU_DCMF_VERSION, COMMAND_FIRMWARE_VERSION, + /* for FCS */ + COMMAND_FCS_REQUEST_SERVICE = 20, + COMMAND_FCS_SEND_CERTIFICATE, + COMMAND_FCS_GET_PROVISION_DATA, + COMMAND_FCS_DATA_ENCRYPTION, + COMMAND_FCS_DATA_DECRYPTION, + COMMAND_FCS_RANDOM_NUMBER_GEN, /* for general status poll */ COMMAND_POLL_SERVICE_STATUS = 40, }; @@ -132,13 +161,17 @@ enum stratix10_svc_command_code { /** * struct stratix10_svc_client_msg - message sent by client to service * @payload: starting address of data need be processed - * @payload_length: data size in bytes + * @payload_length: to be processed data size in bytes + * @payload_output: starting address of processed data + * @payload_length_output: processed data size in bytes * @command: service command * @arg: args to be passed via registers and not physically mapped buffers */ struct stratix10_svc_client_msg { void *payload; size_t payload_length; + void *payload_output; + size_t payload_length_output; enum stratix10_svc_command_code command; u64 arg[3]; }; -- cgit v1.2.3 From 1b4394c5d731593063f53df9d72467335c3b0367 Mon Sep 17 00:00:00 2001 From: Kah Jing Lee Date: Mon, 11 Jul 2022 17:31:39 -0500 Subject: firmware: stratix10-svc: extend svc to support RSU feature Extend Intel Stratix10 service layer driver to support new RSU DCMF status reporting. The status of each DCMF is reported. The currently used DCMF is used as reference, while the other three are compared against it to determine if they are corrupted. DCMF = Decision Configuration Management Firmware RSU = Remote System Update Signed-off-by: Radu Bacrau Signed-off-by: Ang Tien Sung Signed-off-by: Kah Jing Lee Signed-off-by: Dinh Nguyen Link: https://lore.kernel.org/r/20220711223140.2307945-5-dinguyen@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/intel/stratix10-smc.h | 21 +++++++++++++++++++++ include/linux/firmware/intel/stratix10-svc-client.h | 4 ++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h index 8c198984a47c..8fc4aa450517 100644 --- a/include/linux/firmware/intel/stratix10-smc.h +++ b/include/linux/firmware/intel/stratix10-smc.h @@ -403,6 +403,27 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE) #define INTEL_SIP_SMC_RSU_MAX_RETRY \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_RSU_MAX_RETRY) +/** + * Request INTEL_SIP_SMC_RSU_DCMF_STATUS + * + * Sync call used by service driver at EL1 to query DCMF status from FW + * + * Call register usage: + * a0 INTEL_SIP_SMC_RSU_DCMF_STATUS + * a1-7 not used + * + * Return status + * a0 INTEL_SIP_SMC_STATUS_OK + * a1 dcmf3 | dcmf2 | dcmf1 | dcmf0 + * + * Or + * + * a0 INTEL_SIP_SMC_RSU_ERROR + */ +#define INTEL_SIP_SMC_FUNCID_RSU_DCMF_STATUS 20 +#define INTEL_SIP_SMC_RSU_DCMF_STATUS \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_RSU_DCMF_STATUS) + /** * Request INTEL_SIP_SMC_SERVICE_COMPLETED * Sync call to check if the secure world have completed service request diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index 677720792259..82a20e62f15f 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -114,6 +114,9 @@ struct stratix10_svc_chan; * @COMMAND_FIRMWARE_VERSION: query running firmware version, return status * is SVC_STATUS_OK or SVC_STATUS_ERROR * + * @COMMAND_RSU_DCMF_STATUS: query firmware for the DCMF status + * return status is SVC_STATUS_OK or SVC_STATUS_ERROR + * * @COMMAND_FCS_REQUEST_SERVICE: request validation of image from firmware, * return status is SVC_STATUS_OK, SVC_STATUS_INVALID_PARAM * @@ -146,6 +149,7 @@ enum stratix10_svc_command_code { COMMAND_RSU_RETRY, COMMAND_RSU_MAX_RETRY, COMMAND_RSU_DCMF_VERSION, + COMMAND_RSU_DCMF_STATUS, COMMAND_FIRMWARE_VERSION, /* for FCS */ COMMAND_FCS_REQUEST_SERVICE = 20, -- cgit v1.2.3 From 7935e899b35c93faa26e1d272a51b3d9cb39f23f Mon Sep 17 00:00:00 2001 From: Ang Tien Sung Date: Mon, 11 Jul 2022 17:31:40 -0500 Subject: firmware: stratix10-svc: To support a command ATF Get Version We are to support a new SMC Command of hexadecimal 0x200 that returns the ATF Firmware major and minor version. Signed-off-by: Ang Tien Sung Signed-off-by: Dinh Nguyen Link: https://lore.kernel.org/r/20220711223140.2307945-6-dinguyen@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/intel/stratix10-smc.h | 18 ++++++++++++++++++ include/linux/firmware/intel/stratix10-svc-client.h | 5 +++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h index 8fc4aa450517..a718f853d457 100644 --- a/include/linux/firmware/intel/stratix10-smc.h +++ b/include/linux/firmware/intel/stratix10-smc.h @@ -466,6 +466,24 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE) #define INTEL_SIP_SMC_FIRMWARE_VERSION \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FIRMWARE_VERSION) +/** + * Request INTEL_SIP_SMC_SVC_VERSION + * + * Sync call used to query the SIP SMC API Version + * + * Call register usage: + * a0 INTEL_SIP_SMC_SVC_VERSION + * a1-a7 not used + * + * Return status: + * a0 INTEL_SIP_SMC_STATUS_OK + * a1 Major + * a2 Minor + */ +#define INTEL_SIP_SMC_SVC_FUNCID_VERSION 512 +#define INTEL_SIP_SMC_SVC_VERSION \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_SVC_FUNCID_VERSION) + /** * SMC call protocol for FPGA Crypto Service (FCS) * FUNCID starts from 90 diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index 82a20e62f15f..a776a787ac75 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -114,6 +114,9 @@ struct stratix10_svc_chan; * @COMMAND_FIRMWARE_VERSION: query running firmware version, return status * is SVC_STATUS_OK or SVC_STATUS_ERROR * + * @COMMAND_SMC_SVC_VERSION: Non-mailbox SMC SVC API Version, + * return status is SVC_STATUS_OK + * * @COMMAND_RSU_DCMF_STATUS: query firmware for the DCMF status * return status is SVC_STATUS_OK or SVC_STATUS_ERROR * @@ -160,6 +163,8 @@ enum stratix10_svc_command_code { COMMAND_FCS_RANDOM_NUMBER_GEN, /* for general status poll */ COMMAND_POLL_SERVICE_STATUS = 40, + /* Non-mailbox SMC Call */ + COMMAND_SMC_SVC_VERSION = 200, }; /** -- cgit v1.2.3 From 900d156bac2bc474cf7c7bee4efbc6c83ec5ae58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jul 2022 07:53:17 +0200 Subject: block: remove bdevname Replace the remaining calls of bdevname with snprintf using the %pg format specifier. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220713055317.1888500-10-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 22c477fadc0f..2775763c51b9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1457,7 +1457,6 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) int bdev_read_only(struct block_device *bdev); int set_blocksize(struct block_device *bdev, int size); -const char *bdevname(struct block_device *bdev, char *buffer); int lookup_bdev(const char *pathname, dev_t *dev); void blkdev_show(struct seq_file *seqf, off_t offset); -- cgit v1.2.3 From ff07a02e9e8e6489db841e0c48a5c78e7e78d572 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:27 -0700 Subject: treewide: Rename enum req_opf into enum req_op The type name enum req_opf is misleading since it suggests that values of this type include both an operation type and flags. Since values of this type represent an operation only, change the type name into enum req_op. Convert the enum req_op documentation into kernel-doc format. Move a few definitions such that the enum req_op documentation occurs just above the enum req_op definition. The name "req_opf" was introduced by commit ef295ecf090d ("block: better op and flags encoding"). Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-2-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 16 ++++++++-------- include/linux/blkdev.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a24d4078fb21..0e6a2af7ed3d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -337,8 +337,12 @@ enum { typedef __u32 __bitwise blk_mq_req_flags_t; -/* - * Operations and flags common to the bio and request structures. +#define REQ_OP_BITS 8 +#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) +#define REQ_FLAG_BITS 24 + +/** + * enum req_op - Operations common to the bio and request structures. * We use 8 bits for encoding the operation, and the remaining 24 for flags. * * The least significant bit of the operation number indicates the data @@ -350,11 +354,7 @@ typedef __u32 __bitwise blk_mq_req_flags_t; * If a operation does not transfer data the least significant bit has no * meaning. */ -#define REQ_OP_BITS 8 -#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) -#define REQ_FLAG_BITS 24 - -enum req_opf { +enum req_op { /* read sectors from the device */ REQ_OP_READ = 0, /* write sectors to the device */ @@ -509,7 +509,7 @@ static inline bool op_is_discard(unsigned int op) * due to its different handling in the block layer and device response in * case of command failure. */ -static inline bool op_is_zone_mgmt(enum req_opf op) +static inline bool op_is_zone_mgmt(enum req_op op) { switch (op & REQ_OP_MASK) { case REQ_OP_ZONE_RESET: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2775763c51b9..ec072a5129bf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -322,7 +322,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); unsigned int bdev_nr_zones(struct block_device *bdev); -extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, +extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); int blk_revalidate_disk_zones(struct gendisk *disk, -- cgit v1.2.3 From 77e7ffd7ad3952909be6a9c599b7d164c8866fec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:28 -0700 Subject: block: Use enum req_op where appropriate Change the type of the arguments that are used to pass a REQ_OP_* value from int or unsigned int into enum req_op to improve static type checking. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-3-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- include/linux/blkdev.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 0e6a2af7ed3d..cce8768bc00b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -522,7 +522,7 @@ static inline bool op_is_zone_mgmt(enum req_op op) } } -static inline int op_stat_group(unsigned int op) +static inline int op_stat_group(enum req_op op) { if (op_is_discard(op)) return STAT_DISCARD; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ec072a5129bf..2f13f0062192 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -872,7 +872,7 @@ extern void blk_queue_exit(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); /* Helper to convert REQ_OP_XXX to its string format XXX */ -extern const char *blk_op_str(unsigned int op); +extern const char *blk_op_str(enum req_op op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); @@ -1434,9 +1434,9 @@ static inline void blk_wake_io_task(struct task_struct *waiter) } unsigned long bdev_start_io_acct(struct block_device *bdev, - unsigned int sectors, unsigned int op, + unsigned int sectors, enum req_op op, unsigned long start_time); -void bdev_end_io_acct(struct block_device *bdev, unsigned int op, +void bdev_end_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time); void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); -- cgit v1.2.3 From 86947df3a9236481276e8baadde50a403b02b4d4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:29 -0700 Subject: block: Change the type of the last .rw_page() argument All .rw_page() callers pass an enum req_op value as last argument. Make this explicit by changing the type of the last argument into enum req_op. See also commit 3f289dcb4b26 ("block: make bdev_ops->rw_page() take a REQ_OP instead of bool"). Cc: Tejun Heo Cc: Minchan Kim Cc: Dan Williams Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-4-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2f13f0062192..ca2ff113ea00 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1381,7 +1381,7 @@ struct block_device_operations { unsigned int flags); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); - int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); + int (*rw_page)(struct block_device *, sector_t, struct page *, enum req_op); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); unsigned int (*check_events) (struct gendisk *disk, -- cgit v1.2.3 From 2d9b02be73ba8efba406b399a722b4e33614dd0e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:30 -0700 Subject: block: Change the type of req_op() and bio_op() into enum req_op Improve static type checking by changing the type of the value returned by req_op() and bio_op() from unsigned int into enum req_op. Insert 'default: break;' in switch statements on the enum req_op type to prevent that the compiler warns about these switch statements. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Cc: Tim Waugh Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-5-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ++++-- include/linux/blk_types.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d74f6a6b7e69..677195de0663 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -198,8 +198,10 @@ struct request { void *end_io_data; }; -#define req_op(req) \ - ((req)->cmd_flags & REQ_OP_MASK) +static inline enum req_op req_op(const struct request *req) +{ + return req->cmd_flags & REQ_OP_MASK; +} static inline bool blk_rq_is_passthrough(struct request *rq) { diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index cce8768bc00b..e66cbe377ae8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -463,8 +463,10 @@ enum stat_group { NR_STAT_GROUPS }; -#define bio_op(bio) \ - ((bio)->bi_opf & REQ_OP_MASK) +static inline enum req_op bio_op(const struct bio *bio) +{ + return bio->bi_opf & REQ_OP_MASK; +} /* obsolete, don't use in new code */ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, -- cgit v1.2.3 From 342a72a334073f163da924b69c3d3fb4685eb33a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:31 -0700 Subject: block: Introduce the type blk_opf_t Introduce the type blk_opf_t for the request operation and flags (REQ_OP_* and REQ_*). This type will be used to improve documentation of the block layer code and also to allow sparse to verify whether request flags are used correctly. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-6-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 97 +++++++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e66cbe377ae8..1ef99790f6ed 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -240,6 +240,8 @@ static inline void bio_issue_init(struct bio_issue *issue, ((u64)size << BIO_ISSUE_SIZE_SHIFT)); } +typedef __u32 __bitwise blk_opf_t; + typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U @@ -250,7 +252,7 @@ typedef unsigned int blk_qc_t; struct bio { struct bio *bi_next; /* request queue link */ struct block_device *bi_bdev; - unsigned int bi_opf; /* bottom bits REQ_OP, top bits + blk_opf_t bi_opf; /* bottom bits REQ_OP, top bits * req_flags. */ unsigned short bi_flags; /* BIO_* below */ @@ -338,7 +340,7 @@ enum { typedef __u32 __bitwise blk_mq_req_flags_t; #define REQ_OP_BITS 8 -#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) +#define REQ_OP_MASK (__force blk_opf_t)((1 << REQ_OP_BITS) - 1) #define REQ_FLAG_BITS 24 /** @@ -356,35 +358,35 @@ typedef __u32 __bitwise blk_mq_req_flags_t; */ enum req_op { /* read sectors from the device */ - REQ_OP_READ = 0, + REQ_OP_READ = (__force blk_opf_t)0, /* write sectors to the device */ - REQ_OP_WRITE = 1, + REQ_OP_WRITE = (__force blk_opf_t)1, /* flush the volatile write cache */ - REQ_OP_FLUSH = 2, + REQ_OP_FLUSH = (__force blk_opf_t)2, /* discard sectors */ - REQ_OP_DISCARD = 3, + REQ_OP_DISCARD = (__force blk_opf_t)3, /* securely erase sectors */ - REQ_OP_SECURE_ERASE = 5, + REQ_OP_SECURE_ERASE = (__force blk_opf_t)5, /* write the zero filled sector many times */ - REQ_OP_WRITE_ZEROES = 9, + REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9, /* Open a zone */ - REQ_OP_ZONE_OPEN = 10, + REQ_OP_ZONE_OPEN = (__force blk_opf_t)10, /* Close a zone */ - REQ_OP_ZONE_CLOSE = 11, + REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11, /* Transition a zone to full */ - REQ_OP_ZONE_FINISH = 12, + REQ_OP_ZONE_FINISH = (__force blk_opf_t)12, /* write data at the current zone write pointer */ - REQ_OP_ZONE_APPEND = 13, + REQ_OP_ZONE_APPEND = (__force blk_opf_t)13, /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = 15, + REQ_OP_ZONE_RESET = (__force blk_opf_t)15, /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = 17, + REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17, /* Driver private requests */ - REQ_OP_DRV_IN = 34, - REQ_OP_DRV_OUT = 35, + REQ_OP_DRV_IN = (__force blk_opf_t)34, + REQ_OP_DRV_OUT = (__force blk_opf_t)35, - REQ_OP_LAST, + REQ_OP_LAST = (__force blk_opf_t)36, }; enum req_flag_bits { @@ -425,28 +427,31 @@ enum req_flag_bits { __REQ_NR_BITS, /* stops here */ }; -#define REQ_FAILFAST_DEV (1ULL << __REQ_FAILFAST_DEV) -#define REQ_FAILFAST_TRANSPORT (1ULL << __REQ_FAILFAST_TRANSPORT) -#define REQ_FAILFAST_DRIVER (1ULL << __REQ_FAILFAST_DRIVER) -#define REQ_SYNC (1ULL << __REQ_SYNC) -#define REQ_META (1ULL << __REQ_META) -#define REQ_PRIO (1ULL << __REQ_PRIO) -#define REQ_NOMERGE (1ULL << __REQ_NOMERGE) -#define REQ_IDLE (1ULL << __REQ_IDLE) -#define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) -#define REQ_FUA (1ULL << __REQ_FUA) -#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) -#define REQ_RAHEAD (1ULL << __REQ_RAHEAD) -#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) -#define REQ_NOWAIT (1ULL << __REQ_NOWAIT) -#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) - -#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) -#define REQ_POLLED (1ULL << __REQ_POLLED) -#define REQ_ALLOC_CACHE (1ULL << __REQ_ALLOC_CACHE) - -#define REQ_DRV (1ULL << __REQ_DRV) -#define REQ_SWAP (1ULL << __REQ_SWAP) +#define REQ_FAILFAST_DEV \ + (__force blk_opf_t)(1ULL << __REQ_FAILFAST_DEV) +#define REQ_FAILFAST_TRANSPORT \ + (__force blk_opf_t)(1ULL << __REQ_FAILFAST_TRANSPORT) +#define REQ_FAILFAST_DRIVER \ + (__force blk_opf_t)(1ULL << __REQ_FAILFAST_DRIVER) +#define REQ_SYNC (__force blk_opf_t)(1ULL << __REQ_SYNC) +#define REQ_META (__force blk_opf_t)(1ULL << __REQ_META) +#define REQ_PRIO (__force blk_opf_t)(1ULL << __REQ_PRIO) +#define REQ_NOMERGE (__force blk_opf_t)(1ULL << __REQ_NOMERGE) +#define REQ_IDLE (__force blk_opf_t)(1ULL << __REQ_IDLE) +#define REQ_INTEGRITY (__force blk_opf_t)(1ULL << __REQ_INTEGRITY) +#define REQ_FUA (__force blk_opf_t)(1ULL << __REQ_FUA) +#define REQ_PREFLUSH (__force blk_opf_t)(1ULL << __REQ_PREFLUSH) +#define REQ_RAHEAD (__force blk_opf_t)(1ULL << __REQ_RAHEAD) +#define REQ_BACKGROUND (__force blk_opf_t)(1ULL << __REQ_BACKGROUND) +#define REQ_NOWAIT (__force blk_opf_t)(1ULL << __REQ_NOWAIT) +#define REQ_CGROUP_PUNT (__force blk_opf_t)(1ULL << __REQ_CGROUP_PUNT) + +#define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP) +#define REQ_POLLED (__force blk_opf_t)(1ULL << __REQ_POLLED) +#define REQ_ALLOC_CACHE (__force blk_opf_t)(1ULL << __REQ_ALLOC_CACHE) + +#define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV) +#define REQ_SWAP (__force blk_opf_t)(1ULL << __REQ_SWAP) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) @@ -469,22 +474,22 @@ static inline enum req_op bio_op(const struct bio *bio) } /* obsolete, don't use in new code */ -static inline void bio_set_op_attrs(struct bio *bio, unsigned op, - unsigned op_flags) +static inline void bio_set_op_attrs(struct bio *bio, enum req_op op, + blk_opf_t op_flags) { bio->bi_opf = op | op_flags; } -static inline bool op_is_write(unsigned int op) +static inline bool op_is_write(blk_opf_t op) { - return (op & 1); + return !!(op & (__force blk_opf_t)1); } /* * Check if the bio or request is one that needs special treatment in the * flush state machine. */ -static inline bool op_is_flush(unsigned int op) +static inline bool op_is_flush(blk_opf_t op) { return op & (REQ_FUA | REQ_PREFLUSH); } @@ -494,13 +499,13 @@ static inline bool op_is_flush(unsigned int op) * PREFLUSH flag. Other operations may be marked as synchronous using the * REQ_SYNC flag. */ -static inline bool op_is_sync(unsigned int op) +static inline bool op_is_sync(blk_opf_t op) { return (op & REQ_OP_MASK) == REQ_OP_READ || (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH)); } -static inline bool op_is_discard(unsigned int op) +static inline bool op_is_discard(blk_opf_t op) { return (op & REQ_OP_MASK) == REQ_OP_DISCARD; } -- cgit v1.2.3 From 16458cf3bd15e5624205df6e8a76b9a5363555f3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:32 -0700 Subject: block: Use the new blk_opf_t type Use the new blk_opf_t type for arguments and variables that represent request flags or a bitwise combination of a request operation and request flags. Rename the function arguments and also a structure member that hold a request operation and flags from 'rw' into 'opf'. This patch does not change any functionality. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-7-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/bio.h | 10 +++++----- include/linux/blk-mq.h | 6 +++--- include/linux/blkdev.h | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 992ee987f273..ca22b06700a9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -405,7 +405,7 @@ extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, - unsigned int opf, gfp_t gfp_mask, + blk_opf_t opf, gfp_t gfp_mask, struct bio_set *bs); struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask); extern void bio_put(struct bio *); @@ -418,7 +418,7 @@ int bio_init_clone(struct block_device *bdev, struct bio *bio, extern struct bio_set fs_bio_set; static inline struct bio *bio_alloc(struct block_device *bdev, - unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask) + unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp_mask) { return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set); } @@ -456,9 +456,9 @@ struct request_queue; extern int submit_bio_wait(struct bio *bio); void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, - unsigned short max_vecs, unsigned int opf); + unsigned short max_vecs, blk_opf_t opf); extern void bio_uninit(struct bio *); -void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf); +void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); void bio_chain(struct bio *, struct bio *); int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off); @@ -789,6 +789,6 @@ static inline void bio_clear_polled(struct bio *bio) } struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, - unsigned int nr_pages, unsigned int opf, gfp_t gfp); + unsigned int nr_pages, blk_opf_t opf, gfp_t gfp); #endif /* __LINUX_BIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 677195de0663..effee1dc715a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -80,7 +80,7 @@ struct request { struct blk_mq_ctx *mq_ctx; struct blk_mq_hw_ctx *mq_hctx; - unsigned int cmd_flags; /* op and common flags */ + blk_opf_t cmd_flags; /* op and common flags */ req_flags_t rq_flags; int tag; @@ -715,10 +715,10 @@ enum { BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), }; -struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, +struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, - unsigned int op, blk_mq_req_flags_t flags, + blk_opf_t opf, blk_mq_req_flags_t flags, unsigned int hctx_idx); /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ca2ff113ea00..d04bdf549efa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -250,7 +250,7 @@ static inline int blk_validate_block_size(unsigned long bsize) return 0; } -static inline bool blk_op_is_passthrough(unsigned int op) +static inline bool blk_op_is_passthrough(blk_opf_t op) { op &= REQ_OP_MASK; return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; -- cgit v1.2.3 From 919dbca8670d0f7828dfbb2f9b434ac22dca8d2e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:37 -0700 Subject: blktrace: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for a function argument that represents a combination of a request operation and request flags. Rename that argument from 'op' into 'opf' to make its role more clear. Cc: Christoph Hellwig Cc: Steven Rostedt Cc: Li Zefan Cc: Chaitanya Kulkarni Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-12-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index f6f9b544365a..cfbda114348c 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -7,6 +7,7 @@ #include #include #include +#include #if defined(CONFIG_BLK_DEV_IO_TRACE) @@ -105,7 +106,7 @@ struct compat_blk_user_trace_setup { #endif -void blk_fill_rwbs(char *rwbs, unsigned int op); +void blk_fill_rwbs(char *rwbs, blk_opf_t opf); static inline sector_t blk_rq_trace_sector(struct request *rq) { -- cgit v1.2.3 From 581075e4f6475bb97c73ecccf68636a9453a31fd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:47 -0700 Subject: dm/core: Reduce the size of struct dm_io_request Combine the bi_op and bi_op_flags into the bi_opf member. Use the new blk_opf_t type to improve static type checking. This patch does not change any functionality. Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-22-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/dm-io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index a52c6580cc9a..8e1c4ab5df04 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -13,6 +13,7 @@ #ifdef __KERNEL__ #include +#include struct dm_io_region { struct block_device *bdev; @@ -57,8 +58,7 @@ struct dm_io_notify { */ struct dm_io_client; struct dm_io_request { - int bi_op; /* REQ_OP */ - int bi_op_flags; /* req_flag_bits */ + blk_opf_t bi_opf; /* Request type and flags */ struct dm_io_memory mem; /* Memory to use for io */ struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */ struct dm_io_client *client; /* Client memory handler */ -- cgit v1.2.3 From f8e6e4bd9fd8c452565f3eaeb358e3cc08d880f4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:11 -0700 Subject: mm: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for block layer request flags. Cc: Andrew Morton Cc: Christoph Hellwig Cc: Jan Kara Cc: Stefan Roesch Cc: NeilBrown Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-46-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/writeback.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index da21d63f70e2..e91bea371b18 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -101,9 +101,9 @@ struct writeback_control { #endif }; -static inline int wbc_to_write_flags(struct writeback_control *wbc) +static inline blk_opf_t wbc_to_write_flags(struct writeback_control *wbc) { - int flags = 0; + blk_opf_t flags = 0; if (wbc->punt_to_cgroup) flags = REQ_CGROUP_PUNT; -- cgit v1.2.3 From 3ae7286943ae6f6bfecfe0a3da9d1a4c64f5531f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:12 -0700 Subject: fs/buffer: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for block layer request flags. Change WRITE into REQ_OP_WRITE. This patch does not change any functionality since REQ_OP_WRITE == WRITE == 1. Reviewed-by: Jan Kara Cc: Al Viro Cc: Christoph Hellwig Cc: Matthew Wilcox Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-47-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c9d1463bb20f..9795df9400bd 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -9,6 +9,7 @@ #define _LINUX_BUFFER_HEAD_H #include +#include #include #include #include @@ -201,11 +202,11 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); -void ll_rw_block(int, int, int, struct buffer_head * bh[]); +void ll_rw_block(enum req_op, blk_opf_t, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); -int __sync_dirty_buffer(struct buffer_head *bh, int op_flags); -void write_dirty_buffer(struct buffer_head *bh, int op_flags); -int submit_bh(int, int, struct buffer_head *); +int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); +void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); +int submit_bh(enum req_op, blk_opf_t, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); -- cgit v1.2.3 From 1420c4a549bf28ffddbed827d61fb3d4d2132ddb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:13 -0700 Subject: fs/buffer: Combine two submit_bh() and ll_rw_block() arguments Both submit_bh() and ll_rw_block() accept a request operation type and request flags as their first two arguments. Micro-optimize these two functions by combining these first two arguments into a single argument. This patch does not change the behavior of any of the modified code. Cc: Alexander Viro Cc: Jan Kara Acked-by: Song Liu (for the md changes) Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-48-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9795df9400bd..bb68eb6407da 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -202,11 +202,11 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); -void ll_rw_block(enum req_op, blk_opf_t, int, struct buffer_head * bh[]); +void ll_rw_block(blk_opf_t, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); -int submit_bh(enum req_op, blk_opf_t, struct buffer_head *); +int submit_bh(blk_opf_t, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); -- cgit v1.2.3 From 6669797b0dd41ced457760b6e1014fdda8ce19ce Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:22 -0700 Subject: fs/jbd2: Fix the documentation of the jbd2_write_superblock() callers Commit 2a222ca992c3 ("fs: have submit_bh users pass in op and flags separately") renamed the jbd2_write_superblock() 'write_op' argument into 'write_flags'. Propagate this change to the jbd2_write_superblock() callers. Additionally, change the type of 'write_flags' into blk_opf_t. Cc: Mike Christie Cc: Theodore Ts'o Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-57-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e79d6e0b14e8..dc1724131300 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1557,7 +1557,7 @@ extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_skip_recovery (journal_t *); extern void jbd2_journal_update_sb_errno(journal_t *); extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t, - unsigned long, int); + unsigned long, blk_opf_t); extern void jbd2_journal_abort (journal_t *, int); extern int jbd2_journal_errno (journal_t *); extern void jbd2_journal_ack_err (journal_t *); -- cgit v1.2.3 From b644c95598adfe2b968e97daee3a890dd2fda84d Mon Sep 17 00:00:00 2001 From: PaddyKP_Yao Date: Mon, 11 Jul 2022 19:51:25 +0800 Subject: platform/x86: asus-wmi: Add mic-mute LED classdev support In some new ASUS devices, hotkey Fn+F13 is used for mic mute. If mic-mute LED is present by checking WMI ASUS_WMI_DEVID_MICMUTE_LED, we will add a mic-mute LED classdev, asus::micmute, in the asus-wmi driver to control it. The binding of mic-mute LED controls will be swithched with LED trigger. Signed-off-by: PaddyKP_Yao Link: https://lore.kernel.org/r/20220711115125.2072508-1-PaddyKP_Yao@asus.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/asus-wmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index a571b47ff362..98f2b2f20f3e 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -49,6 +49,7 @@ #define ASUS_WMI_DEVID_LED4 0x00020014 #define ASUS_WMI_DEVID_LED5 0x00020015 #define ASUS_WMI_DEVID_LED6 0x00020016 +#define ASUS_WMI_DEVID_MICMUTE_LED 0x00040017 /* Backlight and Brightness */ #define ASUS_WMI_DEVID_ALS_ENABLE 0x00050001 /* Ambient Light Sensor */ -- cgit v1.2.3 From cd89edda4002b7fb3c0a6765c3a60a60d5b1dc16 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 14 Jul 2022 20:42:12 +0800 Subject: PCI: loongson: Add ACPI init support Loongson PCH (LS7A chipset) will be used by both MIPS-based and LoongArch- based Loongson processors. MIPS-based Loongson uses FDT, while LoongArch- based Loongson uses ACPI. Add ACPI init support for the driver in pci-loongson.c because it is currently FDT-only. LoongArch is a new RISC ISA, mainline support will come soon, and documentations are here (in translation): https://github.com/loongson/LoongArch-Documentation Link: https://lore.kernel.org/r/20220714124216.1489304-4-chenhuacai@loongson.cn Signed-off-by: Huacai Chen Signed-off-by: Bjorn Helgaas --- include/linux/pci-ecam.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index adea5a4771cf..6b1301e2498e 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -87,6 +87,7 @@ extern const struct pci_ecam_ops xgene_v1_pcie_ecam_ops; /* APM X-Gene PCIe v1 * extern const struct pci_ecam_ops xgene_v2_pcie_ecam_ops; /* APM X-Gene PCIe v2.x */ extern const struct pci_ecam_ops al_pcie_ops; /* Amazon Annapurna Labs PCIe */ extern const struct pci_ecam_ops tegra194_pcie_ops; /* Tegra194 PCIe */ +extern const struct pci_ecam_ops loongson_pci_ecam_ops; /* Loongson PCIe */ #endif #if IS_ENABLED(CONFIG_PCI_HOST_COMMON) -- cgit v1.2.3 From e2863a78593d638d3924a6f67900c4820034f349 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 1 Jul 2022 05:54:24 -0700 Subject: lib/bitmap: change return types to bool where appropriate Some bitmap functions return boolean results in int variables. Fix it by changing return types to bool. Signed-off-by: Yury Norov --- include/linux/bitmap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index c91638e507f2..e1a438bdda52 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -148,13 +148,13 @@ void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); void bitmap_cut(unsigned long *dst, const unsigned long *src, unsigned int first, unsigned int cut, unsigned int nbits); -int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, +bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); -int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, +bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); void __bitmap_replace(unsigned long *dst, const unsigned long *old, const unsigned long *new, @@ -315,7 +315,7 @@ void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits); bitmap_copy_clear_tail((unsigned long *)(buf), (const unsigned long *)(bitmap), (nbits)) #endif -static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, +static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) @@ -341,7 +341,7 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, __bitmap_xor(dst, src1, src2, nbits); } -static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, +static inline bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) -- cgit v1.2.3 From 3a06ed80265fa62eecaf519d92f1633e4f9510c7 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Fri, 8 Jul 2022 17:57:14 +0800 Subject: extcon: Add EXTCON_DISP_CVBS and EXTCON_DISP_EDP Add EXTCON_DISP_CVBS for Composite Video Broadcast Signal. Add EXTCON_DISP_EDP for Embedded Display Port [1] https://en.wikipedia.org/wiki/Composite_video [2] https://en.wikipedia.org/wiki/DisplayPort#eDP Signed-off-by: Michael Wu Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 685401d94d39..3c45c3846fe9 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -76,6 +76,8 @@ #define EXTCON_DISP_VGA 43 /* Video Graphics Array */ #define EXTCON_DISP_DP 44 /* Display Port */ #define EXTCON_DISP_HMD 45 /* Head-Mounted Display */ +#define EXTCON_DISP_CVBS 46 /* Composite Video Broadcast Signal */ +#define EXTCON_DISP_EDP 47 /* Embedded Display Port */ /* Miscellaneous external connector */ #define EXTCON_DOCK 60 -- cgit v1.2.3 From 309c56e84602d894e7ca424b0b13837117568fca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 Jul 2022 10:06:13 +0200 Subject: iommu: remove the unused dev_has_feat method This method is never actually called. Signed-off-by: Christoph Hellwig Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220708080616.238833-2-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e6abd998dbe7..a3acdb46b939 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -164,8 +164,7 @@ struct iommu_iort_rmr_data { * supported, this feature must be enabled before and * disabled after %IOMMU_DEV_FEAT_SVA. * - * Device drivers query whether a feature is supported using - * iommu_dev_has_feature(), and enable it using iommu_dev_enable_feature(). + * Device drivers enable a feature using iommu_dev_enable_feature(). */ enum iommu_dev_features { IOMMU_DEV_FEAT_SVA, @@ -248,7 +247,6 @@ struct iommu_ops { bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ - bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f); bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f); int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); -- cgit v1.2.3 From a871765d5588531e1972902d1ae077b8be306c94 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 Jul 2022 10:06:14 +0200 Subject: iommu: remove iommu_dev_feature_enabled Remove the unused iommu_dev_feature_enabled function. Signed-off-by: Christoph Hellwig Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220708080616.238833-3-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a3acdb46b939..0bc2eb14b026 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -215,7 +215,6 @@ struct iommu_iotlb_gather { * driver init to device driver init (default no) * @dev_has/enable/disable_feat: per device entries to check/enable/disable * iommu specific features. - * @dev_feat_enabled: check enabled feature * @sva_bind: Bind process address space to device * @sva_unbind: Unbind process address space from device * @sva_get_pasid: Get PASID associated to a SVA handle @@ -247,7 +246,6 @@ struct iommu_ops { bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ - bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f); int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); @@ -670,7 +668,6 @@ void iommu_release_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); -bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f); struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, @@ -997,12 +994,6 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) return NULL; } -static inline bool -iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) -{ - return false; -} - static inline int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { -- cgit v1.2.3 From ae3ff39a51a0f5843960487962e110339f321b0f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 Jul 2022 10:06:15 +0200 Subject: iommu: remove the put_resv_regions method All drivers that implement get_resv_regions just use generic_put_resv_regions to implement the put side. Remove the indirections and document the allocations constraints. Signed-off-by: Christoph Hellwig Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220708080616.238833-4-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0bc2eb14b026..ea30f00dc145 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -209,7 +209,6 @@ struct iommu_iotlb_gather { * group and attached to the groups domain * @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device - * @put_resv_regions: Free list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) @@ -240,7 +239,6 @@ struct iommu_ops { /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); - void (*put_resv_regions)(struct device *dev, struct list_head *list); int (*of_xlate)(struct device *dev, struct of_phandle_args *args); bool (*is_attach_deferred)(struct device *dev); @@ -454,8 +452,6 @@ extern void iommu_set_fault_handler(struct iommu_domain *domain, extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); -extern void generic_iommu_put_resv_regions(struct device *dev, - struct list_head *list); extern void iommu_set_default_passthrough(bool cmd_line); extern void iommu_set_default_translated(bool cmd_line); extern bool iommu_default_passthrough(void); -- cgit v1.2.3 From f9903555dd05a4096d8fef4eb823c0b94f710982 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:46 +0800 Subject: iommu/vt-d: Remove unnecessary exported symbol The exported symbol intel_iommu_gfx_mapped is not used anywhere in the tree. Remove it to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220514014322.2927339-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 5fcf89faa31a..4ebf3c4da45e 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -787,7 +787,6 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; -extern int intel_iommu_gfx_mapped; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { -- cgit v1.2.3 From 853788b9a66ff089053df3b4ed7d166e61def449 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:49 +0800 Subject: x86/boot/tboot: Move tboot_force_iommu() to Intel IOMMU tboot_force_iommu() is only called by the Intel IOMMU driver. Move the helper into that driver. No functional change intended. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220514014322.2927339-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/tboot.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tboot.h b/include/linux/tboot.h index 5146d2574e85..d2279160ef39 100644 --- a/include/linux/tboot.h +++ b/include/linux/tboot.h @@ -126,7 +126,6 @@ extern void tboot_probe(void); extern void tboot_shutdown(u32 shutdown_type); extern struct acpi_table_header *tboot_get_dmar_table( struct acpi_table_header *dmar_tbl); -extern int tboot_force_iommu(void); #else @@ -136,7 +135,6 @@ extern int tboot_force_iommu(void); #define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \ do { } while (0) #define tboot_get_dmar_table(dmar_tbl) (dmar_tbl) -#define tboot_force_iommu() 0 #endif /* !CONFIG_INTEL_TXT */ -- cgit v1.2.3 From 2585a2790e7fdb3dadfe309c9220bbc03704f84f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:50 +0800 Subject: iommu/vt-d: Move include/linux/intel-iommu.h under iommu This header file is private to the Intel IOMMU driver. Move it to the driver folder. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220514014322.2927339-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 831 -------------------------------------------- 1 file changed, 831 deletions(-) delete mode 100644 include/linux/intel-iommu.h (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h deleted file mode 100644 index 4ebf3c4da45e..000000000000 --- a/include/linux/intel-iommu.h +++ /dev/null @@ -1,831 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright © 2006-2015, Intel Corporation. - * - * Authors: Ashok Raj - * Anil S Keshavamurthy - * David Woodhouse - */ - -#ifndef _INTEL_IOMMU_H_ -#define _INTEL_IOMMU_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -/* - * VT-d hardware uses 4KiB page size regardless of host page size. - */ -#define VTD_PAGE_SHIFT (12) -#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT) -#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) -#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) - -#define VTD_STRIDE_SHIFT (9) -#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) - -#define DMA_PTE_READ BIT_ULL(0) -#define DMA_PTE_WRITE BIT_ULL(1) -#define DMA_PTE_LARGE_PAGE BIT_ULL(7) -#define DMA_PTE_SNP BIT_ULL(11) - -#define DMA_FL_PTE_PRESENT BIT_ULL(0) -#define DMA_FL_PTE_US BIT_ULL(2) -#define DMA_FL_PTE_ACCESS BIT_ULL(5) -#define DMA_FL_PTE_DIRTY BIT_ULL(6) -#define DMA_FL_PTE_XD BIT_ULL(63) - -#define ADDR_WIDTH_5LEVEL (57) -#define ADDR_WIDTH_4LEVEL (48) - -#define CONTEXT_TT_MULTI_LEVEL 0 -#define CONTEXT_TT_DEV_IOTLB 1 -#define CONTEXT_TT_PASS_THROUGH 2 -#define CONTEXT_PASIDE BIT_ULL(3) - -/* - * Intel IOMMU register specification per version 1.0 public spec. - */ -#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ -#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ -#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ -#define DMAR_GCMD_REG 0x18 /* Global command register */ -#define DMAR_GSTS_REG 0x1c /* Global status register */ -#define DMAR_RTADDR_REG 0x20 /* Root entry table */ -#define DMAR_CCMD_REG 0x28 /* Context command reg */ -#define DMAR_FSTS_REG 0x34 /* Fault Status register */ -#define DMAR_FECTL_REG 0x38 /* Fault control register */ -#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ -#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ -#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ -#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ -#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ -#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ -#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ -#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ -#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ -#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ -#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ -#define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */ -#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ -#define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ -#define DMAR_IQER_REG 0xb0 /* Invalidation queue error record register */ -#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ -#define DMAR_PQH_REG 0xc0 /* Page request queue head register */ -#define DMAR_PQT_REG 0xc8 /* Page request queue tail register */ -#define DMAR_PQA_REG 0xd0 /* Page request queue address register */ -#define DMAR_PRS_REG 0xdc /* Page request status register */ -#define DMAR_PECTL_REG 0xe0 /* Page request event control register */ -#define DMAR_PEDATA_REG 0xe4 /* Page request event interrupt data register */ -#define DMAR_PEADDR_REG 0xe8 /* Page request event interrupt addr register */ -#define DMAR_PEUADDR_REG 0xec /* Page request event Upper address register */ -#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability register */ -#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type register */ -#define DMAR_MTRR_FIX64K_00000_REG 0x120 /* MTRR Fixed range registers */ -#define DMAR_MTRR_FIX16K_80000_REG 0x128 -#define DMAR_MTRR_FIX16K_A0000_REG 0x130 -#define DMAR_MTRR_FIX4K_C0000_REG 0x138 -#define DMAR_MTRR_FIX4K_C8000_REG 0x140 -#define DMAR_MTRR_FIX4K_D0000_REG 0x148 -#define DMAR_MTRR_FIX4K_D8000_REG 0x150 -#define DMAR_MTRR_FIX4K_E0000_REG 0x158 -#define DMAR_MTRR_FIX4K_E8000_REG 0x160 -#define DMAR_MTRR_FIX4K_F0000_REG 0x168 -#define DMAR_MTRR_FIX4K_F8000_REG 0x170 -#define DMAR_MTRR_PHYSBASE0_REG 0x180 /* MTRR Variable range registers */ -#define DMAR_MTRR_PHYSMASK0_REG 0x188 -#define DMAR_MTRR_PHYSBASE1_REG 0x190 -#define DMAR_MTRR_PHYSMASK1_REG 0x198 -#define DMAR_MTRR_PHYSBASE2_REG 0x1a0 -#define DMAR_MTRR_PHYSMASK2_REG 0x1a8 -#define DMAR_MTRR_PHYSBASE3_REG 0x1b0 -#define DMAR_MTRR_PHYSMASK3_REG 0x1b8 -#define DMAR_MTRR_PHYSBASE4_REG 0x1c0 -#define DMAR_MTRR_PHYSMASK4_REG 0x1c8 -#define DMAR_MTRR_PHYSBASE5_REG 0x1d0 -#define DMAR_MTRR_PHYSMASK5_REG 0x1d8 -#define DMAR_MTRR_PHYSBASE6_REG 0x1e0 -#define DMAR_MTRR_PHYSMASK6_REG 0x1e8 -#define DMAR_MTRR_PHYSBASE7_REG 0x1f0 -#define DMAR_MTRR_PHYSMASK7_REG 0x1f8 -#define DMAR_MTRR_PHYSBASE8_REG 0x200 -#define DMAR_MTRR_PHYSMASK8_REG 0x208 -#define DMAR_MTRR_PHYSBASE9_REG 0x210 -#define DMAR_MTRR_PHYSMASK9_REG 0x218 -#define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ -#define DMAR_VCMD_REG 0xe00 /* Virtual command register */ -#define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ - -#define DMAR_IQER_REG_IQEI(reg) FIELD_GET(GENMASK_ULL(3, 0), reg) -#define DMAR_IQER_REG_ITESID(reg) FIELD_GET(GENMASK_ULL(47, 32), reg) -#define DMAR_IQER_REG_ICESID(reg) FIELD_GET(GENMASK_ULL(63, 48), reg) - -#define OFFSET_STRIDE (9) - -#define dmar_readq(a) readq(a) -#define dmar_writeq(a,v) writeq(v,a) -#define dmar_readl(a) readl(a) -#define dmar_writel(a, v) writel(v, a) - -#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) -#define DMAR_VER_MINOR(v) ((v) & 0x0f) - -/* - * Decoding Capability Register - */ -#define cap_5lp_support(c) (((c) >> 60) & 1) -#define cap_pi_support(c) (((c) >> 59) & 1) -#define cap_fl1gp_support(c) (((c) >> 56) & 1) -#define cap_read_drain(c) (((c) >> 55) & 1) -#define cap_write_drain(c) (((c) >> 54) & 1) -#define cap_max_amask_val(c) (((c) >> 48) & 0x3f) -#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) -#define cap_pgsel_inv(c) (((c) >> 39) & 1) - -#define cap_super_page_val(c) (((c) >> 34) & 0xf) -#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \ - * OFFSET_STRIDE) + 21) - -#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) -#define cap_max_fault_reg_offset(c) \ - (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16) - -#define cap_zlr(c) (((c) >> 22) & 1) -#define cap_isoch(c) (((c) >> 23) & 1) -#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) -#define cap_sagaw(c) (((c) >> 8) & 0x1f) -#define cap_caching_mode(c) (((c) >> 7) & 1) -#define cap_phmr(c) (((c) >> 6) & 1) -#define cap_plmr(c) (((c) >> 5) & 1) -#define cap_rwbf(c) (((c) >> 4) & 1) -#define cap_afl(c) (((c) >> 3) & 1) -#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7))) -/* - * Extended Capability Register - */ - -#define ecap_rps(e) (((e) >> 49) & 0x1) -#define ecap_smpwc(e) (((e) >> 48) & 0x1) -#define ecap_flts(e) (((e) >> 47) & 0x1) -#define ecap_slts(e) (((e) >> 46) & 0x1) -#define ecap_slads(e) (((e) >> 45) & 0x1) -#define ecap_vcs(e) (((e) >> 44) & 0x1) -#define ecap_smts(e) (((e) >> 43) & 0x1) -#define ecap_dit(e) (((e) >> 41) & 0x1) -#define ecap_pds(e) (((e) >> 42) & 0x1) -#define ecap_pasid(e) (((e) >> 40) & 0x1) -#define ecap_pss(e) (((e) >> 35) & 0x1f) -#define ecap_eafs(e) (((e) >> 34) & 0x1) -#define ecap_nwfs(e) (((e) >> 33) & 0x1) -#define ecap_srs(e) (((e) >> 31) & 0x1) -#define ecap_ers(e) (((e) >> 30) & 0x1) -#define ecap_prs(e) (((e) >> 29) & 0x1) -#define ecap_broken_pasid(e) (((e) >> 28) & 0x1) -#define ecap_dis(e) (((e) >> 27) & 0x1) -#define ecap_nest(e) (((e) >> 26) & 0x1) -#define ecap_mts(e) (((e) >> 25) & 0x1) -#define ecap_ecs(e) (((e) >> 24) & 0x1) -#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) -#define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) -#define ecap_coherent(e) ((e) & 0x1) -#define ecap_qis(e) ((e) & 0x2) -#define ecap_pass_through(e) (((e) >> 6) & 0x1) -#define ecap_eim_support(e) (((e) >> 4) & 0x1) -#define ecap_ir_support(e) (((e) >> 3) & 0x1) -#define ecap_dev_iotlb_support(e) (((e) >> 2) & 0x1) -#define ecap_max_handle_mask(e) (((e) >> 20) & 0xf) -#define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */ - -/* Virtual command interface capability */ -#define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */ - -/* IOTLB_REG */ -#define DMA_TLB_FLUSH_GRANU_OFFSET 60 -#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) -#define DMA_TLB_DSI_FLUSH (((u64)2) << 60) -#define DMA_TLB_PSI_FLUSH (((u64)3) << 60) -#define DMA_TLB_IIRG(type) ((type >> 60) & 3) -#define DMA_TLB_IAIG(val) (((val) >> 57) & 3) -#define DMA_TLB_READ_DRAIN (((u64)1) << 49) -#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) -#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) -#define DMA_TLB_IVT (((u64)1) << 63) -#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) -#define DMA_TLB_MAX_SIZE (0x3f) - -/* INVALID_DESC */ -#define DMA_CCMD_INVL_GRANU_OFFSET 61 -#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 4) -#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 4) -#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 4) -#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) -#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) -#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) -#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6) -#define DMA_ID_TLB_ADDR(addr) (addr) -#define DMA_ID_TLB_ADDR_MASK(mask) (mask) - -/* PMEN_REG */ -#define DMA_PMEN_EPM (((u32)1)<<31) -#define DMA_PMEN_PRS (((u32)1)<<0) - -/* GCMD_REG */ -#define DMA_GCMD_TE (((u32)1) << 31) -#define DMA_GCMD_SRTP (((u32)1) << 30) -#define DMA_GCMD_SFL (((u32)1) << 29) -#define DMA_GCMD_EAFL (((u32)1) << 28) -#define DMA_GCMD_WBF (((u32)1) << 27) -#define DMA_GCMD_QIE (((u32)1) << 26) -#define DMA_GCMD_SIRTP (((u32)1) << 24) -#define DMA_GCMD_IRE (((u32) 1) << 25) -#define DMA_GCMD_CFI (((u32) 1) << 23) - -/* GSTS_REG */ -#define DMA_GSTS_TES (((u32)1) << 31) -#define DMA_GSTS_RTPS (((u32)1) << 30) -#define DMA_GSTS_FLS (((u32)1) << 29) -#define DMA_GSTS_AFLS (((u32)1) << 28) -#define DMA_GSTS_WBFS (((u32)1) << 27) -#define DMA_GSTS_QIES (((u32)1) << 26) -#define DMA_GSTS_IRTPS (((u32)1) << 24) -#define DMA_GSTS_IRES (((u32)1) << 25) -#define DMA_GSTS_CFIS (((u32)1) << 23) - -/* DMA_RTADDR_REG */ -#define DMA_RTADDR_RTT (((u64)1) << 11) -#define DMA_RTADDR_SMT (((u64)1) << 10) - -/* CCMD_REG */ -#define DMA_CCMD_ICC (((u64)1) << 63) -#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) -#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61) -#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61) -#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32) -#define DMA_CCMD_MASK_NOBIT 0 -#define DMA_CCMD_MASK_1BIT 1 -#define DMA_CCMD_MASK_2BIT 2 -#define DMA_CCMD_MASK_3BIT 3 -#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) -#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) - -/* FECTL_REG */ -#define DMA_FECTL_IM (((u32)1) << 31) - -/* FSTS_REG */ -#define DMA_FSTS_PFO (1 << 0) /* Primary Fault Overflow */ -#define DMA_FSTS_PPF (1 << 1) /* Primary Pending Fault */ -#define DMA_FSTS_IQE (1 << 4) /* Invalidation Queue Error */ -#define DMA_FSTS_ICE (1 << 5) /* Invalidation Completion Error */ -#define DMA_FSTS_ITE (1 << 6) /* Invalidation Time-out Error */ -#define DMA_FSTS_PRO (1 << 7) /* Page Request Overflow */ -#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) - -/* FRCD_REG, 32 bits access */ -#define DMA_FRCD_F (((u32)1) << 31) -#define dma_frcd_type(d) ((d >> 30) & 1) -#define dma_frcd_fault_reason(c) (c & 0xff) -#define dma_frcd_source_id(c) (c & 0xffff) -#define dma_frcd_pasid_value(c) (((c) >> 8) & 0xfffff) -#define dma_frcd_pasid_present(c) (((c) >> 31) & 1) -/* low 64 bit */ -#define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT)) - -/* PRS_REG */ -#define DMA_PRS_PPR ((u32)1) -#define DMA_PRS_PRO ((u32)2) - -#define DMA_VCS_PAS ((u64)1) - -#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ -do { \ - cycles_t start_time = get_cycles(); \ - while (1) { \ - sts = op(iommu->reg + offset); \ - if (cond) \ - break; \ - if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ - panic("DMAR hardware is malfunctioning\n"); \ - cpu_relax(); \ - } \ -} while (0) - -#define QI_LENGTH 256 /* queue length */ - -enum { - QI_FREE, - QI_IN_USE, - QI_DONE, - QI_ABORT -}; - -#define QI_CC_TYPE 0x1 -#define QI_IOTLB_TYPE 0x2 -#define QI_DIOTLB_TYPE 0x3 -#define QI_IEC_TYPE 0x4 -#define QI_IWD_TYPE 0x5 -#define QI_EIOTLB_TYPE 0x6 -#define QI_PC_TYPE 0x7 -#define QI_DEIOTLB_TYPE 0x8 -#define QI_PGRP_RESP_TYPE 0x9 -#define QI_PSTRM_RESP_TYPE 0xa - -#define QI_IEC_SELECTIVE (((u64)1) << 4) -#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) -#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27)) - -#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) -#define QI_IWD_STATUS_WRITE (((u64)1) << 5) -#define QI_IWD_FENCE (((u64)1) << 6) -#define QI_IWD_PRQ_DRAIN (((u64)1) << 7) - -#define QI_IOTLB_DID(did) (((u64)did) << 16) -#define QI_IOTLB_DR(dr) (((u64)dr) << 7) -#define QI_IOTLB_DW(dw) (((u64)dw) << 6) -#define QI_IOTLB_GRAN(gran) (((u64)gran) >> (DMA_TLB_FLUSH_GRANU_OFFSET-4)) -#define QI_IOTLB_ADDR(addr) (((u64)addr) & VTD_PAGE_MASK) -#define QI_IOTLB_IH(ih) (((u64)ih) << 6) -#define QI_IOTLB_AM(am) (((u8)am) & 0x3f) - -#define QI_CC_FM(fm) (((u64)fm) << 48) -#define QI_CC_SID(sid) (((u64)sid) << 32) -#define QI_CC_DID(did) (((u64)did) << 16) -#define QI_CC_GRAN(gran) (((u64)gran) >> (DMA_CCMD_INVL_GRANU_OFFSET-4)) - -#define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) -#define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16) -#define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) -#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ - ((u64)((pfsid >> 4) & 0xfff) << 52)) -#define QI_DEV_IOTLB_SIZE 1 -#define QI_DEV_IOTLB_MAX_INVS 32 - -#define QI_PC_PASID(pasid) (((u64)pasid) << 32) -#define QI_PC_DID(did) (((u64)did) << 16) -#define QI_PC_GRAN(gran) (((u64)gran) << 4) - -/* PASID cache invalidation granu */ -#define QI_PC_ALL_PASIDS 0 -#define QI_PC_PASID_SEL 1 -#define QI_PC_GLOBAL 3 - -#define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) -#define QI_EIOTLB_IH(ih) (((u64)ih) << 6) -#define QI_EIOTLB_AM(am) (((u64)am) & 0x3f) -#define QI_EIOTLB_PASID(pasid) (((u64)pasid) << 32) -#define QI_EIOTLB_DID(did) (((u64)did) << 16) -#define QI_EIOTLB_GRAN(gran) (((u64)gran) << 4) - -/* QI Dev-IOTLB inv granu */ -#define QI_DEV_IOTLB_GRAN_ALL 1 -#define QI_DEV_IOTLB_GRAN_PASID_SEL 0 - -#define QI_DEV_EIOTLB_ADDR(a) ((u64)(a) & VTD_PAGE_MASK) -#define QI_DEV_EIOTLB_SIZE (((u64)1) << 11) -#define QI_DEV_EIOTLB_PASID(p) ((u64)((p) & 0xfffff) << 32) -#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) -#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) -#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ - ((u64)((pfsid >> 4) & 0xfff) << 52)) -#define QI_DEV_EIOTLB_MAX_INVS 32 - -/* Page group response descriptor QW0 */ -#define QI_PGRP_PASID_P(p) (((u64)(p)) << 4) -#define QI_PGRP_PDP(p) (((u64)(p)) << 5) -#define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12) -#define QI_PGRP_DID(rid) (((u64)(rid)) << 16) -#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) - -/* Page group response descriptor QW1 */ -#define QI_PGRP_LPIG(x) (((u64)(x)) << 2) -#define QI_PGRP_IDX(idx) (((u64)(idx)) << 3) - - -#define QI_RESP_SUCCESS 0x0 -#define QI_RESP_INVALID 0x1 -#define QI_RESP_FAILURE 0xf - -#define QI_GRAN_NONG_PASID 2 -#define QI_GRAN_PSI_PASID 3 - -#define qi_shift(iommu) (DMAR_IQ_SHIFT + !!ecap_smts((iommu)->ecap)) - -struct qi_desc { - u64 qw0; - u64 qw1; - u64 qw2; - u64 qw3; -}; - -struct q_inval { - raw_spinlock_t q_lock; - void *desc; /* invalidation queue */ - int *desc_status; /* desc status */ - int free_head; /* first free entry */ - int free_tail; /* last free entry */ - int free_cnt; -}; - -struct dmar_pci_notify_info; - -#ifdef CONFIG_IRQ_REMAP -/* 1MB - maximum possible interrupt remapping table size */ -#define INTR_REMAP_PAGE_ORDER 8 -#define INTR_REMAP_TABLE_REG_SIZE 0xf -#define INTR_REMAP_TABLE_REG_SIZE_MASK 0xf - -#define INTR_REMAP_TABLE_ENTRIES 65536 - -struct irq_domain; - -struct ir_table { - struct irte *base; - unsigned long *bitmap; -}; - -void intel_irq_remap_add_device(struct dmar_pci_notify_info *info); -#else -static inline void -intel_irq_remap_add_device(struct dmar_pci_notify_info *info) { } -#endif - -struct iommu_flush { - void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, - u8 fm, u64 type); - void (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type); -}; - -enum { - SR_DMAR_FECTL_REG, - SR_DMAR_FEDATA_REG, - SR_DMAR_FEADDR_REG, - SR_DMAR_FEUADDR_REG, - MAX_SR_DMAR_REGS -}; - -#define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0) -#define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) -#define VTD_FLAG_SVM_CAPABLE (1 << 2) - -extern int intel_iommu_sm; -extern spinlock_t device_domain_lock; - -#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) -#define pasid_supported(iommu) (sm_supported(iommu) && \ - ecap_pasid((iommu)->ecap)) - -struct pasid_entry; -struct pasid_state_entry; -struct page_req_dsc; - -/* - * 0: Present - * 1-11: Reserved - * 12-63: Context Ptr (12 - (haw-1)) - * 64-127: Reserved - */ -struct root_entry { - u64 lo; - u64 hi; -}; - -/* - * low 64 bits: - * 0: present - * 1: fault processing disable - * 2-3: translation type - * 12-63: address space root - * high 64 bits: - * 0-2: address width - * 3-6: aval - * 8-23: domain id - */ -struct context_entry { - u64 lo; - u64 hi; -}; - -/* - * When VT-d works in the scalable mode, it allows DMA translation to - * happen through either first level or second level page table. This - * bit marks that the DMA translation for the domain goes through the - * first level page table, otherwise, it goes through the second level. - */ -#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1) - -struct dmar_domain { - int nid; /* node id */ - - unsigned int iommu_refcnt[DMAR_UNITS_SUPPORTED]; - /* Refcount of devices per iommu */ - - - u16 iommu_did[DMAR_UNITS_SUPPORTED]; - /* Domain ids per IOMMU. Use u16 since - * domain ids are 16 bit wide according - * to VT-d spec, section 9.3 */ - - u8 has_iotlb_device: 1; - u8 iommu_coherency: 1; /* indicate coherency of iommu access */ - u8 force_snooping : 1; /* Create IOPTEs with snoop control */ - u8 set_pte_snp:1; - - struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ - - struct dma_pte *pgd; /* virtual address */ - int gaw; /* max guest address width */ - - /* adjusted guest address width, 0 is level 2 30-bit */ - int agaw; - - int flags; /* flags to find out type of domain */ - int iommu_superpage;/* Level of superpages supported: - 0 == 4KiB (no superpages), 1 == 2MiB, - 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ - u64 max_addr; /* maximum mapped address */ - - struct iommu_domain domain; /* generic domain data structure for - iommu core */ -}; - -struct intel_iommu { - void __iomem *reg; /* Pointer to hardware regs, virtual addr */ - u64 reg_phys; /* physical address of hw register set */ - u64 reg_size; /* size of hw register set */ - u64 cap; - u64 ecap; - u64 vccap; - u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ - raw_spinlock_t register_lock; /* protect register handling */ - int seq_id; /* sequence id of the iommu */ - int agaw; /* agaw of this iommu */ - int msagaw; /* max sagaw of this iommu */ - unsigned int irq, pr_irq; - u16 segment; /* PCI segment# */ - unsigned char name[13]; /* Device Name */ - -#ifdef CONFIG_INTEL_IOMMU - unsigned long *domain_ids; /* bitmap of domains */ - spinlock_t lock; /* protect context, domain ids */ - struct root_entry *root_entry; /* virtual address */ - - struct iommu_flush flush; -#endif -#ifdef CONFIG_INTEL_IOMMU_SVM - struct page_req_dsc *prq; - unsigned char prq_name[16]; /* Name for PRQ interrupt */ - struct completion prq_complete; - struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */ -#endif - struct iopf_queue *iopf_queue; - unsigned char iopfq_name[16]; - struct q_inval *qi; /* Queued invalidation info */ - u32 *iommu_state; /* Store iommu states between suspend and resume.*/ - -#ifdef CONFIG_IRQ_REMAP - struct ir_table *ir_table; /* Interrupt remapping info */ - struct irq_domain *ir_domain; - struct irq_domain *ir_msi_domain; -#endif - struct iommu_device iommu; /* IOMMU core code handle */ - int node; - u32 flags; /* Software defined flags */ - - struct dmar_drhd_unit *drhd; - void *perf_statistic; -}; - -/* PCI domain-device relationship */ -struct device_domain_info { - struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ - u32 segment; /* PCI segment number */ - u8 bus; /* PCI bus number */ - u8 devfn; /* PCI devfn number */ - u16 pfsid; /* SRIOV physical function source ID */ - u8 pasid_supported:3; - u8 pasid_enabled:1; - u8 pri_supported:1; - u8 pri_enabled:1; - u8 ats_supported:1; - u8 ats_enabled:1; - u8 ats_qdep; - struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ - struct intel_iommu *iommu; /* IOMMU used by this device */ - struct dmar_domain *domain; /* pointer to domain */ - struct pasid_table *pasid_table; /* pasid table */ -}; - -static inline void __iommu_flush_cache( - struct intel_iommu *iommu, void *addr, int size) -{ - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(addr, size); -} - -/* Convert generic struct iommu_domain to private struct dmar_domain */ -static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) -{ - return container_of(dom, struct dmar_domain, domain); -} - -/* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-10: available - * 11: snoop behavior - * 12-63: Host physical address - */ -struct dma_pte { - u64 val; -}; - -static inline void dma_clear_pte(struct dma_pte *pte) -{ - pte->val = 0; -} - -static inline u64 dma_pte_addr(struct dma_pte *pte) -{ -#ifdef CONFIG_64BIT - return pte->val & VTD_PAGE_MASK & (~DMA_FL_PTE_XD); -#else - /* Must have a full atomic 64-bit read */ - return __cmpxchg64(&pte->val, 0ULL, 0ULL) & - VTD_PAGE_MASK & (~DMA_FL_PTE_XD); -#endif -} - -static inline bool dma_pte_present(struct dma_pte *pte) -{ - return (pte->val & 3) != 0; -} - -static inline bool dma_pte_superpage(struct dma_pte *pte) -{ - return (pte->val & DMA_PTE_LARGE_PAGE); -} - -static inline bool first_pte_in_page(struct dma_pte *pte) -{ - return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE); -} - -static inline int nr_pte_to_next_page(struct dma_pte *pte) -{ - return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) : - (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte; -} - -extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); - -extern int dmar_enable_qi(struct intel_iommu *iommu); -extern void dmar_disable_qi(struct intel_iommu *iommu); -extern int dmar_reenable_qi(struct intel_iommu *iommu); -extern void qi_global_iec(struct intel_iommu *iommu); - -extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, - u8 fm, u64 type); -extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type); -extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, - u16 qdep, u64 addr, unsigned mask); - -void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, - unsigned long npages, bool ih); - -void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, - u32 pasid, u16 qdep, u64 addr, - unsigned int size_order); -void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, - u32 pasid); - -int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, - unsigned int count, unsigned long options); -/* - * Options used in qi_submit_sync: - * QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8. - */ -#define QI_OPT_WAIT_DRAIN BIT(0) - -extern int dmar_ir_support(void); - -void *alloc_pgtable_page(int node); -void free_pgtable_page(void *vaddr); -struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); -void iommu_flush_write_buffer(struct intel_iommu *iommu); -int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); -struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); - -#ifdef CONFIG_INTEL_IOMMU_SVM -extern void intel_svm_check(struct intel_iommu *iommu); -extern int intel_svm_enable_prq(struct intel_iommu *iommu); -extern int intel_svm_finish_prq(struct intel_iommu *iommu); -struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, - void *drvdata); -void intel_svm_unbind(struct iommu_sva *handle); -u32 intel_svm_get_pasid(struct iommu_sva *handle); -int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, - struct iommu_page_response *msg); - -struct intel_svm_dev { - struct list_head list; - struct rcu_head rcu; - struct device *dev; - struct intel_iommu *iommu; - struct iommu_sva sva; - unsigned long prq_seq_number; - u32 pasid; - int users; - u16 did; - u16 dev_iotlb:1; - u16 sid, qdep; -}; - -struct intel_svm { - struct mmu_notifier notifier; - struct mm_struct *mm; - - unsigned int flags; - u32 pasid; - struct list_head devs; -}; -#else -static inline void intel_svm_check(struct intel_iommu *iommu) {} -#endif - -#ifdef CONFIG_INTEL_IOMMU_DEBUGFS -void intel_iommu_debugfs_init(void); -#else -static inline void intel_iommu_debugfs_init(void) {} -#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */ - -extern const struct attribute_group *intel_iommu_groups[]; -bool context_present(struct context_entry *context); -struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, - u8 devfn, int alloc); - -extern const struct iommu_ops intel_iommu_ops; - -#ifdef CONFIG_INTEL_IOMMU -extern int iommu_calculate_agaw(struct intel_iommu *iommu); -extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); -extern int dmar_disabled; -extern int intel_iommu_enabled; -#else -static inline int iommu_calculate_agaw(struct intel_iommu *iommu) -{ - return 0; -} -static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) -{ - return 0; -} -#define dmar_disabled (1) -#define intel_iommu_enabled (0) -#endif - -static inline const char *decode_prq_descriptor(char *str, size_t size, - u64 dw0, u64 dw1, u64 dw2, u64 dw3) -{ - char *buf = str; - int bytes; - - bytes = snprintf(buf, size, - "rid=0x%llx addr=0x%llx %c%c%c%c%c pasid=0x%llx index=0x%llx", - FIELD_GET(GENMASK_ULL(31, 16), dw0), - FIELD_GET(GENMASK_ULL(63, 12), dw1), - dw1 & BIT_ULL(0) ? 'r' : '-', - dw1 & BIT_ULL(1) ? 'w' : '-', - dw0 & BIT_ULL(52) ? 'x' : '-', - dw0 & BIT_ULL(53) ? 'p' : '-', - dw1 & BIT_ULL(2) ? 'l' : '-', - FIELD_GET(GENMASK_ULL(51, 32), dw0), - FIELD_GET(GENMASK_ULL(11, 3), dw1)); - - /* Private Data */ - if (dw0 & BIT_ULL(9)) { - size -= bytes; - buf += bytes; - snprintf(buf, size, " private=0x%llx/0x%llx\n", dw2, dw3); - } - - return str; -} - -#endif -- cgit v1.2.3 From 25357900f4e67094dd09b7dcb8a5f47c3f5a159d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:09:08 +0800 Subject: iommu/vt-d: Make DMAR_UNITS_SUPPORTED default 1024 If the available hardware exceeds DMAR_UNITS_SUPPORTED (previously set to MAX_IO_APICS, or 128), it causes these messages: "DMAR: Failed to allocate seq_id", "DMAR: Parse DMAR table failure.", and "x2apic: IRQ remapping doesn't support X2APIC mode x2apic disabled"; and the system fails to boot properly. To support up to 64 sockets with 10 DMAR units each (640), make the value of DMAR_UNITS_SUPPORTED default 1024. Signed-off-by: Steve Wahl Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Steve Wahl Link: https://lore.kernel.org/linux-iommu/20220615183650.32075-1-steve.wahl@hpe.com/ Link: https://lore.kernel.org/r/20220702015610.2849494-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index cbd714a198a0..d81a51978d01 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -18,11 +18,7 @@ struct acpi_dmar_header; -#ifdef CONFIG_X86 -# define DMAR_UNITS_SUPPORTED MAX_IO_APICS -#else -# define DMAR_UNITS_SUPPORTED 64 -#endif +#define DMAR_UNITS_SUPPORTED 1024 /* DMAR Flags */ #define DMAR_INTR_REMAP 0x1 -- cgit v1.2.3 From fb2accadaa9427a374fa9f482ea24ca731f675ba Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 13 Jul 2022 17:56:48 -0500 Subject: iommu/amd: Introduce function to check and enable SNP To support SNP, IOMMU needs to be enabled, and prohibits IOMMU configurations where DTE[Mode]=0, which means it cannot be supported with IOMMU passthrough domain (a.k.a IOMMU_DOMAIN_IDENTITY), and when AMD IOMMU driver is configured to not use the IOMMU host (v1) page table. Otherwise, RMP table initialization could cause the system to crash. The request to enable SNP support in IOMMU must be done before PCI initialization state of the IOMMU driver because enabling SNP affects how IOMMU driver sets up IOMMU data structures (i.e. DTE). Unlike other IOMMU features, SNP feature does not have an enable bit in the IOMMU control register. Instead, the IOMMU driver introduces an amd_iommu_snp_en variable to track enabling state of SNP. Introduce amd_iommu_snp_enable() for other drivers to request enabling the SNP support in IOMMU, which checks all prerequisites and determines if the feature can be safely enabled. Please see the IOMMU spec section 2.12 for further details. Reviewed-by: Robin Murphy Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Brijesh Singh Link: https://lore.kernel.org/r/20220713225651.20758-7-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- include/linux/amd-iommu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 58e6c3806c09..953e6f12fa1c 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -206,4 +206,8 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value); struct amd_iommu *get_amd_iommu(unsigned int idx); +#ifdef CONFIG_AMD_MEM_ENCRYPT +int amd_iommu_snp_enable(void); +#endif + #endif /* _ASM_X86_AMD_IOMMU_H */ -- cgit v1.2.3 From 1e0b3b0b6cb5e04bcb25fbe4164180d64e3ace07 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 27 Apr 2022 18:02:10 +0300 Subject: wifi: mac80211: Align with Draft P802.11be_D1.5 Align the mac80211 implementation with P802.11be_D1.5. Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 52 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f386f9ed41f3..e75c73ca11ec 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2046,25 +2046,38 @@ struct ieee80211_eht_cap_elem { u8 optional[]; } __packed; +#define IEEE80211_EHT_OPER_INFO_PRESENT 0x1 +#define IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT 0x2 + /** * struct ieee80211_eht_operation - eht operation element * * This structure is the "EHT Operation Element" fields as - * described in P802.11be_D1.4 section 9.4.2.311 + * described in P802.11be_D1.5 section 9.4.2.311 * - * FIXME: The spec is unclear how big the fields are, and doesn't - * indicate the "Disabled Subchannel Bitmap Present" in the - * structure (Figure 9-1002a) at all ... + * @params: EHT operation element parameters. See &IEEE80211_EHT_OPER_* + * @optional: optional parts */ struct ieee80211_eht_operation { - u8 chan_width; - u8 ccfs; - u8 present_bm; - - u8 disable_subchannel_bitmap[]; + u8 params; + u8 optional[]; } __packed; -#define IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT 0x1 +/** + * struct ieee80211_eht_operation_info - eht operation information + * + * @control: EHT operation information control. + * @ccfs0: defines a channel center frequency for a 20, 40, 80, 160, or 320 MHz + * EHT BSS. + * @ccfs1: defines a channel center frequency for a 160 or 320 MHz EHT BSS. + * @optional: optional parts + */ +struct ieee80211_eht_operation_info { + u8 control; + u8 ccfs0; + u8 ccfs1; + u8 optional[]; +} __packed; /* 802.11ac VHT Capabilities */ #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 0x00000000 @@ -2773,10 +2786,12 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2 0x08 #define IEEE80211_EHT_MAC_CAP0_RESTRICTED_TWT 0x10 #define IEEE80211_EHT_MAC_CAP0_SCS_TRAFFIC_DESC 0x20 -#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_MASK 0xc0 -#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_3895 0 -#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_7991 1 -#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_11454 2 +#define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK 0xc0 +#define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_3895 0 +#define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_7991 1 +#define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454 2 + +#define IEEE80211_EHT_MAC_CAP1_MAX_AMPDU_LEN_MASK 0x01 /* EHT PHY capabilities as defined in P802.11be_D1.4 section 9.4.2.313.3 */ #define IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ 0x02 @@ -2949,8 +2964,13 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len) if (len < needed) return false; - if (elem->present_bm & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT) - needed += 2; + if (elem->params & IEEE80211_EHT_OPER_INFO_PRESENT) { + needed += 3; + + if (elem->params & + IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT) + needed += 2; + } return len >= needed; } -- cgit v1.2.3 From 062e8e02dfd43c94b0d601c071e8a5c50bed830e Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 1 Jun 2022 17:43:34 +0300 Subject: wifi: mac80211: Align with Draft P802.11be_D2.0 Align the mac80211 implementation with P802.11be_D2.0. Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e75c73ca11ec..cca564372d16 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2020,7 +2020,7 @@ struct ieee80211_eht_mcs_nss_supp_bw { * struct ieee80211_eht_cap_elem_fixed - EHT capabilities fixed data * * This structure is the "EHT Capabilities element" fixed fields as - * described in P802.11be_D1.4 section 9.4.2.313. + * described in P802.11be_D2.0 section 9.4.2.313. * * @mac_cap_info: MAC capabilities, see IEEE80211_EHT_MAC_CAP* * @phy_cap_info: PHY capabilities, see IEEE80211_EHT_PHY_CAP* @@ -2046,20 +2046,27 @@ struct ieee80211_eht_cap_elem { u8 optional[]; } __packed; -#define IEEE80211_EHT_OPER_INFO_PRESENT 0x1 -#define IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT 0x2 +#define IEEE80211_EHT_OPER_INFO_PRESENT 0x01 +#define IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT 0x02 +#define IEEE80211_EHT_OPER_EHT_DEF_PE_DURATION 0x04 +#define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_LIMIT 0x08 +#define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_EXP_MASK 0x30 /** * struct ieee80211_eht_operation - eht operation element * * This structure is the "EHT Operation Element" fields as - * described in P802.11be_D1.5 section 9.4.2.311 + * described in P802.11be_D2.0 section 9.4.2.311 * * @params: EHT operation element parameters. See &IEEE80211_EHT_OPER_* + * @basic_mcs_nss: indicates the EHT-MCSs for each number of spatial streams in + * EHT PPDUs that are supported by all EHT STAs in the BSS in transmit and + * receive. * @optional: optional parts */ struct ieee80211_eht_operation { u8 params; + __le32 basic_mcs_nss; u8 optional[]; } __packed; @@ -2779,8 +2786,8 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define S1G_OPER_CH_WIDTH_PRIMARY_1MHZ BIT(0) #define S1G_OPER_CH_WIDTH_OPER GENMASK(4, 1) -/* EHT MAC capabilities as defined in P802.11be_D1.4 section 9.4.2.313.2 */ -#define IEEE80211_EHT_MAC_CAP0_NSEP_PRIO_ACCESS 0x01 +/* EHT MAC capabilities as defined in P802.11be_D2.0 section 9.4.2.313.2 */ +#define IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS 0x01 #define IEEE80211_EHT_MAC_CAP0_OM_CONTROL 0x02 #define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 0x04 #define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2 0x08 @@ -2793,7 +2800,7 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_MAC_CAP1_MAX_AMPDU_LEN_MASK 0x01 -/* EHT PHY capabilities as defined in P802.11be_D1.4 section 9.4.2.313.3 */ +/* EHT PHY capabilities as defined in P802.11be_D2.0 section 9.4.2.313.3 */ #define IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ 0x02 #define IEEE80211_EHT_PHY_CAP0_242_TONE_RU_GT20MHZ 0x04 #define IEEE80211_EHT_PHY_CAP0_NDP_4_EHT_LFT_32_GI 0x08 @@ -2858,7 +2865,7 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_PHY_CAP8_RX_4096QAM_WIDER_BW_DL_OFDMA 0x02 /* - * EHT operation channel width as defined in P802.11be_D1.4 section 9.4.2.311 + * EHT operation channel width as defined in P802.11be_D2.0 section 9.4.2.311 */ #define IEEE80211_EHT_OPER_CHAN_WIDTH 0x7 #define IEEE80211_EHT_OPER_CHAN_WIDTH_20MHZ 0 -- cgit v1.2.3 From 93064e15c8a3a8394319a11b8037666e4b7d653d Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Thu, 7 Jul 2022 16:10:36 +0100 Subject: ACPI: utils: Add api to read _SUB from ACPI Add a wrapper function to read the _SUB string from ACPI. Signed-off-by: Stefan Binding Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20220707151037.3901050-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/acpi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4f82a5bc6d98..968a187f14f0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -762,6 +762,7 @@ static inline u64 acpi_arch_get_root_pointer(void) #endif int acpi_get_local_address(acpi_handle handle, u32 *addr); +const char *acpi_get_subsystem_id(acpi_handle handle); #else /* !CONFIG_ACPI */ @@ -1023,6 +1024,11 @@ static inline int acpi_get_local_address(acpi_handle handle, u32 *addr) return -ENODEV; } +static inline const char *acpi_get_subsystem_id(acpi_handle handle) +{ + return ERR_PTR(-ENODEV); +} + static inline int acpi_register_wakeup_handler(int wake_irq, bool (*wakeup)(void *context), void *context) { -- cgit v1.2.3 From 28a6ed8e39f77f6ac613ec9b7461aa75e85fa79a Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Mon, 11 Jul 2022 07:22:57 +0000 Subject: platform/chrome: Add Type-C mux set command definitions Copy EC header definitions for the USB Type-C Mux control command from the EC code base. Also pull in "TBT_UFP_REPLY" definitions, since that is the prior entry in the enum. These headers are already present in the EC code base. [1] [1] https://chromium.googlesource.com/chromiumos/platform/ec/+/b80f85a94a423273c1638ef7b662c56931a138dd/include/ec_commands.h Signed-off-by: Prashant Malani Link: https://lore.kernel.org/r/20220711072333.2064341-4-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/cros_ec_commands.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 8cfa8cfca77e..a3945c5e7f50 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -5722,8 +5722,21 @@ enum typec_control_command { TYPEC_CONTROL_COMMAND_EXIT_MODES, TYPEC_CONTROL_COMMAND_CLEAR_EVENTS, TYPEC_CONTROL_COMMAND_ENTER_MODE, + TYPEC_CONTROL_COMMAND_TBT_UFP_REPLY, + TYPEC_CONTROL_COMMAND_USB_MUX_SET, }; +/* Replies the AP may specify to the TBT EnterMode command as a UFP */ +enum typec_tbt_ufp_reply { + TYPEC_TBT_UFP_REPLY_NAK, + TYPEC_TBT_UFP_REPLY_ACK, +}; + +struct typec_usb_mux_set { + uint8_t mux_index; /* Index of the mux to set in the chain */ + uint8_t mux_flags; /* USB_PD_MUX_*-encoded USB mux state to set */ +} __ec_align1; + struct ec_params_typec_control { uint8_t port; uint8_t command; /* enum typec_control_command */ @@ -5737,6 +5750,8 @@ struct ec_params_typec_control { union { uint32_t clear_events_mask; uint8_t mode_to_enter; /* enum typec_mode */ + uint8_t tbt_ufp_reply; /* enum typec_tbt_ufp_reply */ + struct typec_usb_mux_set mux_params; uint8_t placeholder[128]; }; } __ec_align1; @@ -5815,6 +5830,9 @@ enum tcpc_cc_polarity { #define PD_STATUS_EVENT_SOP_DISC_DONE BIT(0) #define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE BIT(1) #define PD_STATUS_EVENT_HARD_RESET BIT(2) +#define PD_STATUS_EVENT_DISCONNECTED BIT(3) +#define PD_STATUS_EVENT_MUX_0_SET_DONE BIT(4) +#define PD_STATUS_EVENT_MUX_1_SET_DONE BIT(5) struct ec_params_typec_status { uint8_t port; -- cgit v1.2.3 From 4dea97f8636d0514befc9fc5cf342b351b7d0e20 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 1 Jul 2022 05:54:25 -0700 Subject: lib/bitmap: change type of bitmap_weight to unsigned long bitmap_weight() doesn't return negative values, so change it's type to unsigned long. It may help compiler to generate better code and catch bugs. Signed-off-by: Yury Norov --- include/linux/bitmap.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index e1a438bdda52..035d4ac66641 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -163,7 +163,7 @@ bool __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); bool __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); -int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); +unsigned long __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); @@ -431,7 +431,8 @@ static inline bool bitmap_full(const unsigned long *src, unsigned int nbits) return find_first_zero_bit(src, nbits) == nbits; } -static __always_inline int bitmap_weight(const unsigned long *src, unsigned int nbits) +static __always_inline +unsigned long bitmap_weight(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); -- cgit v1.2.3 From cb32c285cc10e428589194e30233d673e7c23c72 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 1 Jul 2022 05:54:26 -0700 Subject: cpumask: change return types to bool where appropriate Some cpumask functions have integer return types where return values are naturally booleans. Signed-off-by: Yury Norov --- include/linux/cpumask.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index fe29ac7cc469..b54e27d9da6b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -372,9 +372,9 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns 1 if @cpu is set in @cpumask, else returns 0 + * Returns true if @cpu is set in @cpumask, else returns false */ -static __always_inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) +static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { return test_bit(cpumask_check(cpu), cpumask_bits((cpumask))); } @@ -384,11 +384,11 @@ static __always_inline int cpumask_test_cpu(int cpu, const struct cpumask *cpuma * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns 1 if @cpu is set in old bitmap of @cpumask, else returns 0 + * Returns true if @cpu is set in old bitmap of @cpumask, else returns false * * test_and_set_bit wrapper for cpumasks. */ -static __always_inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) +static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -398,11 +398,11 @@ static __always_inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpu * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns 1 if @cpu is set in old bitmap of @cpumask, else returns 0 + * Returns true if @cpu is set in old bitmap of @cpumask, else returns false * * test_and_clear_bit wrapper for cpumasks. */ -static __always_inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) +static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -431,9 +431,9 @@ static inline void cpumask_clear(struct cpumask *dstp) * @src1p: the first input * @src2p: the second input * - * If *@dstp is empty, returns 0, else returns 1 + * If *@dstp is empty, returns false, else returns true */ -static inline int cpumask_and(struct cpumask *dstp, +static inline bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { @@ -474,9 +474,9 @@ static inline void cpumask_xor(struct cpumask *dstp, * @src1p: the first input * @src2p: the second input * - * If *@dstp is empty, returns 0, else returns 1 + * If *@dstp is empty, returns false, else returns true */ -static inline int cpumask_andnot(struct cpumask *dstp, +static inline bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { @@ -539,9 +539,9 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input * - * Returns 1 if *@src1p is a subset of *@src2p, else returns 0 + * Returns true if *@src1p is a subset of *@src2p, else returns false */ -static inline int cpumask_subset(const struct cpumask *src1p, +static inline bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), -- cgit v1.2.3 From 8b6b795d9bfc031a8953c40fac8d3cf67e1a4d3d Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 1 Jul 2022 05:54:27 -0700 Subject: lib/cpumask: change return types to unsigned where appropriate Switch return types to unsigned int where return values cannot be negative. Signed-off-by: Yury Norov --- include/linux/cpumask.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b54e27d9da6b..760022bcb925 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -176,12 +176,12 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node) return 0; } -static inline int cpumask_any_and_distribute(const struct cpumask *src1p, +static inline unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p) { return cpumask_first_and(src1p, src2p); } -static inline int cpumask_any_distribute(const struct cpumask *srcp) +static inline unsigned int cpumask_any_distribute(const struct cpumask *srcp) { return cpumask_first(srcp); } @@ -258,12 +258,12 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); } -int __pure cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); -int __pure cpumask_any_but(const struct cpumask *mask, unsigned int cpu); +unsigned int __pure cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); +unsigned int __pure cpumask_any_but(const struct cpumask *mask, unsigned int cpu); unsigned int cpumask_local_spread(unsigned int i, int node); -int cpumask_any_and_distribute(const struct cpumask *src1p, +unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p); -int cpumask_any_distribute(const struct cpumask *srcp); +unsigned int cpumask_any_distribute(const struct cpumask *srcp); /** * for_each_cpu - iterate over every cpu in a mask @@ -289,7 +289,7 @@ int cpumask_any_distribute(const struct cpumask *srcp); (cpu) = cpumask_next_zero((cpu), (mask)), \ (cpu) < nr_cpu_ids;) -extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); +unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); /** * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location -- cgit v1.2.3 From 9b2e70860ef2f0d74b6d9e57929d57b14481b9c9 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 1 Jul 2022 05:54:28 -0700 Subject: lib/cpumask: move trivial wrappers around find_bit to the header To avoid circular dependencies, cpumask keeps simple (almost) one-line wrappers around find_bit() in a c-file. Commit 47d8c15615c0a2 ("include: move find.h from asm_generic to linux") moved find.h header out of asm_generic include path, and it helped to fix many circular dependencies, including some in cpumask.h. This patch moves those one-liners to header files. Signed-off-by: Yury Norov --- include/linux/cpumask.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 760022bcb925..ea3de2c2c180 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -241,7 +241,21 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits); } -unsigned int __pure cpumask_next(int n, const struct cpumask *srcp); +/** + * cpumask_next - get the next cpu in a cpumask + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus set. + */ +static inline +unsigned int cpumask_next(int n, const struct cpumask *srcp) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1); +} /** * cpumask_next_zero - get the next unset cpu in a cpumask @@ -258,8 +272,25 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); } -unsigned int __pure cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); -unsigned int __pure cpumask_any_but(const struct cpumask *mask, unsigned int cpu); +/** + * cpumask_next_and - get the next cpu in *src1p & *src2p + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @src1p: the first cpumask pointer + * @src2p: the second cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus set in both. + */ +static inline +unsigned int cpumask_next_and(int n, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits, n + 1); +} + unsigned int cpumask_local_spread(unsigned int i, int node); unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p); @@ -324,6 +355,26 @@ unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, boo for ((cpu) = -1; \ (cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \ (cpu) < nr_cpu_ids;) + +/** + * cpumask_any_but - return a "random" in a cpumask, but not this one. + * @mask: the cpumask to search + * @cpu: the cpu to ignore. + * + * Often used to find any cpu but smp_processor_id() in a mask. + * Returns >= nr_cpu_ids if no cpus set. + */ +static inline +unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) +{ + unsigned int i; + + cpumask_check(cpu); + for_each_cpu(i, mask) + if (i != cpu) + break; + return i; +} #endif /* SMP */ #define CPU_BITS_NONE \ -- cgit v1.2.3 From db96b0c5f9db22d908ab5f7cd75904adba4b28ca Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Jun 2021 10:56:49 +0200 Subject: headers/deps: mm: Optimize header dependencies There's a couple of superfluous inclusions here - remove them before doing bigger changes. Signed-off-by: Ingo Molnar Signed-off-by: Yury Norov --- include/linux/gfp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2d2ccae933c2..52f2c873a7d4 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -2,10 +2,7 @@ #ifndef __LINUX_GFP_H #define __LINUX_GFP_H -#include #include -#include -#include #include /* The typedef is in types.h but we want the documentation here */ -- cgit v1.2.3 From cb5a065b4ea9c062a18143c8a14e831179687f54 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Apr 2022 18:42:28 +0200 Subject: headers/deps: mm: Split out of This is a much smaller header. Signed-off-by: Ingo Molnar Signed-off-by: Yury Norov --- include/linux/gfp.h | 345 +-------------------------------------------- include/linux/gfp_types.h | 348 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 350 insertions(+), 343 deletions(-) create mode 100644 include/linux/gfp_types.h (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 52f2c873a7d4..f314be58fa77 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -2,354 +2,13 @@ #ifndef __LINUX_GFP_H #define __LINUX_GFP_H +#include + #include #include -/* The typedef is in types.h but we want the documentation here */ -#if 0 -/** - * typedef gfp_t - Memory allocation flags. - * - * GFP flags are commonly used throughout Linux to indicate how memory - * should be allocated. The GFP acronym stands for get_free_pages(), - * the underlying memory allocation function. Not every GFP flag is - * supported by every function which may allocate memory. Most users - * will want to use a plain ``GFP_KERNEL``. - */ -typedef unsigned int __bitwise gfp_t; -#endif - struct vm_area_struct; -/* - * In case of changes, please don't forget to update - * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c - */ - -/* Plain integer GFP bitmasks. Do not use this directly. */ -#define ___GFP_DMA 0x01u -#define ___GFP_HIGHMEM 0x02u -#define ___GFP_DMA32 0x04u -#define ___GFP_MOVABLE 0x08u -#define ___GFP_RECLAIMABLE 0x10u -#define ___GFP_HIGH 0x20u -#define ___GFP_IO 0x40u -#define ___GFP_FS 0x80u -#define ___GFP_ZERO 0x100u -#define ___GFP_ATOMIC 0x200u -#define ___GFP_DIRECT_RECLAIM 0x400u -#define ___GFP_KSWAPD_RECLAIM 0x800u -#define ___GFP_WRITE 0x1000u -#define ___GFP_NOWARN 0x2000u -#define ___GFP_RETRY_MAYFAIL 0x4000u -#define ___GFP_NOFAIL 0x8000u -#define ___GFP_NORETRY 0x10000u -#define ___GFP_MEMALLOC 0x20000u -#define ___GFP_COMP 0x40000u -#define ___GFP_NOMEMALLOC 0x80000u -#define ___GFP_HARDWALL 0x100000u -#define ___GFP_THISNODE 0x200000u -#define ___GFP_ACCOUNT 0x400000u -#define ___GFP_ZEROTAGS 0x800000u -#ifdef CONFIG_KASAN_HW_TAGS -#define ___GFP_SKIP_ZERO 0x1000000u -#define ___GFP_SKIP_KASAN_UNPOISON 0x2000000u -#define ___GFP_SKIP_KASAN_POISON 0x4000000u -#else -#define ___GFP_SKIP_ZERO 0 -#define ___GFP_SKIP_KASAN_UNPOISON 0 -#define ___GFP_SKIP_KASAN_POISON 0 -#endif -#ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x8000000u -#else -#define ___GFP_NOLOCKDEP 0 -#endif -/* If the above are modified, __GFP_BITS_SHIFT may need updating */ - -/* - * Physical address zone modifiers (see linux/mmzone.h - low four bits) - * - * Do not put any conditional on these. If necessary modify the definitions - * without the underscores and use them consistently. The definitions here may - * be used in bit comparisons. - */ -#define __GFP_DMA ((__force gfp_t)___GFP_DMA) -#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) -#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) -#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ -#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) - -/** - * DOC: Page mobility and placement hints - * - * Page mobility and placement hints - * --------------------------------- - * - * These flags provide hints about how mobile the page is. Pages with similar - * mobility are placed within the same pageblocks to minimise problems due - * to external fragmentation. - * - * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be - * moved by page migration during memory compaction or can be reclaimed. - * - * %__GFP_RECLAIMABLE is used for slab allocations that specify - * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. - * - * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible, - * these pages will be spread between local zones to avoid all the dirty - * pages being in one zone (fair zone allocation policy). - * - * %__GFP_HARDWALL enforces the cpuset memory allocation policy. - * - * %__GFP_THISNODE forces the allocation to be satisfied from the requested - * node with no fallbacks or placement policy enforcements. - * - * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. - */ -#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) -#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) -#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) -#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) -#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) - -/** - * DOC: Watermark modifiers - * - * Watermark modifiers -- controls access to emergency reserves - * ------------------------------------------------------------ - * - * %__GFP_HIGH indicates that the caller is high-priority and that granting - * the request is necessary before the system can make forward progress. - * For example, creating an IO context to clean pages. - * - * %__GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is - * high priority. Users are typically interrupt handlers. This may be - * used in conjunction with %__GFP_HIGH - * - * %__GFP_MEMALLOC allows access to all memory. This should only be used when - * the caller guarantees the allocation will allow more memory to be freed - * very shortly e.g. process exiting or swapping. Users either should - * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). - * Users of this flag have to be extremely careful to not deplete the reserve - * completely and implement a throttling mechanism which controls the - * consumption of the reserve based on the amount of freed memory. - * Usage of a pre-allocated pool (e.g. mempool) should be always considered - * before using this flag. - * - * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. - * This takes precedence over the %__GFP_MEMALLOC flag if both are set. - */ -#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) -#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) -#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) -#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) - -/** - * DOC: Reclaim modifiers - * - * Reclaim modifiers - * ----------------- - * Please note that all the following flags are only applicable to sleepable - * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them). - * - * %__GFP_IO can start physical IO. - * - * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the - * allocator recursing into the filesystem which might already be holding - * locks. - * - * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. - * This flag can be cleared to avoid unnecessary delays when a fallback - * option is available. - * - * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when - * the low watermark is reached and have it reclaim pages until the high - * watermark is reached. A caller may wish to clear this flag when fallback - * options are available and the reclaim is likely to disrupt the system. The - * canonical example is THP allocation where a fallback is cheap but - * reclaim/compaction may cause indirect stalls. - * - * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. - * - * The default allocator behavior depends on the request size. We have a concept - * of so called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). - * !costly allocations are too essential to fail so they are implicitly - * non-failing by default (with some exceptions like OOM victims might fail so - * the caller still has to check for failures) while costly requests try to be - * not disruptive and back off even without invoking the OOM killer. - * The following three modifiers might be used to override some of these - * implicit rules - * - * %__GFP_NORETRY: The VM implementation will try only very lightweight - * memory direct reclaim to get some memory under memory pressure (thus - * it can sleep). It will avoid disruptive actions like OOM killer. The - * caller must handle the failure which is quite likely to happen under - * heavy memory pressure. The flag is suitable when failure can easily be - * handled at small cost, such as reduced throughput - * - * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim - * procedures that have previously failed if there is some indication - * that progress has been made else where. It can wait for other - * tasks to attempt high level approaches to freeing memory such as - * compaction (which removes fragmentation) and page-out. - * There is still a definite limit to the number of retries, but it is - * a larger limit than with %__GFP_NORETRY. - * Allocations with this flag may fail, but only when there is - * genuinely little unused memory. While these allocations do not - * directly trigger the OOM killer, their failure indicates that - * the system is likely to need to use the OOM killer soon. The - * caller must handle failure, but can reasonably do so by failing - * a higher-level request, or completing it only in a much less - * efficient manner. - * If the allocation does fail, and the caller is in a position to - * free some non-essential memory, doing so could benefit the system - * as a whole. - * - * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller - * cannot handle allocation failures. The allocation could block - * indefinitely but will never return with failure. Testing for - * failure is pointless. - * New users should be evaluated carefully (and the flag should be - * used only when there is no reasonable failure policy) but it is - * definitely preferable to use the flag rather than opencode endless - * loop around allocator. - * Using this flag for costly allocations is _highly_ discouraged. - */ -#define __GFP_IO ((__force gfp_t)___GFP_IO) -#define __GFP_FS ((__force gfp_t)___GFP_FS) -#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ -#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ -#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) -#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) -#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) -#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) - -/** - * DOC: Action modifiers - * - * Action modifiers - * ---------------- - * - * %__GFP_NOWARN suppresses allocation failure reports. - * - * %__GFP_COMP address compound page metadata. - * - * %__GFP_ZERO returns a zeroed page on success. - * - * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself - * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that - * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting - * memory tags at the same time as zeroing memory has minimal additional - * performace impact. - * - * %__GFP_SKIP_KASAN_UNPOISON makes KASAN skip unpoisoning on page allocation. - * Only effective in HW_TAGS mode. - * - * %__GFP_SKIP_KASAN_POISON makes KASAN skip poisoning on page deallocation. - * Typically, used for userspace pages. Only effective in HW_TAGS mode. - */ -#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) -#define __GFP_COMP ((__force gfp_t)___GFP_COMP) -#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) -#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) -#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO) -#define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON) -#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) - -/* Disable lockdep for GFP context tracking */ -#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) - -/* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (27 + IS_ENABLED(CONFIG_LOCKDEP)) -#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) - -/** - * DOC: Useful GFP flag combinations - * - * Useful GFP flag combinations - * ---------------------------- - * - * Useful GFP flag combinations that are commonly used. It is recommended - * that subsystems start with one of these combinations and then set/clear - * %__GFP_FOO flags as necessary. - * - * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower - * watermark is applied to allow access to "atomic reserves". - * The current implementation doesn't support NMI and few other strict - * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT. - * - * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires - * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. - * - * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is - * accounted to kmemcg. - * - * %GFP_NOWAIT is for kernel allocations that should not stall for direct - * reclaim, start physical IO or use any filesystem callback. - * - * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages - * that do not require the starting of any physical IO. - * Please try to avoid using this flag directly and instead use - * memalloc_noio_{save,restore} to mark the whole scope which cannot - * perform any IO with a short explanation why. All allocation requests - * will inherit GFP_NOIO implicitly. - * - * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. - * Please try to avoid using this flag directly and instead use - * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't - * recurse into the FS layer with a short explanation why. All allocation - * requests will inherit GFP_NOFS implicitly. - * - * %GFP_USER is for userspace allocations that also need to be directly - * accessibly by the kernel or hardware. It is typically used by hardware - * for buffers that are mapped to userspace (e.g. graphics) that hardware - * still must DMA to. cpuset limits are enforced for these allocations. - * - * %GFP_DMA exists for historical reasons and should be avoided where possible. - * The flags indicates that the caller requires that the lowest zone be - * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but - * it would require careful auditing as some users really require it and - * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the - * lowest zone as a type of emergency reserve. - * - * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit - * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory - * because the DMA32 kmalloc cache array is not implemented. - * (Reason: there is no such user in kernel). - * - * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, - * do not need to be directly accessible by the kernel but that cannot - * move once in use. An example may be a hardware allocation that maps - * data directly into userspace but has no addressing limitations. - * - * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not - * need direct access to but can use kmap() when access is required. They - * are expected to be movable via page reclaim or page migration. Typically, - * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE. - * - * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They - * are compound allocations that will generally fail quickly if memory is not - * available and will not wake kswapd/kcompactd on failure. The _LIGHT - * version does not attempt reclaim/compaction at all and is by default used - * in page fault path, while the non-light is used by khugepaged. - */ -#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) -#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) -#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) -#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) -#define GFP_NOIO (__GFP_RECLAIM) -#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) -#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) -#define GFP_DMA __GFP_DMA -#define GFP_DMA32 __GFP_DMA32 -#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) -#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \ - __GFP_SKIP_KASAN_POISON) -#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ - __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) -#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) - /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) #define GFP_MOVABLE_SHIFT 3 diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h new file mode 100644 index 000000000000..06fc85cee23f --- /dev/null +++ b/include/linux/gfp_types.h @@ -0,0 +1,348 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GFP_TYPES_H +#define __LINUX_GFP_TYPES_H + +/* The typedef is in types.h but we want the documentation here */ +#if 0 +/** + * typedef gfp_t - Memory allocation flags. + * + * GFP flags are commonly used throughout Linux to indicate how memory + * should be allocated. The GFP acronym stands for get_free_pages(), + * the underlying memory allocation function. Not every GFP flag is + * supported by every function which may allocate memory. Most users + * will want to use a plain ``GFP_KERNEL``. + */ +typedef unsigned int __bitwise gfp_t; +#endif + +/* + * In case of changes, please don't forget to update + * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c + */ + +/* Plain integer GFP bitmasks. Do not use this directly. */ +#define ___GFP_DMA 0x01u +#define ___GFP_HIGHMEM 0x02u +#define ___GFP_DMA32 0x04u +#define ___GFP_MOVABLE 0x08u +#define ___GFP_RECLAIMABLE 0x10u +#define ___GFP_HIGH 0x20u +#define ___GFP_IO 0x40u +#define ___GFP_FS 0x80u +#define ___GFP_ZERO 0x100u +#define ___GFP_ATOMIC 0x200u +#define ___GFP_DIRECT_RECLAIM 0x400u +#define ___GFP_KSWAPD_RECLAIM 0x800u +#define ___GFP_WRITE 0x1000u +#define ___GFP_NOWARN 0x2000u +#define ___GFP_RETRY_MAYFAIL 0x4000u +#define ___GFP_NOFAIL 0x8000u +#define ___GFP_NORETRY 0x10000u +#define ___GFP_MEMALLOC 0x20000u +#define ___GFP_COMP 0x40000u +#define ___GFP_NOMEMALLOC 0x80000u +#define ___GFP_HARDWALL 0x100000u +#define ___GFP_THISNODE 0x200000u +#define ___GFP_ACCOUNT 0x400000u +#define ___GFP_ZEROTAGS 0x800000u +#ifdef CONFIG_KASAN_HW_TAGS +#define ___GFP_SKIP_ZERO 0x1000000u +#define ___GFP_SKIP_KASAN_UNPOISON 0x2000000u +#define ___GFP_SKIP_KASAN_POISON 0x4000000u +#else +#define ___GFP_SKIP_ZERO 0 +#define ___GFP_SKIP_KASAN_UNPOISON 0 +#define ___GFP_SKIP_KASAN_POISON 0 +#endif +#ifdef CONFIG_LOCKDEP +#define ___GFP_NOLOCKDEP 0x8000000u +#else +#define ___GFP_NOLOCKDEP 0 +#endif +/* If the above are modified, __GFP_BITS_SHIFT may need updating */ + +/* + * Physical address zone modifiers (see linux/mmzone.h - low four bits) + * + * Do not put any conditional on these. If necessary modify the definitions + * without the underscores and use them consistently. The definitions here may + * be used in bit comparisons. + */ +#define __GFP_DMA ((__force gfp_t)___GFP_DMA) +#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) +#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) +#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ +#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) + +/** + * DOC: Page mobility and placement hints + * + * Page mobility and placement hints + * --------------------------------- + * + * These flags provide hints about how mobile the page is. Pages with similar + * mobility are placed within the same pageblocks to minimise problems due + * to external fragmentation. + * + * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be + * moved by page migration during memory compaction or can be reclaimed. + * + * %__GFP_RECLAIMABLE is used for slab allocations that specify + * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. + * + * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible, + * these pages will be spread between local zones to avoid all the dirty + * pages being in one zone (fair zone allocation policy). + * + * %__GFP_HARDWALL enforces the cpuset memory allocation policy. + * + * %__GFP_THISNODE forces the allocation to be satisfied from the requested + * node with no fallbacks or placement policy enforcements. + * + * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. + */ +#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) +#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) +#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) +#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) +#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) + +/** + * DOC: Watermark modifiers + * + * Watermark modifiers -- controls access to emergency reserves + * ------------------------------------------------------------ + * + * %__GFP_HIGH indicates that the caller is high-priority and that granting + * the request is necessary before the system can make forward progress. + * For example, creating an IO context to clean pages. + * + * %__GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is + * high priority. Users are typically interrupt handlers. This may be + * used in conjunction with %__GFP_HIGH + * + * %__GFP_MEMALLOC allows access to all memory. This should only be used when + * the caller guarantees the allocation will allow more memory to be freed + * very shortly e.g. process exiting or swapping. Users either should + * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). + * Users of this flag have to be extremely careful to not deplete the reserve + * completely and implement a throttling mechanism which controls the + * consumption of the reserve based on the amount of freed memory. + * Usage of a pre-allocated pool (e.g. mempool) should be always considered + * before using this flag. + * + * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. + * This takes precedence over the %__GFP_MEMALLOC flag if both are set. + */ +#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) +#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) +#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) +#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) + +/** + * DOC: Reclaim modifiers + * + * Reclaim modifiers + * ----------------- + * Please note that all the following flags are only applicable to sleepable + * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them). + * + * %__GFP_IO can start physical IO. + * + * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the + * allocator recursing into the filesystem which might already be holding + * locks. + * + * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. + * This flag can be cleared to avoid unnecessary delays when a fallback + * option is available. + * + * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when + * the low watermark is reached and have it reclaim pages until the high + * watermark is reached. A caller may wish to clear this flag when fallback + * options are available and the reclaim is likely to disrupt the system. The + * canonical example is THP allocation where a fallback is cheap but + * reclaim/compaction may cause indirect stalls. + * + * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. + * + * The default allocator behavior depends on the request size. We have a concept + * of so called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). + * !costly allocations are too essential to fail so they are implicitly + * non-failing by default (with some exceptions like OOM victims might fail so + * the caller still has to check for failures) while costly requests try to be + * not disruptive and back off even without invoking the OOM killer. + * The following three modifiers might be used to override some of these + * implicit rules + * + * %__GFP_NORETRY: The VM implementation will try only very lightweight + * memory direct reclaim to get some memory under memory pressure (thus + * it can sleep). It will avoid disruptive actions like OOM killer. The + * caller must handle the failure which is quite likely to happen under + * heavy memory pressure. The flag is suitable when failure can easily be + * handled at small cost, such as reduced throughput + * + * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim + * procedures that have previously failed if there is some indication + * that progress has been made else where. It can wait for other + * tasks to attempt high level approaches to freeing memory such as + * compaction (which removes fragmentation) and page-out. + * There is still a definite limit to the number of retries, but it is + * a larger limit than with %__GFP_NORETRY. + * Allocations with this flag may fail, but only when there is + * genuinely little unused memory. While these allocations do not + * directly trigger the OOM killer, their failure indicates that + * the system is likely to need to use the OOM killer soon. The + * caller must handle failure, but can reasonably do so by failing + * a higher-level request, or completing it only in a much less + * efficient manner. + * If the allocation does fail, and the caller is in a position to + * free some non-essential memory, doing so could benefit the system + * as a whole. + * + * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller + * cannot handle allocation failures. The allocation could block + * indefinitely but will never return with failure. Testing for + * failure is pointless. + * New users should be evaluated carefully (and the flag should be + * used only when there is no reasonable failure policy) but it is + * definitely preferable to use the flag rather than opencode endless + * loop around allocator. + * Using this flag for costly allocations is _highly_ discouraged. + */ +#define __GFP_IO ((__force gfp_t)___GFP_IO) +#define __GFP_FS ((__force gfp_t)___GFP_FS) +#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ +#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) +#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) +#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) + +/** + * DOC: Action modifiers + * + * Action modifiers + * ---------------- + * + * %__GFP_NOWARN suppresses allocation failure reports. + * + * %__GFP_COMP address compound page metadata. + * + * %__GFP_ZERO returns a zeroed page on success. + * + * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself + * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that + * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting + * memory tags at the same time as zeroing memory has minimal additional + * performace impact. + * + * %__GFP_SKIP_KASAN_UNPOISON makes KASAN skip unpoisoning on page allocation. + * Only effective in HW_TAGS mode. + * + * %__GFP_SKIP_KASAN_POISON makes KASAN skip poisoning on page deallocation. + * Typically, used for userspace pages. Only effective in HW_TAGS mode. + */ +#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) +#define __GFP_COMP ((__force gfp_t)___GFP_COMP) +#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) +#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) +#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO) +#define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON) +#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) + +/* Disable lockdep for GFP context tracking */ +#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) + +/* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT (27 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) + +/** + * DOC: Useful GFP flag combinations + * + * Useful GFP flag combinations + * ---------------------------- + * + * Useful GFP flag combinations that are commonly used. It is recommended + * that subsystems start with one of these combinations and then set/clear + * %__GFP_FOO flags as necessary. + * + * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower + * watermark is applied to allow access to "atomic reserves". + * The current implementation doesn't support NMI and few other strict + * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT. + * + * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires + * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. + * + * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is + * accounted to kmemcg. + * + * %GFP_NOWAIT is for kernel allocations that should not stall for direct + * reclaim, start physical IO or use any filesystem callback. + * + * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages + * that do not require the starting of any physical IO. + * Please try to avoid using this flag directly and instead use + * memalloc_noio_{save,restore} to mark the whole scope which cannot + * perform any IO with a short explanation why. All allocation requests + * will inherit GFP_NOIO implicitly. + * + * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. + * Please try to avoid using this flag directly and instead use + * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't + * recurse into the FS layer with a short explanation why. All allocation + * requests will inherit GFP_NOFS implicitly. + * + * %GFP_USER is for userspace allocations that also need to be directly + * accessibly by the kernel or hardware. It is typically used by hardware + * for buffers that are mapped to userspace (e.g. graphics) that hardware + * still must DMA to. cpuset limits are enforced for these allocations. + * + * %GFP_DMA exists for historical reasons and should be avoided where possible. + * The flags indicates that the caller requires that the lowest zone be + * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but + * it would require careful auditing as some users really require it and + * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the + * lowest zone as a type of emergency reserve. + * + * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit + * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory + * because the DMA32 kmalloc cache array is not implemented. + * (Reason: there is no such user in kernel). + * + * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, + * do not need to be directly accessible by the kernel but that cannot + * move once in use. An example may be a hardware allocation that maps + * data directly into userspace but has no addressing limitations. + * + * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not + * need direct access to but can use kmap() when access is required. They + * are expected to be movable via page reclaim or page migration. Typically, + * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE. + * + * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They + * are compound allocations that will generally fail quickly if memory is not + * available and will not wake kswapd/kcompactd on failure. The _LIGHT + * version does not attempt reclaim/compaction at all and is by default used + * in page fault path, while the non-light is used by khugepaged. + */ +#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) +#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) +#define GFP_NOIO (__GFP_RECLAIM) +#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) +#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_DMA __GFP_DMA +#define GFP_DMA32 __GFP_DMA32 +#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) +#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \ + __GFP_SKIP_KASAN_POISON) +#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ + __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) +#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) + +#endif /* __LINUX_GFP_TYPES_H */ -- cgit v1.2.3 From f0dd891dd5a1d6dc6c9d486333aac4f433f17d17 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 1 Jul 2022 05:54:30 -0700 Subject: lib/cpumask: move some one-line wrappers to header file After moving gfp flags to a separate header, it's possible to move some cpumask allocators into headers, and avoid creating real functions. Signed-off-by: Yury Norov --- include/linux/cpumask.h | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ea3de2c2c180..80627362c774 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include /* Don't assign or return these: may not be this big! */ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; @@ -794,9 +796,35 @@ typedef struct cpumask *cpumask_var_t; #define __cpumask_var_read_mostly __read_mostly bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); -bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); -bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); -bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); + +static inline +bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) +{ + return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node); +} + +/** + * alloc_cpumask_var - allocate a struct cpumask + * @mask: pointer to cpumask_var_t where the cpumask is returned + * @flags: GFP_ flags + * + * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is + * a nop returning a constant 1 (in ). + * + * See alloc_cpumask_var_node. + */ +static inline +bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + return alloc_cpumask_var_node(mask, flags, NUMA_NO_NODE); +} + +static inline +bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + return alloc_cpumask_var(mask, flags | __GFP_ZERO); +} + void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); -- cgit v1.2.3 From a8755e9bdd6a416331e286cc25cddbd93b2c064a Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Fri, 15 Jul 2022 10:03:49 -0500 Subject: firmware: stratix10-svc: fix kernel-doc warning include/linux/firmware/intel/stratix10-svc-client.h:55: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Flag bit for COMMAND_RECONFIG Fixes: 4a4709d470e6 ("firmware: stratix10-svc: add new FCS commands") Signed-off-by: Dinh Nguyen Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20220715150349.2413994-1-dinguyen@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/intel/stratix10-svc-client.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index a776a787ac75..0c16037fd08d 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -51,7 +51,8 @@ #define SVC_STATUS_ERROR 5 #define SVC_STATUS_NO_SUPPORT 6 #define SVC_STATUS_INVALID_PARAM 7 -/** + +/* * Flag bit for COMMAND_RECONFIG * * COMMAND_RECONFIG_FLAG_PARTIAL: -- cgit v1.2.3 From 7ee951acd31a88f941fd6535fbdee3a1567f1d63 Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Fri, 15 Jul 2022 09:49:24 -0400 Subject: drivers/base: fix userspace break from using bin_attributes for cpumap and cpulist Using bin_attributes with a 0 size causes fstat and friends to return that 0 size. This breaks userspace code that retrieves the size before reading the file. Rather than reverting 75bd50fa841 ("drivers/base/node.c: use bin_attribute to break the size limitation of cpumap ABI") let's put in a size value at compile time. For cpulist the maximum size is on the order of NR_CPUS * (ceil(log10(NR_CPUS)) + 1)/2 which for 8192 is 20480 (8192 * 5)/2. In order to get near that you'd need a system with every other CPU on one node. For example: (0,2,4,8, ... ). To simplify the math and support larger NR_CPUS in the future we are using (NR_CPUS * 7)/2. We also set it to a min of PAGE_SIZE to retain the older behavior for smaller NR_CPUS. The cpumap file the size works out to be NR_CPUS/4 + NR_CPUS/32 - 1 (or NR_CPUS * 9/32 - 1) including the ","s. Add a set of macros for these values to cpumask.h so they can be used in multiple places. Apply these to the handful of such files in drivers/base/topology.c as well as node.c. As an example, on an 80 cpu 4-node system (NR_CPUS == 8192): before: -r--r--r--. 1 root root 0 Jul 12 14:08 system/node/node0/cpulist -r--r--r--. 1 root root 0 Jul 11 17:25 system/node/node0/cpumap after: -r--r--r--. 1 root root 28672 Jul 13 11:32 system/node/node0/cpulist -r--r--r--. 1 root root 4096 Jul 13 11:31 system/node/node0/cpumap CONFIG_NR_CPUS = 16384 -r--r--r--. 1 root root 57344 Jul 13 14:03 system/node/node0/cpulist -r--r--r--. 1 root root 4607 Jul 13 14:02 system/node/node0/cpumap The actual number of cpus doesn't matter for the reported size since they are based on NR_CPUS. Fixes: 75bd50fa841d ("drivers/base/node.c: use bin_attribute to break the size limitation of cpumap ABI") Fixes: bb9ec13d156e ("topology: use bin_attribute to break the size limitation of cpumap ABI") Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Yury Norov Cc: stable@vger.kernel.org Acked-by: Yury Norov (for include/linux/cpumask.h) Signed-off-by: Phil Auld Link: https://lore.kernel.org/r/20220715134924.3466194-1-pauld@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/linux/cpumask.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index fe29ac7cc469..4592d0845941 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1071,4 +1071,22 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, [0] = 1UL \ } } +/* + * Provide a valid theoretical max size for cpumap and cpulist sysfs files + * to avoid breaking userspace which may allocate a buffer based on the size + * reported by e.g. fstat. + * + * for cpumap NR_CPUS * 9/32 - 1 should be an exact length. + * + * For cpulist 7 is (ceil(log10(NR_CPUS)) + 1) allowing for NR_CPUS to be up + * to 2 orders of magnitude larger than 8192. And then we divide by 2 to + * cover a worst-case of every other cpu being on one of two nodes for a + * very large NR_CPUS. + * + * Use PAGE_SIZE as a minimum for smaller configurations. + */ +#define CPUMAP_FILE_MAX_BYTES ((((NR_CPUS * 9)/32 - 1) > PAGE_SIZE) \ + ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE) +#define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE) + #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.3 From 65d9a9a60fd71be964effb2e94747a6acb6e7015 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Fri, 1 Jul 2022 13:04:04 +0530 Subject: kexec_file: drop weak attribute from functions As requested (http://lkml.kernel.org/r/87ee0q7b92.fsf@email.froward.int.ebiederm.org), this series converts weak functions in kexec to use the #ifdef approach. Quoting the 3e35142ef99fe ("kexec_file: drop weak attribute from arch_kexec_apply_relocations[_add]") changelog: : Since commit d1bcae833b32f1 ("ELF: Don't generate unused section symbols") : [1], binutils (v2.36+) started dropping section symbols that it thought : were unused. This isn't an issue in general, but with kexec_file.c, gcc : is placing kexec_arch_apply_relocations[_add] into a separate : .text.unlikely section and the section symbol ".text.unlikely" is being : dropped. Due to this, recordmcount is unable to find a non-weak symbol in : .text.unlikely to generate a relocation record against. This patch (of 2); Drop __weak attribute from functions in kexec_file.c: - arch_kexec_kernel_image_probe() - arch_kimage_file_post_load_cleanup() - arch_kexec_kernel_image_load() - arch_kexec_locate_mem_hole() - arch_kexec_kernel_verify_sig() arch_kexec_kernel_image_load() calls into kexec_image_load_default(), so drop the static attribute for the latter. arch_kexec_kernel_verify_sig() is not overridden by any architecture, so drop the __weak attribute. Link: https://lkml.kernel.org/r/cover.1656659357.git.naveen.n.rao@linux.vnet.ibm.com Link: https://lkml.kernel.org/r/2cd7ca1fe4d6bb6ca38e3283c717878388ed6788.1656659357.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Naveen N. Rao Suggested-by: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Mimi Zohar --- include/linux/kexec.h | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 475683cd67f1..6958c6b471f4 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -188,21 +188,53 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, void *buf, unsigned int size, bool get_value); void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); +void *kexec_image_load_default(struct kimage *image); + +#ifndef arch_kexec_kernel_image_probe +static inline int +arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len) +{ + return kexec_image_probe_default(image, buf, buf_len); +} +#endif + +#ifndef arch_kimage_file_post_load_cleanup +static inline int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + return kexec_image_post_load_cleanup_default(image); +} +#endif + +#ifndef arch_kexec_kernel_image_load +static inline void *arch_kexec_kernel_image_load(struct kimage *image) +{ + return kexec_image_load_default(image); +} +#endif -/* Architectures may override the below functions */ -int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len); -void *arch_kexec_kernel_image_load(struct kimage *image); -int arch_kimage_file_post_load_cleanup(struct kimage *image); #ifdef CONFIG_KEXEC_SIG int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, unsigned long buf_len); #endif -int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf); extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); +#ifndef arch_kexec_locate_mem_hole +/** + * arch_kexec_locate_mem_hole - Find free memory to place the segments. + * @kbuf: Parameters for the memory search. + * + * On success, kbuf->mem will have the start address of the memory region found. + * + * Return: 0 on success, negative errno on error. + */ +static inline int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) +{ + return kexec_locate_mem_hole(kbuf); +} +#endif + /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 -- cgit v1.2.3 From 0738eceb6201691534df07e0928d0a6168a35787 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Fri, 1 Jul 2022 13:04:05 +0530 Subject: kexec: drop weak attribute from functions Drop __weak attribute from functions in kexec_core.c: - machine_kexec_post_load() - arch_kexec_protect_crashkres() - arch_kexec_unprotect_crashkres() - crash_free_reserved_phys_range() Link: https://lkml.kernel.org/r/c0f6219e03cb399d166d518ab505095218a902dd.1656659357.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Naveen N. Rao Suggested-by: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Mimi Zohar --- include/linux/kexec.h | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 6958c6b471f4..8107606ad1e8 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -390,7 +390,10 @@ extern void machine_kexec_cleanup(struct kimage *image); extern int kernel_kexec(void); extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); -int machine_kexec_post_load(struct kimage *image); + +#ifndef machine_kexec_post_load +static inline int machine_kexec_post_load(struct kimage *image) { return 0; } +#endif extern void __crash_kexec(struct pt_regs *); extern void crash_kexec(struct pt_regs *); @@ -423,10 +426,21 @@ extern bool kexec_in_progress; int crash_shrink_memory(unsigned long new_size); size_t crash_get_memory_size(void); -void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); -void arch_kexec_protect_crashkres(void); -void arch_kexec_unprotect_crashkres(void); +#ifndef arch_kexec_protect_crashkres +/* + * Protection mechanism for crashkernel reserved memory after + * the kdump kernel is loaded. + * + * Provide an empty default implementation here -- architecture + * code may override this + */ +static inline void arch_kexec_protect_crashkres(void) { } +#endif + +#ifndef arch_kexec_unprotect_crashkres +static inline void arch_kexec_unprotect_crashkres(void) { } +#endif #ifndef page_to_boot_pfn static inline unsigned long page_to_boot_pfn(struct page *page) @@ -456,6 +470,16 @@ static inline phys_addr_t boot_phys_to_phys(unsigned long boot_phys) } #endif +#ifndef crash_free_reserved_phys_range +static inline void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) +{ + unsigned long addr; + + for (addr = begin; addr < end; addr += PAGE_SIZE) + free_reserved_page(boot_pfn_to_page(addr >> PAGE_SHIFT)); +} +#endif + static inline unsigned long virt_to_boot_phys(void *addr) { return phys_to_boot_phys(__pa((unsigned long)addr)); -- cgit v1.2.3 From 689a71493bd2f31c024f8c0395f85a1fd4b2138e Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Thu, 14 Jul 2022 21:40:24 +0800 Subject: kexec: clean up arch_kexec_kernel_verify_sig Before commit 105e10e2cf1c ("kexec_file: drop weak attribute from functions"), there was already no arch-specific implementation of arch_kexec_kernel_verify_sig. With weak attribute dropped by that commit, arch_kexec_kernel_verify_sig is completely useless. So clean it up. Note later patches are dependent on this patch so it should be backported to the stable tree as well. Cc: stable@vger.kernel.org Suggested-by: Eric W. Biederman Reviewed-by: Michal Suchanek Acked-by: Baoquan He Signed-off-by: Coiby Xu [zohar@linux.ibm.com: reworded patch description "Note"] Link: https://lore.kernel.org/linux-integrity/20220714134027.394370-1-coxu@redhat.com/ Signed-off-by: Mimi Zohar --- include/linux/kexec.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 8107606ad1e8..7f710fb3712b 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -212,11 +212,6 @@ static inline void *arch_kexec_kernel_image_load(struct kimage *image) } #endif -#ifdef CONFIG_KEXEC_SIG -int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, - unsigned long buf_len); -#endif - extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); -- cgit v1.2.3 From c903dae8941deb55043ee46ded29e84e97cd84bb Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Thu, 14 Jul 2022 21:40:25 +0800 Subject: kexec, KEYS: make the code in bzImage64_verify_sig generic commit 278311e417be ("kexec, KEYS: Make use of platform keyring for signature verify") adds platform keyring support on x86 kexec but not arm64. The code in bzImage64_verify_sig uses the keys on the .builtin_trusted_keys, .machine, if configured and enabled, .secondary_trusted_keys, also if configured, and .platform keyrings to verify the signed kernel image as PE file. Cc: kexec@lists.infradead.org Cc: keyrings@vger.kernel.org Cc: linux-security-module@vger.kernel.org Reviewed-by: Michal Suchanek Signed-off-by: Coiby Xu Signed-off-by: Mimi Zohar --- include/linux/kexec.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 7f710fb3712b..13e6c4b58f07 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -19,6 +19,7 @@ #include #include +#include /* Location of a reserved region to hold the crash kernel. */ @@ -212,6 +213,12 @@ static inline void *arch_kexec_kernel_image_load(struct kimage *image) } #endif +#ifdef CONFIG_KEXEC_SIG +#ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION +int kexec_kernel_verify_pe_sig(const char *kernel, unsigned long kernel_len); +#endif +#endif + extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); -- cgit v1.2.3 From ae6ccaa650380d243cf43d31c864c5ced2fd4612 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 7 Jul 2022 08:15:52 +0100 Subject: PM: EM: convert power field to micro-Watts precision and align drivers The milli-Watts precision causes rounding errors while calculating efficiency cost for each OPP. This is especially visible in the 'simple' Energy Model (EM), where the power for each OPP is provided from OPP framework. This can cause some OPPs to be marked inefficient, while using micro-Watts precision that might not happen. Update all EM users which access 'power' field and assume the value is in milli-Watts. Solve also an issue with potential overflow in calculation of energy estimation on 32bit machine. It's needed now since the power value (thus the 'cost' as well) are higher. Example calculation which shows the rounding error and impact: power = 'dyn-power-coeff' * volt_mV * volt_mV * freq_MHz power_a_uW = (100 * 600mW * 600mW * 500MHz) / 10^6 = 18000 power_a_mW = (100 * 600mW * 600mW * 500MHz) / 10^9 = 18 power_b_uW = (100 * 605mW * 605mW * 600MHz) / 10^6 = 21961 power_b_mW = (100 * 605mW * 605mW * 600MHz) / 10^9 = 21 max_freq = 2000MHz cost_a_mW = 18 * 2000MHz/500MHz = 72 cost_a_uW = 18000 * 2000MHz/500MHz = 72000 cost_b_mW = 21 * 2000MHz/600MHz = 70 // <- artificially better cost_b_uW = 21961 * 2000MHz/600MHz = 73203 The 'cost_b_mW' (which is based on old milli-Watts) is misleadingly better that the 'cost_b_uW' (this patch uses micro-Watts) and such would have impact on the 'inefficient OPPs' information in the Cpufreq framework. This patch set removes the rounding issue. Signed-off-by: Lukasz Luba Acked-by: Daniel Lezcano Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 54 +++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 8419bffb4398..b9caa01dfac4 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -62,7 +62,7 @@ struct em_perf_domain { /* * em_perf_domain flags: * - * EM_PERF_DOMAIN_MILLIWATTS: The power values are in milli-Watts or some + * EM_PERF_DOMAIN_MICROWATTS: The power values are in micro-Watts or some * other scale. * * EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating @@ -71,7 +71,7 @@ struct em_perf_domain { * EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might be * created by platform missing real power information */ -#define EM_PERF_DOMAIN_MILLIWATTS BIT(0) +#define EM_PERF_DOMAIN_MICROWATTS BIT(0) #define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1) #define EM_PERF_DOMAIN_ARTIFICIAL BIT(2) @@ -79,22 +79,44 @@ struct em_perf_domain { #define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL) #ifdef CONFIG_ENERGY_MODEL -#define EM_MAX_POWER 0xFFFF +/* + * The max power value in micro-Watts. The limit of 64 Watts is set as + * a safety net to not overflow multiplications on 32bit platforms. The + * 32bit value limit for total Perf Domain power implies a limit of + * maximum CPUs in such domain to 64. + */ +#define EM_MAX_POWER (64000000) /* 64 Watts */ + +/* + * To avoid possible energy estimation overflow on 32bit machines add + * limits to number of CPUs in the Perf. Domain. + * We are safe on 64bit machine, thus some big number. + */ +#ifdef CONFIG_64BIT +#define EM_MAX_NUM_CPUS 4096 +#else +#define EM_MAX_NUM_CPUS 16 +#endif /* - * Increase resolution of energy estimation calculations for 64-bit - * architectures. The extra resolution improves decision made by EAS for the - * task placement when two Performance Domains might provide similar energy - * estimation values (w/o better resolution the values could be equal). + * To avoid an overflow on 32bit machines while calculating the energy + * use a different order in the operation. First divide by the 'cpu_scale' + * which would reduce big value stored in the 'cost' field, then multiply by + * the 'sum_util'. This would allow to handle existing platforms, which have + * e.g. power ~1.3 Watt at max freq, so the 'cost' value > 1mln micro-Watts. + * In such scenario, where there are 4 CPUs in the Perf. Domain the 'sum_util' + * could be 4096, then multiplication: 'cost' * 'sum_util' would overflow. + * This reordering of operations has some limitations, we lose small + * precision in the estimation (comparing to 64bit platform w/o reordering). * - * We increase resolution only if we have enough bits to allow this increased - * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit - * are pretty high and the returns do not justify the increased costs. + * We are safe on 64bit machine. */ #ifdef CONFIG_64BIT -#define em_scale_power(p) ((p) * 1000) +#define em_estimate_energy(cost, sum_util, scale_cpu) \ + (((cost) * (sum_util)) / (scale_cpu)) #else -#define em_scale_power(p) (p) +#define em_estimate_energy(cost, sum_util, scale_cpu) \ + (((cost) / (scale_cpu)) * (sum_util)) #endif struct em_data_callback { @@ -112,7 +134,7 @@ struct em_data_callback { * and frequency. * * In case of CPUs, the power is the one of a single CPU in the domain, - * expressed in milli-Watts or an abstract scale. It is expected to + * expressed in micro-Watts or an abstract scale. It is expected to * fit in the [0, EM_MAX_POWER] range. * * Return 0 on success. @@ -148,7 +170,7 @@ struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, - bool milliwatts); + bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); /** @@ -273,7 +295,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * pd_nrg = ------------------------ (4) * scale_cpu */ - return ps->cost * sum_util / scale_cpu; + return em_estimate_energy(ps->cost, sum_util, scale_cpu); } /** @@ -297,7 +319,7 @@ struct em_data_callback {}; static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, - bool milliwatts) + bool microwatts) { return -EINVAL; } -- cgit v1.2.3 From 5e0fd2026cdd474a85b3135c312912321e60f47a Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 7 Jul 2022 08:15:54 +0100 Subject: firmware: arm_scmi: Get detailed power scale from perf In SCMI v3.1 the power scale can be in micro-Watts. The upper layers, e.g. cpufreq and EM should handle received power values properly (upscale when needed). Thus, provide an interface which allows to check what is the scale for power values. The old interface allowed to distinguish between bogo-Watts and milli-Watts only (which was good for older SCMI spec). Acked-by: Sudeep Holla Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/scmi_protocol.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 704111f63993..a0a246310ba1 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -60,6 +60,12 @@ struct scmi_clock_info { }; }; +enum scmi_power_scale { + SCMI_POWER_BOGOWATTS, + SCMI_POWER_MILLIWATTS, + SCMI_POWER_MICROWATTS +}; + struct scmi_handle; struct scmi_device; struct scmi_protocol_handle; @@ -135,7 +141,7 @@ struct scmi_perf_proto_ops { unsigned long *rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph, struct device *dev); - bool (*power_scale_mw_get)(const struct scmi_protocol_handle *ph); + enum scmi_power_scale (*power_scale_get)(const struct scmi_protocol_handle *ph); }; /** -- cgit v1.2.3 From fcfe0ac2fcfae7d5fcad3d0375cb8ff38caf8aba Mon Sep 17 00:00:00 2001 From: Micah Morton Date: Wed, 8 Jun 2022 20:57:11 +0000 Subject: security: Add LSM hook to setgroups() syscall Give the LSM framework the ability to filter setgroups() syscalls. There are already analagous hooks for the set*uid() and set*gid() syscalls. The SafeSetID LSM will use this new hook to ensure setgroups() calls are allowed by the installed security policy. Tested by putting print statement in security_task_fix_setgroups() hook and confirming that it gets hit when userspace does a setgroups() syscall. Acked-by: Casey Schaufler Reviewed-by: Serge Hallyn Signed-off-by: Micah Morton --- include/linux/lsm_hook_defs.h | 1 + include/linux/lsm_hooks.h | 7 +++++++ include/linux/security.h | 7 +++++++ 3 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index eafa1d2489fd..806448173033 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -201,6 +201,7 @@ LSM_HOOK(int, 0, task_fix_setuid, struct cred *new, const struct cred *old, int flags) LSM_HOOK(int, 0, task_fix_setgid, struct cred *new, const struct cred * old, int flags) +LSM_HOOK(int, 0, task_fix_setgroups, struct cred *new, const struct cred * old) LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid) LSM_HOOK(int, 0, task_getpgid, struct task_struct *p) LSM_HOOK(int, 0, task_getsid, struct task_struct *p) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 91c8146649f5..84a0d7e02176 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -702,6 +702,13 @@ * @old is the set of credentials that are being replaced. * @flags contains one of the LSM_SETID_* values. * Return 0 on success. + * @task_fix_setgroups: + * Update the module's state after setting the supplementary group + * identity attributes of the current process. + * @new is the set of credentials that will be installed. Modifications + * should be made to this rather than to @current->cred. + * @old is the set of credentials that are being replaced. + * Return 0 on success. * @task_setpgid: * Check permission before setting the process group identifier of the * process @p to @pgid. diff --git a/include/linux/security.h b/include/linux/security.h index 7fc4e9f49f54..1dfd32c49fa3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -415,6 +415,7 @@ int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); int security_task_fix_setgid(struct cred *new, const struct cred *old, int flags); +int security_task_fix_setgroups(struct cred *new, const struct cred *old); int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); int security_task_getsid(struct task_struct *p); @@ -1098,6 +1099,12 @@ static inline int security_task_fix_setgid(struct cred *new, return 0; } +static inline int security_task_fix_setgroups(struct cred *new, + const struct cred *old) +{ + return 0; +} + static inline int security_task_setpgid(struct task_struct *p, pid_t pgid) { return 0; -- cgit v1.2.3 From c9fa2b07fa99e648b5042a32dbfa39ba68a190db Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 26 Jun 2022 20:45:07 +0200 Subject: mnt_idmapping: add vfs[g,u]id_into_k[g,u]id() Add two tiny helpers to conver a vfsuid into a kuid. Signed-off-by: Christian Brauner (Microsoft) --- include/linux/mnt_idmapping.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 41dc80f8b67c..f6e5369d2928 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -333,6 +333,19 @@ static inline bool vfsuid_has_fsmapping(struct user_namespace *mnt_userns, return uid_valid(from_vfsuid(mnt_userns, fs_userns, vfsuid)); } +/** + * vfsuid_into_kuid - convert vfsuid into kuid + * @vfsuid: the vfsuid to convert + * + * This can be used when a vfsuid is committed as a kuid. + * + * Return: a kuid with the value of @vfsuid + */ +static inline kuid_t vfsuid_into_kuid(vfsuid_t vfsuid) +{ + return AS_KUIDT(vfsuid); +} + /** * from_vfsgid - map a vfsgid into the filesystem idmapping * @mnt_userns: the mount's idmapping @@ -406,6 +419,19 @@ static inline bool vfsgid_has_fsmapping(struct user_namespace *mnt_userns, return gid_valid(from_vfsgid(mnt_userns, fs_userns, vfsgid)); } +/** + * vfsgid_into_kgid - convert vfsgid into kgid + * @vfsgid: the vfsgid to convert + * + * This can be used when a vfsgid is committed as a kgid. + * + * Return: a kgid with the value of @vfsgid + */ +static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid) +{ + return AS_KGIDT(vfsgid); +} + /** * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns * @mnt_userns: the mount's idmapping -- cgit v1.2.3 From 0c5fd887d2bb47aa37aa9fb1eb1d1d2abac62972 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 6 Jul 2022 18:30:59 +0200 Subject: acl: move idmapped mount fixup into vfs_{g,s}etxattr() This cycle we added support for mounting overlayfs on top of idmapped mounts. Recently I've started looking into potential corner cases when trying to add additional tests and I noticed that reporting for POSIX ACLs is currently wrong when using idmapped layers with overlayfs mounted on top of it. I'm going to give a rather detailed explanation to both the origin of the problem and the solution. Let's assume the user creates the following directory layout and they have a rootfs /var/lib/lxc/c1/rootfs. The files in this rootfs are owned as you would expect files on your host system to be owned. For example, ~/.bashrc for your regular user would be owned by 1000:1000 and /root/.bashrc would be owned by 0:0. IOW, this is just regular boring filesystem tree on an ext4 or xfs filesystem. The user chooses to set POSIX ACLs using the setfacl binary granting the user with uid 4 read, write, and execute permissions for their .bashrc file: setfacl -m u:4:rwx /var/lib/lxc/c2/rootfs/home/ubuntu/.bashrc Now they to expose the whole rootfs to a container using an idmapped mount. So they first create: mkdir -pv /vol/contpool/{ctrover,merge,lowermap,overmap} mkdir -pv /vol/contpool/ctrover/{over,work} chown 10000000:10000000 /vol/contpool/ctrover/{over,work} The user now creates an idmapped mount for the rootfs: mount-idmapped/mount-idmapped --map-mount=b:0:10000000:65536 \ /var/lib/lxc/c2/rootfs \ /vol/contpool/lowermap This for example makes it so that /var/lib/lxc/c2/rootfs/home/ubuntu/.bashrc which is owned by uid and gid 1000 as being owned by uid and gid 10001000 at /vol/contpool/lowermap/home/ubuntu/.bashrc. Assume the user wants to expose these idmapped mounts through an overlayfs mount to a container. mount -t overlay overlay \ -o lowerdir=/vol/contpool/lowermap, \ upperdir=/vol/contpool/overmap/over, \ workdir=/vol/contpool/overmap/work \ /vol/contpool/merge The user can do this in two ways: (1) Mount overlayfs in the initial user namespace and expose it to the container. (2) Mount overlayfs on top of the idmapped mounts inside of the container's user namespace. Let's assume the user chooses the (1) option and mounts overlayfs on the host and then changes into a container which uses the idmapping 0:10000000:65536 which is the same used for the two idmapped mounts. Now the user tries to retrieve the POSIX ACLs using the getfacl command getfacl -n /vol/contpool/lowermap/home/ubuntu/.bashrc and to their surprise they see: # file: vol/contpool/merge/home/ubuntu/.bashrc # owner: 1000 # group: 1000 user::rw- user:4294967295:rwx group::r-- mask::rwx other::r-- indicating the the uid wasn't correctly translated according to the idmapped mount. The problem is how we currently translate POSIX ACLs. Let's inspect the callchain in this example: idmapped mount /vol/contpool/merge: 0:10000000:65536 caller's idmapping: 0:10000000:65536 overlayfs idmapping (ofs->creator_cred): 0:0:4k /* initial idmapping */ sys_getxattr() -> path_getxattr() -> getxattr() -> do_getxattr() |> vfs_getxattr() | -> __vfs_getxattr() | -> handler->get == ovl_posix_acl_xattr_get() | -> ovl_xattr_get() | -> vfs_getxattr() | -> __vfs_getxattr() | -> handler->get() /* lower filesystem callback */ |> posix_acl_fix_xattr_to_user() { 4 = make_kuid(&init_user_ns, 4); 4 = mapped_kuid_fs(&init_user_ns /* no idmapped mount */, 4); /* FAILURE */ -1 = from_kuid(0:10000000:65536 /* caller's idmapping */, 4); } If the user chooses to use option (2) and mounts overlayfs on top of idmapped mounts inside the container things don't look that much better: idmapped mount /vol/contpool/merge: 0:10000000:65536 caller's idmapping: 0:10000000:65536 overlayfs idmapping (ofs->creator_cred): 0:10000000:65536 sys_getxattr() -> path_getxattr() -> getxattr() -> do_getxattr() |> vfs_getxattr() | -> __vfs_getxattr() | -> handler->get == ovl_posix_acl_xattr_get() | -> ovl_xattr_get() | -> vfs_getxattr() | -> __vfs_getxattr() | -> handler->get() /* lower filesystem callback */ |> posix_acl_fix_xattr_to_user() { 4 = make_kuid(&init_user_ns, 4); 4 = mapped_kuid_fs(&init_user_ns, 4); /* FAILURE */ -1 = from_kuid(0:10000000:65536 /* caller's idmapping */, 4); } As is easily seen the problem arises because the idmapping of the lower mount isn't taken into account as all of this happens in do_gexattr(). But do_getxattr() is always called on an overlayfs mount and inode and thus cannot possible take the idmapping of the lower layers into account. This problem is similar for fscaps but there the translation happens as part of vfs_getxattr() already. Let's walk through an fscaps overlayfs callchain: setcap 'cap_net_raw+ep' /var/lib/lxc/c2/rootfs/home/ubuntu/.bashrc The expected outcome here is that we'll receive the cap_net_raw capability as we are able to map the uid associated with the fscap to 0 within our container. IOW, we want to see 0 as the result of the idmapping translations. If the user chooses option (1) we get the following callchain for fscaps: idmapped mount /vol/contpool/merge: 0:10000000:65536 caller's idmapping: 0:10000000:65536 overlayfs idmapping (ofs->creator_cred): 0:0:4k /* initial idmapping */ sys_getxattr() -> path_getxattr() -> getxattr() -> do_getxattr() -> vfs_getxattr() -> xattr_getsecurity() -> security_inode_getsecurity() ________________________________ -> cap_inode_getsecurity() | | { V | 10000000 = make_kuid(0:0:4k /* overlayfs idmapping */, 10000000); | 10000000 = mapped_kuid_fs(0:0:4k /* no idmapped mount */, 10000000); | /* Expected result is 0 and thus that we own the fscap. */ | 0 = from_kuid(0:10000000:65536 /* caller's idmapping */, 10000000); | } | -> vfs_getxattr_alloc() | -> handler->get == ovl_other_xattr_get() | -> vfs_getxattr() | -> xattr_getsecurity() | -> security_inode_getsecurity() | -> cap_inode_getsecurity() | { | 0 = make_kuid(0:0:4k /* lower s_user_ns */, 0); | 10000000 = mapped_kuid_fs(0:10000000:65536 /* idmapped mount */, 0); | 10000000 = from_kuid(0:0:4k /* overlayfs idmapping */, 10000000); | |____________________________________________________________________| } -> vfs_getxattr_alloc() -> handler->get == /* lower filesystem callback */ And if the user chooses option (2) we get: idmapped mount /vol/contpool/merge: 0:10000000:65536 caller's idmapping: 0:10000000:65536 overlayfs idmapping (ofs->creator_cred): 0:10000000:65536 sys_getxattr() -> path_getxattr() -> getxattr() -> do_getxattr() -> vfs_getxattr() -> xattr_getsecurity() -> security_inode_getsecurity() _______________________________ -> cap_inode_getsecurity() | | { V | 10000000 = make_kuid(0:10000000:65536 /* overlayfs idmapping */, 0); | 10000000 = mapped_kuid_fs(0:0:4k /* no idmapped mount */, 10000000); | /* Expected result is 0 and thus that we own the fscap. */ | 0 = from_kuid(0:10000000:65536 /* caller's idmapping */, 10000000); | } | -> vfs_getxattr_alloc() | -> handler->get == ovl_other_xattr_get() | |-> vfs_getxattr() | -> xattr_getsecurity() | -> security_inode_getsecurity() | -> cap_inode_getsecurity() | { | 0 = make_kuid(0:0:4k /* lower s_user_ns */, 0); | 10000000 = mapped_kuid_fs(0:10000000:65536 /* idmapped mount */, 0); | 0 = from_kuid(0:10000000:65536 /* overlayfs idmapping */, 10000000); | |____________________________________________________________________| } -> vfs_getxattr_alloc() -> handler->get == /* lower filesystem callback */ We can see how the translation happens correctly in those cases as the conversion happens within the vfs_getxattr() helper. For POSIX ACLs we need to do something similar. However, in contrast to fscaps we cannot apply the fix directly to the kernel internal posix acl data structure as this would alter the cached values and would also require a rework of how we currently deal with POSIX ACLs in general which almost never take the filesystem idmapping into account (the noteable exception being FUSE but even there the implementation is special) and instead retrieve the raw values based on the initial idmapping. The correct values are then generated right before returning to userspace. The fix for this is to move taking the mount's idmapping into account directly in vfs_getxattr() instead of having it be part of posix_acl_fix_xattr_to_user(). To this end we split out two small and unexported helpers posix_acl_getxattr_idmapped_mnt() and posix_acl_setxattr_idmapped_mnt(). The former to be called in vfs_getxattr() and the latter to be called in vfs_setxattr(). Let's go back to the original example. Assume the user chose option (1) and mounted overlayfs on top of idmapped mounts on the host: idmapped mount /vol/contpool/merge: 0:10000000:65536 caller's idmapping: 0:10000000:65536 overlayfs idmapping (ofs->creator_cred): 0:0:4k /* initial idmapping */ sys_getxattr() -> path_getxattr() -> getxattr() -> do_getxattr() |> vfs_getxattr() | |> __vfs_getxattr() | | -> handler->get == ovl_posix_acl_xattr_get() | | -> ovl_xattr_get() | | -> vfs_getxattr() | | |> __vfs_getxattr() | | | -> handler->get() /* lower filesystem callback */ | | |> posix_acl_getxattr_idmapped_mnt() | | { | | 4 = make_kuid(&init_user_ns, 4); | | 10000004 = mapped_kuid_fs(0:10000000:65536 /* lower idmapped mount */, 4); | | 10000004 = from_kuid(&init_user_ns, 10000004); | | |_______________________ | | } | | | | | |> posix_acl_getxattr_idmapped_mnt() | | { | | V | 10000004 = make_kuid(&init_user_ns, 10000004); | 10000004 = mapped_kuid_fs(&init_user_ns /* no idmapped mount */, 10000004); | 10000004 = from_kuid(&init_user_ns, 10000004); | } |_________________________________________________ | | | | |> posix_acl_fix_xattr_to_user() | { V 10000004 = make_kuid(0:0:4k /* init_user_ns */, 10000004); /* SUCCESS */ 4 = from_kuid(0:10000000:65536 /* caller's idmapping */, 10000004); } And similarly if the user chooses option (1) and mounted overayfs on top of idmapped mounts inside the container: idmapped mount /vol/contpool/merge: 0:10000000:65536 caller's idmapping: 0:10000000:65536 overlayfs idmapping (ofs->creator_cred): 0:10000000:65536 sys_getxattr() -> path_getxattr() -> getxattr() -> do_getxattr() |> vfs_getxattr() | |> __vfs_getxattr() | | -> handler->get == ovl_posix_acl_xattr_get() | | -> ovl_xattr_get() | | -> vfs_getxattr() | | |> __vfs_getxattr() | | | -> handler->get() /* lower filesystem callback */ | | |> posix_acl_getxattr_idmapped_mnt() | | { | | 4 = make_kuid(&init_user_ns, 4); | | 10000004 = mapped_kuid_fs(0:10000000:65536 /* lower idmapped mount */, 4); | | 10000004 = from_kuid(&init_user_ns, 10000004); | | |_______________________ | | } | | | | | |> posix_acl_getxattr_idmapped_mnt() | | { V | 10000004 = make_kuid(&init_user_ns, 10000004); | 10000004 = mapped_kuid_fs(&init_user_ns /* no idmapped mount */, 10000004); | 10000004 = from_kuid(0(&init_user_ns, 10000004); | |_________________________________________________ | } | | | |> posix_acl_fix_xattr_to_user() | { V 10000004 = make_kuid(0:0:4k /* init_user_ns */, 10000004); /* SUCCESS */ 4 = from_kuid(0:10000000:65536 /* caller's idmappings */, 10000004); } The last remaining problem we need to fix here is ovl_get_acl(). During ovl_permission() overlayfs will call: ovl_permission() -> generic_permission() -> acl_permission_check() -> check_acl() -> get_acl() -> inode->i_op->get_acl() == ovl_get_acl() > get_acl() /* on the underlying filesystem) ->inode->i_op->get_acl() == /*lower filesystem callback */ -> posix_acl_permission() passing through the get_acl request to the underlying filesystem. This will retrieve the acls stored in the lower filesystem without taking the idmapping of the underlying mount into account as this would mean altering the cached values for the lower filesystem. So we block using ACLs for now until we decided on a nice way to fix this. Note this limitation both in the documentation and in the code. The most straightforward solution would be to have ovl_get_acl() simply duplicate the ACLs, update the values according to the idmapped mount and return it to acl_permission_check() so it can be used in posix_acl_permission() forgetting them afterwards. This is a bit heavy handed but fairly straightforward otherwise. Link: https://github.com/brauner/mount-idmapped/issues/9 Link: https://lore.kernel.org/r/20220708090134.385160-2-brauner@kernel.org Cc: Seth Forshee Cc: Amir Goldstein Cc: Vivek Goyal Cc: Christoph Hellwig Cc: Aleksa Sarai Cc: Miklos Szeredi Cc: linux-unionfs@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/posix_acl_xattr.h | 34 ++++++++++++++++++++++------------ include/linux/xattr.h | 2 +- 2 files changed, 23 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index 1766e1de6956..b6bd3eac2bcc 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -33,21 +33,31 @@ posix_acl_xattr_count(size_t size) } #ifdef CONFIG_FS_POSIX_ACL -void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, - struct inode *inode, - void *value, size_t size); -void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, - struct inode *inode, - void *value, size_t size); +void posix_acl_fix_xattr_from_user(void *value, size_t size); +void posix_acl_fix_xattr_to_user(void *value, size_t size); +void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, + const struct inode *inode, + void *value, size_t size); +void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, + const struct inode *inode, + void *value, size_t size); #else -static inline void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, - struct inode *inode, - void *value, size_t size) +static inline void posix_acl_fix_xattr_from_user(void *value, size_t size) { } -static inline void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, - struct inode *inode, - void *value, size_t size) +static inline void posix_acl_fix_xattr_to_user(void *value, size_t size) +{ +} +static inline void +posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, + const struct inode *inode, void *value, + size_t size) +{ +} +static inline void +posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, + const struct inode *inode, void *value, + size_t size) { } #endif diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 4c379d23ec6e..979a9d3e5bfb 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -61,7 +61,7 @@ int __vfs_setxattr_locked(struct user_namespace *, struct dentry *, const char *, const void *, size_t, int, struct inode **); int vfs_setxattr(struct user_namespace *, struct dentry *, const char *, - const void *, size_t, int); + void *, size_t, int); int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *); int __vfs_removexattr_locked(struct user_namespace *, struct dentry *, const char *, struct inode **); -- cgit v1.2.3 From 8043bffd01833a8544f2466fb3804310d6e73d09 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 6 Jul 2022 17:13:23 +0200 Subject: acl: make posix_acl_clone() available to overlayfs The ovl_get_acl() function needs to alter the POSIX ACLs retrieved from the lower filesystem. Instead of hand-rolling a overlayfs specific posix_acl_clone() variant allow export it. It's not special and it's not deeply internal anyway. Link: https://lore.kernel.org/r/20220708090134.385160-3-brauner@kernel.org Cc: Seth Forshee Cc: Amir Goldstein Cc: Vivek Goyal Cc: Christoph Hellwig Cc: Aleksa Sarai Cc: Miklos Szeredi Cc: linux-unionfs@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/posix_acl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index b65c877d92b8..7d1e604c1325 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -73,6 +73,7 @@ extern int set_posix_acl(struct user_namespace *, struct inode *, int, struct posix_acl *); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); +struct posix_acl *posix_acl_clone(const struct posix_acl *acl, gfp_t flags); #ifdef CONFIG_FS_POSIX_ACL int posix_acl_chmod(struct user_namespace *, struct inode *, umode_t); -- cgit v1.2.3 From 0563231f93c6d1f582b168a47753b345c1e20d81 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:44:54 -0400 Subject: tracing/events: Add __vstring() and __assign_vstr() helper macros There's several places that open code the following logic: TP_STRUCT__entry(__dynamic_array(char, msg, MSG_MAX)), TP_fast_assign(vsnprintf(__get_str(msg), MSG_MAX, vaf->fmt, *vaf->va);) To load a string created by variable array va_list. The main issue with this approach is that "MSG_MAX" usage in the __dynamic_array() portion. That actually just reserves the MSG_MAX in the event, and even wastes space because there's dynamic meta data also saved in the event to denote the offset and size of the dynamic array. It would have been better to just use a static __array() field. Instead, create __vstring() and __assign_vstr() that work like __string and __assign_str() but instead of taking a destination string to copy, take a format string and a va_list pointer and fill in the values. It uses the helper: #define __trace_event_vstr_len(fmt, va) \ ({ \ va_list __ap; \ int __ret; \ \ va_copy(__ap, *(va)); \ __ret = vsnprintf(NULL, 0, fmt, __ap) + 1; \ va_end(__ap); \ \ min(__ret, TRACE_EVENT_STR_MAX); \ }) To figure out the length to store the string. It may be slightly slower as it needs to run the vsnprintf() twice, but it now saves space on the ring buffer. Link: https://lkml.kernel.org/r/20220705224749.053570613@goodmis.org Cc: Dennis Dalessandro Cc: Ingo Molnar Cc: Andrew Morton Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Kalle Valo Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Arend van Spriel Cc: Franky Lin Cc: Hante Meuleman Cc: Gregory Greenman Cc: Peter Chen Cc: Greg Kroah-Hartman Cc: Mathias Nyman Cc: Chunfeng Yun Cc: Bin Liu Cc: Marek Lindner Cc: Simon Wunderlich Cc: Antonio Quartulli Cc: Sven Eckelmann Cc: Johannes Berg Cc: Jim Cromie Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index e6e95a9f07a5..b18759a673c6 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -916,6 +916,24 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, #endif +#define TRACE_EVENT_STR_MAX 512 + +/* + * gcc warns that you can not use a va_list in an inlined + * function. But lets me make it into a macro :-/ + */ +#define __trace_event_vstr_len(fmt, va) \ +({ \ + va_list __ap; \ + int __ret; \ + \ + va_copy(__ap, *(va)); \ + __ret = vsnprintf(NULL, 0, fmt, __ap) + 1; \ + va_end(__ap); \ + \ + min(__ret, TRACE_EVENT_STR_MAX); \ +}) + #endif /* _LINUX_TRACE_EVENT_H */ /* -- cgit v1.2.3 From e68c5dcf0aacc48a23cedcb3ce81b8c60837f48c Mon Sep 17 00:00:00 2001 From: Jaehee Park Date: Wed, 13 Jul 2022 16:40:47 -0700 Subject: net: ipv4: new arp_accept option to accept garp only if in-network In many deployments, we want the option to not learn a neighbor from garp if the src ip is not in the same subnet as an address configured on the interface that received the garp message. net.ipv4.arp_accept sysctl is currently used to control creation of a neigh from a received garp packet. This patch adds a new option '2' to net.ipv4.arp_accept which extends option '1' by including the subnet check. Signed-off-by: Jaehee Park Suggested-by: Roopa Prabhu Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ead323243e7b..ddb27fc0ee8c 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -131,7 +131,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) IN_DEV_ORCONF((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) -#define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) +#define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ACCEPT) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) #define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) -- cgit v1.2.3 From 868941b14441282ba08761b770fc6cad69d5bdb7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 29 Jun 2022 15:07:00 +0200 Subject: fs: remove no_llseek Now that all callers of ->llseek are going through vfs_llseek(), we don't gain anything by keeping no_llseek around. Nothing actually calls it and setting ->llseek to no_lseek is completely equivalent to leaving it NULL. Longer term (== by the end of merge window) we want to remove all such intializations. To simplify the merge window this commit does *not* touch initializers - it only defines no_llseek as NULL (and simplifies the tests on file opening). At -rc1 we'll need do a mechanical removal of no_llseek - git grep -l -w no_llseek | grep -v porting.rst | while read i; do sed -i '/\/d' $i done would do it. Signed-off-by: Jason A. Donenfeld Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae..294932167335 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3022,7 +3022,7 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); -extern loff_t no_llseek(struct file *file, loff_t offset, int whence); +#define no_llseek NULL extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, -- cgit v1.2.3 From bc72d938c149197688ae3b3ecaa25d4aee8653cb Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Wed, 1 Jun 2022 17:48:32 +0000 Subject: iio: trigger: move trig->owner init to trigger allocate() stage To provide a new IIO trigger to the IIO core, usually driver executes the following pipeline: allocate()/register()/get(). Before, IIO core assigned trig->owner as a pointer to the module which registered this trigger at the register() stage. But actually the trigger object is owned by the module earlier, on the allocate() stage, when trigger object is successfully allocated for the driver. This patch moves trig->owner initialization from register() stage of trigger initialization pipeline to allocate() stage to eliminate all misunderstandings and time gaps between trigger object creation and owner acquiring. Signed-off-by: Dmitry Rokosov Link: https://lore.kernel.org/r/20220601174837.20292-1-ddrokosov@sberdevices.ru Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 9 ++++++--- include/linux/iio/trigger.h | 21 ++++++++++----------- 2 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d9b4a9ca9a0f..5dfbfc991c69 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -727,10 +727,13 @@ static inline void *iio_priv(const struct iio_dev *indio_dev) void iio_device_free(struct iio_dev *indio_dev); struct iio_dev *devm_iio_device_alloc(struct device *parent, int sizeof_priv); -__printf(2, 3) -struct iio_trigger *devm_iio_trigger_alloc(struct device *parent, - const char *fmt, ...); +#define devm_iio_trigger_alloc(parent, fmt, ...) \ + __devm_iio_trigger_alloc((parent), THIS_MODULE, (fmt), ##__VA_ARGS__) +__printf(3, 4) +struct iio_trigger *__devm_iio_trigger_alloc(struct device *parent, + struct module *this_mod, + const char *fmt, ...); /** * iio_get_debugfs_dentry() - helper function to get the debugfs_dentry * @indio_dev: IIO device structure for device diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 03b1d6863436..f6360d9a492d 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -131,16 +131,10 @@ static inline void *iio_trigger_get_drvdata(struct iio_trigger *trig) * iio_trigger_register() - register a trigger with the IIO core * @trig_info: trigger to be registered **/ -#define iio_trigger_register(trig_info) \ - __iio_trigger_register((trig_info), THIS_MODULE) -int __iio_trigger_register(struct iio_trigger *trig_info, - struct module *this_mod); +int iio_trigger_register(struct iio_trigger *trig_info); -#define devm_iio_trigger_register(dev, trig_info) \ - __devm_iio_trigger_register((dev), (trig_info), THIS_MODULE) -int __devm_iio_trigger_register(struct device *dev, - struct iio_trigger *trig_info, - struct module *this_mod); +int devm_iio_trigger_register(struct device *dev, + struct iio_trigger *trig_info); /** * iio_trigger_unregister() - unregister a trigger from the core @@ -168,8 +162,13 @@ void iio_trigger_poll_chained(struct iio_trigger *trig); irqreturn_t iio_trigger_generic_data_rdy_poll(int irq, void *private); -__printf(2, 3) -struct iio_trigger *iio_trigger_alloc(struct device *parent, const char *fmt, ...); +#define iio_trigger_alloc(parent, fmt, ...) \ + __iio_trigger_alloc((parent), THIS_MODULE, (fmt), ##__VA_ARGS__) + +__printf(3, 4) +struct iio_trigger *__iio_trigger_alloc(struct device *parent, + struct module *this_mod, + const char *fmt, ...); void iio_trigger_free(struct iio_trigger *trig); /** -- cgit v1.2.3 From f484da847a01936e1d087a80cc96e8254492527c Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sun, 3 Jul 2022 13:54:03 -0700 Subject: net/mlx5: Expose the ability to point to any UID from shared UID Expose shared_object_to_user_object_allowed, this capability means an object created with shared UID can point to any UID. Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8e87eb47f9dc..9321d774e2d8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1371,7 +1371,9 @@ enum { }; struct mlx5_ifc_cmd_hca_cap_bits { - u8 reserved_at_0[0x1f]; + u8 reserved_at_0[0x10]; + u8 shared_object_to_user_object_allowed[0x1]; + u8 reserved_at_13[0xe]; u8 vhca_resource_manager[0x1]; u8 hca_cap_2[0x1]; @@ -8507,7 +8509,7 @@ struct mlx5_ifc_create_flow_table_out_bits { struct mlx5_ifc_create_flow_table_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; -- cgit v1.2.3 From 6c27c56cdc69608211617eea217e1a6cecccc675 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sun, 3 Jul 2022 13:54:04 -0700 Subject: net/mlx5: fs, expose flow table ID to users Expose the flow table ID to users. This will be used by downstream patches to allow creating steering rules that point to a flow table ID. Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index ece3e35622d7..eee07d416b56 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -315,4 +315,5 @@ struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, struct mlx5_pkt_reformat *reformat); +u32 mlx5_flow_table_id(struct mlx5_flow_table *ft); #endif -- cgit v1.2.3 From b0bb369ee451323968b31392a86398f15a2ba183 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sun, 3 Jul 2022 13:54:05 -0700 Subject: net/mlx5: fs, allow flow table creation with a UID Add UID field to flow table attributes to allow creating flow tables with a non default (zero) uid. Signed-off-by: Mark Bloch Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index eee07d416b56..8e73c377da2c 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -178,6 +178,7 @@ struct mlx5_flow_table_attr { int max_fte; u32 level; u32 flags; + u16 uid; struct mlx5_flow_table *next_ft; struct { -- cgit v1.2.3 From 335e52c28cf9954d65b819cb68912fd32de3c844 Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 1 Jul 2022 17:16:19 +0800 Subject: mm: Add PAGE_ALIGN_DOWN macro This is just the same as PAGE_ALIGN(), but rounds the address down, not up. Suggested-by: Dmitry Vyukov Signed-off-by: David Gow Acked-by: Andrew Morton Signed-off-by: Richard Weinberger --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index cf3d0d673f6b..bd58ee018555 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -221,6 +221,9 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) +/* to align the pointer to the (prev) page boundary */ +#define PAGE_ALIGN_DOWN(addr) ALIGN_DOWN(addr, PAGE_SIZE) + /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) -- cgit v1.2.3 From 6077c943beee407168f72ece745b0aeaef6b896f Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Fri, 15 Jul 2022 10:05:08 -0500 Subject: mm: rename is_pinnable_page() to is_longterm_pinnable_page() Patch series "Add MEMORY_DEVICE_COHERENT for coherent device memory mapping", v9. This patch series introduces MEMORY_DEVICE_COHERENT, a type of memory owned by a device that can be mapped into CPU page tables like MEMORY_DEVICE_GENERIC and can also be migrated like MEMORY_DEVICE_PRIVATE. This patch series is mostly self-contained except for a few places where it needs to update other subsystems to handle the new memory type. System stability and performance are not affected according to our ongoing testing, including xfstests. How it works: The system BIOS advertises the GPU device memory (aka VRAM) as SPM (special purpose memory) in the UEFI system address map. The amdgpu driver registers the memory with devmap as MEMORY_DEVICE_COHERENT using devm_memremap_pages. The initial user for this hardware page migration capability is the Frontier supercomputer project. This functionality is not AMD-specific. We expect other GPU vendors to find this functionality useful, and possibly other hardware types in the future. Our test nodes in the lab are similar to the Frontier configuration, with .5 TB of system memory plus 256 GB of device memory split across 4 GPUs, all in a single coherent address space. Page migration is expected to improve application efficiency significantly. We will report empirical results as they become available. Coherent device type pages at gup are now migrated back to system memory if they are being pinned long-term (FOLL_LONGTERM). The reason is, that long-term pinning would interfere with the device memory manager owning the device-coherent pages (e.g. evictions in TTM). These series incorporate Alistair Popple patches to do this migration from pin_user_pages() calls. hmm_gup_test has been added to hmm-test to test different get user pages calls. This series includes handling of device-managed anonymous pages returned by vm_normal_pages. Although they behave like normal pages for purposes of mapping in CPU page tables and for COW, they do not support LRU lists, NUMA migration or THP. We also introduced a FOLL_LRU flag that adds the same behaviour to follow_page and related APIs, to allow callers to specify that they expect to put pages on an LRU list. This patch (of 14): is_pinnable_page() and folio_is_pinnable() are renamed to is_longterm_pinnable_page() and folio_is_longterm_pinnable() respectively. These functions are used in the FOLL_LONGTERM flag context. Link: https://lkml.kernel.org/r/20220715150521.18165-1-alex.sierra@amd.com Link: https://lkml.kernel.org/r/20220715150521.18165-2-alex.sierra@amd.com Signed-off-by: Alex Sierra Reviewed-by: David Hildenbrand Cc: Jason Gunthorpe Cc: Felix Kuehling Cc: Ralph Campbell Cc: Christoph Hellwig Cc: Jerome Glisse Cc: Alistair Popple Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/mm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9cc02a7e503b..3c044e38958c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1607,7 +1607,7 @@ static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION -static inline bool is_pinnable_page(struct page *page) +static inline bool is_longterm_pinnable_page(struct page *page) { #ifdef CONFIG_CMA int mt = get_pageblock_migratetype(page); @@ -1618,15 +1618,15 @@ static inline bool is_pinnable_page(struct page *page) return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page)); } #else -static inline bool is_pinnable_page(struct page *page) +static inline bool is_longterm_pinnable_page(struct page *page) { return true; } #endif -static inline bool folio_is_pinnable(struct folio *folio) +static inline bool folio_is_longterm_pinnable(struct folio *folio) { - return is_pinnable_page(&folio->page); + return is_longterm_pinnable_page(&folio->page); } static inline void set_page_zone(struct page *page, enum zone_type zone) -- cgit v1.2.3 From 5bb88dc571b1cbf0284100a317fb21ab7d03e40c Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Fri, 15 Jul 2022 10:05:09 -0500 Subject: mm: move page zone helpers from mm.h to mmzone.h It makes more sense to have these helpers in zone specific header file, rather than the generic mm.h Link: https://lkml.kernel.org/r/20220715150521.18165-3-alex.sierra@amd.com Signed-off-by: Alex Sierra Cc: Alistair Popple Cc: Christoph Hellwig Cc: David Hildenbrand Cc: Felix Kuehling Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: Matthew Wilcox Cc: Ralph Campbell Signed-off-by: Andrew Morton --- include/linux/memremap.h | 2 +- include/linux/mm.h | 78 ---------------------------------------------- include/linux/mmzone.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 9f5ee49482de..732dde5988fb 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -2,7 +2,7 @@ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ -#include +#include #include #include #include diff --git a/include/linux/mm.h b/include/linux/mm.h index 3c044e38958c..a2d01e49253b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1045,84 +1045,6 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); * back into memory. */ -/* - * The zone field is never updated after free_area_init_core() - * sets it, so none of the operations on it need to be atomic. - */ - -/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ -#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) -#define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) -#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) -#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) -#define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) - -/* - * Define the bit shifts to access each section. For non-existent - * sections we define the shift as 0; that plus a 0 mask ensures - * the compiler will optimise away reference to them. - */ -#define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) -#define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) -#define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) -#define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0)) -#define KASAN_TAG_PGSHIFT (KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0)) - -/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ -#ifdef NODE_NOT_IN_PAGE_FLAGS -#define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) -#define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF)? \ - SECTIONS_PGOFF : ZONES_PGOFF) -#else -#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT) -#define ZONEID_PGOFF ((NODES_PGOFF < ZONES_PGOFF)? \ - NODES_PGOFF : ZONES_PGOFF) -#endif - -#define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) - -#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) -#define NODES_MASK ((1UL << NODES_WIDTH) - 1) -#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) -#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1) -#define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1) -#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) - -static inline enum zone_type page_zonenum(const struct page *page) -{ - ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); - return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; -} - -static inline enum zone_type folio_zonenum(const struct folio *folio) -{ - return page_zonenum(&folio->page); -} - -#ifdef CONFIG_ZONE_DEVICE -static inline bool is_zone_device_page(const struct page *page) -{ - return page_zonenum(page) == ZONE_DEVICE; -} -extern void memmap_init_zone_device(struct zone *, unsigned long, - unsigned long, struct dev_pagemap *); -#else -static inline bool is_zone_device_page(const struct page *page) -{ - return false; -} -#endif - -static inline bool folio_is_zone_device(const struct folio *folio) -{ - return is_zone_device_page(&folio->page); -} - -static inline bool is_zone_movable_page(const struct page *page) -{ - return page_zonenum(page) == ZONE_MOVABLE; -} - #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) DECLARE_STATIC_KEY_FALSE(devmap_managed_key); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 735bf5b37949..5da1135e6755 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -730,6 +730,86 @@ static inline bool zone_is_empty(struct zone *zone) return zone->spanned_pages == 0; } +#ifndef BUILD_VDSO32_64 +/* + * The zone field is never updated after free_area_init_core() + * sets it, so none of the operations on it need to be atomic. + */ + +/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ +#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) +#define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) +#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) +#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) +#define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) + +/* + * Define the bit shifts to access each section. For non-existent + * sections we define the shift as 0; that plus a 0 mask ensures + * the compiler will optimise away reference to them. + */ +#define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) +#define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) +#define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) +#define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0)) +#define KASAN_TAG_PGSHIFT (KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0)) + +/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ +#ifdef NODE_NOT_IN_PAGE_FLAGS +#define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) +#define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF) ? \ + SECTIONS_PGOFF : ZONES_PGOFF) +#else +#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT) +#define ZONEID_PGOFF ((NODES_PGOFF < ZONES_PGOFF) ? \ + NODES_PGOFF : ZONES_PGOFF) +#endif + +#define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) + +#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) +#define NODES_MASK ((1UL << NODES_WIDTH) - 1) +#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) +#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1) +#define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1) +#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) + +static inline enum zone_type page_zonenum(const struct page *page) +{ + ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); + return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; +} + +static inline enum zone_type folio_zonenum(const struct folio *folio) +{ + return page_zonenum(&folio->page); +} + +#ifdef CONFIG_ZONE_DEVICE +static inline bool is_zone_device_page(const struct page *page) +{ + return page_zonenum(page) == ZONE_DEVICE; +} +extern void memmap_init_zone_device(struct zone *, unsigned long, + unsigned long, struct dev_pagemap *); +#else +static inline bool is_zone_device_page(const struct page *page) +{ + return false; +} +#endif + +static inline bool folio_is_zone_device(const struct folio *folio) +{ + return is_zone_device_page(&folio->page); +} + +static inline bool is_zone_movable_page(const struct page *page) +{ + return page_zonenum(page) == ZONE_MOVABLE; +} +#endif + /* * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty * intersection with the given zone -- cgit v1.2.3 From f25cbb7a95a24ff9a2a3bebd308e303942ae6b2c Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Fri, 15 Jul 2022 10:05:10 -0500 Subject: mm: add zone device coherent type memory support Device memory that is cache coherent from device and CPU point of view. This is used on platforms that have an advanced system bus (like CAPI or CXL). Any page of a process can be migrated to such memory. However, no one should be allowed to pin such memory so that it can always be evicted. [hch@lst.de: rebased ontop of the refcount changes, remove is_dev_private_or_coherent_page] Link: https://lkml.kernel.org/r/20220715150521.18165-4-alex.sierra@amd.com Signed-off-by: Alex Sierra Signed-off-by: Christoph Hellwig Acked-by: Felix Kuehling Reviewed-by: Alistair Popple Acked-by: David Hildenbrand Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: Matthew Wilcox Cc: Ralph Campbell Signed-off-by: Andrew Morton --- include/linux/memremap.h | 19 +++++++++++++++++++ include/linux/mm.h | 5 ++++- 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 732dde5988fb..09320b7f706c 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -41,6 +41,13 @@ struct vmem_altmap { * A more complete discussion of unaddressable memory may be found in * include/linux/hmm.h and Documentation/mm/hmm.rst. * + * MEMORY_DEVICE_COHERENT: + * Device memory that is cache coherent from device and CPU point of view. This + * is used on platforms that have an advanced system bus (like CAPI or CXL). A + * driver can hotplug the device memory using ZONE_DEVICE and with that memory + * type. Any page of a process can be migrated to such memory. However no one + * should be allowed to pin such memory so that it can always be evicted. + * * MEMORY_DEVICE_FS_DAX: * Host memory that has similar access semantics as System RAM i.e. DMA * coherent and supports page pinning. In support of coordinating page @@ -61,6 +68,7 @@ struct vmem_altmap { enum memory_type { /* 0 is reserved to catch uninitialized type fields */ MEMORY_DEVICE_PRIVATE = 1, + MEMORY_DEVICE_COHERENT, MEMORY_DEVICE_FS_DAX, MEMORY_DEVICE_GENERIC, MEMORY_DEVICE_PCI_P2PDMA, @@ -150,6 +158,17 @@ static inline bool is_pci_p2pdma_page(const struct page *page) page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; } +static inline bool is_device_coherent_page(const struct page *page) +{ + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_COHERENT; +} + +static inline bool folio_is_device_coherent(const struct folio *folio) +{ + return is_device_coherent_page(&folio->page); +} + #ifdef CONFIG_ZONE_DEVICE void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); diff --git a/include/linux/mm.h b/include/linux/mm.h index a2d01e49253b..64393ed3330a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -28,6 +28,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -1537,7 +1538,9 @@ static inline bool is_longterm_pinnable_page(struct page *page) if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE) return false; #endif - return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page)); + return !(is_device_coherent_page(page) || + is_zone_movable_page(page) || + is_zero_pfn(page_to_pfn(page))); } #else static inline bool is_longterm_pinnable_page(struct page *page) -- cgit v1.2.3 From dd19e6d8ffaa1289d75d7833de97faf1b6b2c8e4 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Fri, 15 Jul 2022 10:05:12 -0500 Subject: mm: add device coherent vma selection for memory migration This case is used to migrate pages from device memory, back to system memory. Device coherent type memory is cache coherent from device and CPU point of view. Link: https://lkml.kernel.org/r/20220715150521.18165-6-alex.sierra@amd.com Signed-off-by: Alex Sierra Signed-off-by: Christoph Hellwig Acked-by: Felix Kuehling Reviewed-by: Alistair Poppple Reviewed-by: David Hildenbrand Cc: Jason Gunthorpe Cc: Jerome Glisse Cc: Matthew Wilcox Cc: Ralph Campbell Signed-off-by: Andrew Morton --- include/linux/migrate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 069a89e847f3..b84908debe5c 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -148,6 +148,7 @@ static inline unsigned long migrate_pfn(unsigned long pfn) enum migrate_vma_direction { MIGRATE_VMA_SELECT_SYSTEM = 1 << 0, MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1, + MIGRATE_VMA_SELECT_DEVICE_COHERENT = 1 << 2, }; struct migrate_vma { -- cgit v1.2.3 From 8012b866085523758780850087102421dbcce522 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Fri, 3 Jun 2022 13:37:25 +0800 Subject: dax: introduce holder for dax_device Patch series "v14 fsdax-rmap + v11 fsdax-reflink", v2. The patchset fsdax-rmap is aimed to support shared pages tracking for fsdax. It moves owner tracking from dax_assocaite_entry() to pmem device driver, by introducing an interface ->memory_failure() for struct pagemap. This interface is called by memory_failure() in mm, and implemented by pmem device. Then call holder operations to find the filesystem which the corrupted data located in, and call filesystem handler to track files or metadata associated with this page. Finally we are able to try to fix the corrupted data in filesystem and do other necessary processing, such as killing processes who are using the files affected. The call trace is like this: memory_failure() |* fsdax case |------------ |pgmap->ops->memory_failure() => pmem_pgmap_memory_failure() | dax_holder_notify_failure() => | dax_device->holder_ops->notify_failure() => | - xfs_dax_notify_failure() | |* xfs_dax_notify_failure() | |-------------------------- | | xfs_rmap_query_range() | | xfs_dax_failure_fn() | | * corrupted on metadata | | try to recover data, call xfs_force_shutdown() | | * corrupted on file data | | try to recover data, call mf_dax_kill_procs() |* normal case |------------- |mf_generic_kill_procs() The patchset fsdax-reflink attempts to add CoW support for fsdax, and takes XFS, which has both reflink and fsdax features, as an example. One of the key mechanisms needed to be implemented in fsdax is CoW. Copy the data from srcmap before we actually write data to the destination iomap. And we just copy range in which data won't be changed. Another mechanism is range comparison. In page cache case, readpage() is used to load data on disk to page cache in order to be able to compare data. In fsdax case, readpage() does not work. So, we need another compare data with direct access support. With the two mechanisms implemented in fsdax, we are able to make reflink and fsdax work together in XFS. This patch (of 14): To easily track filesystem from a pmem device, we introduce a holder for dax_device structure, and also its operation. This holder is used to remember who is using this dax_device: - When it is the backend of a filesystem, the holder will be the instance of this filesystem. - When this pmem device is one of the targets in a mapped device, the holder will be this mapped device. In this case, the mapped device has its own dax_device and it will follow the first rule. So that we can finally track to the filesystem we needed. The holder and holder_ops will be set when filesystem is being mounted, or an target device is being activated. Link: https://lkml.kernel.org/r/20220603053738.1218681-1-ruansy.fnst@fujitsu.com Link: https://lkml.kernel.org/r/20220603053738.1218681-2-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Christoph Hellwig Reviewed-by: Dan Williams Reviewed-by: Darrick J. Wong Cc: Dave Chinner Cc: Jane Chu Cc: Goldwyn Rodrigues Cc: Al Viro Cc: Matthew Wilcox Cc: Naoya Horiguchi Cc: Miaohe Lin Cc: Dan Williams Cc: Goldwyn Rodrigues Cc: Ritesh Harjani Signed-off-by: Andrew Morton --- include/linux/dax.h | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index e7b81634c52a..cf85fc36da5f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -43,8 +43,21 @@ struct dax_operations { void *addr, size_t bytes, struct iov_iter *iter); }; +struct dax_holder_operations { + /* + * notify_failure - notify memory failure into inner holder device + * @dax_dev: the dax device which contains the holder + * @offset: offset on this dax device where memory failure occurs + * @len: length of this memory failure event + * @flags: action flags for memory failure handler + */ + int (*notify_failure)(struct dax_device *dax_dev, u64 offset, + u64 len, int mf_flags); +}; + #if IS_ENABLED(CONFIG_DAX) struct dax_device *alloc_dax(void *private, const struct dax_operations *ops); +void *dax_holder(struct dax_device *dax_dev); void put_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); @@ -66,6 +79,10 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, return dax_synchronous(dax_dev); } #else +static inline void *dax_holder(struct dax_device *dax_dev) +{ + return NULL; +} static inline struct dax_device *alloc_dax(void *private, const struct dax_operations *ops) { @@ -114,12 +131,9 @@ struct writeback_control; #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); void dax_remove_host(struct gendisk *disk); -struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, - u64 *start_off); -static inline void fs_put_dax(struct dax_device *dax_dev) -{ - put_dax(dax_dev); -} +struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off, + void *holder, const struct dax_holder_operations *ops); +void fs_put_dax(struct dax_device *dax_dev, void *holder); #else static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) { @@ -129,11 +143,12 @@ static inline void dax_remove_host(struct gendisk *disk) { } static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, - u64 *start_off) + u64 *start_off, void *holder, + const struct dax_holder_operations *ops) { return NULL; } -static inline void fs_put_dax(struct dax_device *dax_dev) +static inline void fs_put_dax(struct dax_device *dax_dev, void *holder) { } #endif /* CONFIG_BLOCK && CONFIG_FS_DAX */ @@ -203,6 +218,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, size_t nr_pages); +int dax_holder_notify_failure(struct dax_device *dax_dev, u64 off, u64 len, + int mf_flags); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, -- cgit v1.2.3 From 33a8f7f2b3a3437d016d1b4047a4fd37eb6951b3 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Fri, 3 Jun 2022 13:37:27 +0800 Subject: pagemap,pmem: introduce ->memory_failure() When memory-failure occurs, we call this function which is implemented by each kind of devices. For the fsdax case, pmem device driver implements it. Pmem device driver will find out the filesystem in which the corrupted page located in. With dax_holder notify support, we are able to notify the memory failure from pmem driver to upper layers. If there is something not support in the notify routine, memory_failure will fall back to the generic hanlder. Link: https://lkml.kernel.org/r/20220603053738.1218681-4-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Christoph Hellwig Reviewed-by: Dan Williams Reviewed-by: Darrick J. Wong Reviewed-by: Naoya Horiguchi Cc: Al Viro Cc: Dan Williams Cc: Dave Chinner Cc: Goldwyn Rodrigues Cc: Goldwyn Rodrigues Cc: Jane Chu Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Ritesh Harjani Signed-off-by: Andrew Morton --- include/linux/memremap.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 09320b7f706c..19010491a603 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -87,6 +87,18 @@ struct dev_pagemap_ops { * the page back to a CPU accessible page. */ vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf); + + /* + * Handle the memory failure happens on a range of pfns. Notify the + * processes who are using these pfns, and try to recover the data on + * them if necessary. The mf_flags is finally passed to the recover + * function through the whole notify routine. + * + * When this is not implemented, or it returns -EOPNOTSUPP, the caller + * will fall back to a common handler called mf_generic_kill_procs(). + */ + int (*memory_failure)(struct dev_pagemap *pgmap, unsigned long pfn, + unsigned long nr_pages, int mf_flags); }; #define PGMAP_ALTMAP_VALID (1 << 0) -- cgit v1.2.3 From 2f437effc689ef913fbe5e31110580b4e7cf04be Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Fri, 3 Jun 2022 13:37:28 +0800 Subject: fsdax: introduce dax_lock_mapping_entry() The current dax_lock_page() locks dax entry by obtaining mapping and index in page. To support 1-to-N RMAP in NVDIMM, we need a new function to lock a specific dax entry corresponding to this file's mapping,index. And output the page corresponding to the specific dax entry for caller use. Link: https://lkml.kernel.org/r/20220603053738.1218681-5-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Cc: Al Viro Cc: Dan Williams Cc: Dan Williams Cc: Dave Chinner Cc: Goldwyn Rodrigues Cc: Goldwyn Rodrigues Cc: Jane Chu Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Naoya Horiguchi Cc: Ritesh Harjani Signed-off-by: Andrew Morton --- include/linux/dax.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index cf85fc36da5f..7116681b48c0 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -161,6 +161,10 @@ struct page *dax_layout_busy_page(struct address_space *mapping); struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); dax_entry_t dax_lock_page(struct page *page); void dax_unlock_page(struct page *page, dax_entry_t cookie); +dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, + unsigned long index, struct page **page); +void dax_unlock_mapping_entry(struct address_space *mapping, + unsigned long index, dax_entry_t cookie); #else static inline struct page *dax_layout_busy_page(struct address_space *mapping) { @@ -188,6 +192,17 @@ static inline dax_entry_t dax_lock_page(struct page *page) static inline void dax_unlock_page(struct page *page, dax_entry_t cookie) { } + +static inline dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, + unsigned long index, struct page **page) +{ + return 0; +} + +static inline void dax_unlock_mapping_entry(struct address_space *mapping, + unsigned long index, dax_entry_t cookie) +{ +} #endif int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, -- cgit v1.2.3 From c36e2024957120566efd99395b5c8cc95b5175c1 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Fri, 3 Jun 2022 13:37:29 +0800 Subject: mm: introduce mf_dax_kill_procs() for fsdax case This new function is a variant of mf_generic_kill_procs that accepts a file, offset pair instead of a struct to support multiple files sharing a DAX mapping. It is intended to be called by the file systems as part of the memory_failure handler after the file system performed a reverse mapping from the storage address to the file and file offset. Link: https://lkml.kernel.org/r/20220603053738.1218681-6-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Dan Williams Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Miaohe Lin Cc: Al Viro Cc: Dan Williams Cc: Dave Chinner Cc: Goldwyn Rodrigues Cc: Goldwyn Rodrigues Cc: Jane Chu Cc: Matthew Wilcox Cc: Naoya Horiguchi Cc: Ritesh Harjani Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 64393ed3330a..d4ebfc206e2b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3178,6 +3178,8 @@ enum mf_flags { MF_UNPOISON = 1 << 4, MF_SW_SIMULATED = 1 << 5, }; +int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, + unsigned long count, int mf_flags); extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue(unsigned long pfn, int flags); extern void memory_failure_queue_kick(int cpu); -- cgit v1.2.3 From 6061b69b9a550a2ab84e805d0d2315ba6215f112 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Fri, 3 Jun 2022 13:37:31 +0800 Subject: fsdax: set a CoW flag when associate reflink mappings Introduce a PAGE_MAPPING_DAX_COW flag to support association with CoW file mappings. In this case, since the dax-rmap has already took the responsibility to look up for shared files by given dax page, the page->mapping is no longer to used for rmap but for marking that this dax page is shared. And to make sure disassociation works fine, we use page->index as refcount, and clear page->mapping to the initial state when page->index is decreased to 0. With the help of this new flag, it is able to distinguish normal case and CoW case, and keep the warning in normal case. Link: https://lkml.kernel.org/r/20220603053738.1218681-8-ruansy.fnst@fujitsu.com Signed-off-by: Shiyang Ruan Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Cc: Al Viro Cc: Dan Williams Cc: Dan Williams Cc: Dave Chinner Cc: Goldwyn Rodrigues Cc: Goldwyn Rodrigues Cc: Jane Chu Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Naoya Horiguchi Cc: Ritesh Harjani Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 82719d33c0f1..f2ff65f1bf83 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -661,6 +661,12 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) +/* + * Different with flags above, this flag is used only for fsdax mode. It + * indicates that this page->mapping is now under reflink case. + */ +#define PAGE_MAPPING_DAX_COW 0x1 + static __always_inline bool folio_mapping_flags(struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; -- cgit v1.2.3 From 6f7db3894ae23eb5d40af4efb404aa0c072a68d2 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Fri, 3 Jun 2022 13:37:36 +0800 Subject: fsdax: dedup file range to use a compare function With dax we cannot deal with readpage() etc. So, we create a dax comparison function which is similar with vfs_dedupe_file_range_compare(). And introduce dax_remap_file_range_prep() for filesystem use. Link: https://lkml.kernel.org/r/20220603053738.1218681-13-ruansy.fnst@fujitsu.com Signed-off-by: Goldwyn Rodrigues Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Cc: Al Viro Cc: Dan Williams Cc: Dan Williams Cc: Dave Chinner Cc: Goldwyn Rodrigues Cc: Jane Chu Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Naoya Horiguchi Cc: Ritesh Harjani Signed-off-by: Andrew Morton --- include/linux/dax.h | 8 ++++++++ include/linux/fs.h | 12 ++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 7116681b48c0..ba985333e26b 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -246,6 +246,14 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); +int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dest, loff_t destoff, + loff_t len, bool *is_same, + const struct iomap_ops *ops); +int dax_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags, + const struct iomap_ops *ops); static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae..134e9d7ad5d6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -74,6 +74,7 @@ struct fsverity_operations; struct fs_context; struct fs_parameter_spec; struct fileattr; +struct iomap_ops; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2070,10 +2071,13 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags); -extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t *count, - unsigned int remap_flags); +int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags, + const struct iomap_ops *dax_read_ops); +int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *count, unsigned int remap_flags); extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); -- cgit v1.2.3 From 4fa6893faeaaea4fe4440512d2a708527ef47051 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 16 Jun 2022 10:48:35 -0700 Subject: mm: thp: consolidate vma size check to transhuge_vma_suitable There are couple of places that check whether the vma size is ok for THP or whether address fits, they are open coded and duplicate, use transhuge_vma_suitable() to do the job by passing in (vma->end - HPAGE_PMD_SIZE). Move vma size check into hugepage_vma_check(). This will make khugepaged_enter() is as same as khugepaged_enter_vma(). There is just one caller for khugepaged_enter(), replace it to khugepaged_enter_vma() and remove khugepaged_enter(). Link: https://lkml.kernel.org/r/20220616174840.1202070-3-shy828301@gmail.com Signed-off-by: Yang Shi Reviewed-by: Zach O'Keefe Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 11 +++++++++++ include/linux/khugepaged.h | 14 -------------- 2 files changed, 11 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 648cb3ce7099..8a5a8bfce0f5 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -116,6 +116,17 @@ extern struct kobj_attribute shmem_enabled_attr; extern unsigned long transparent_hugepage_flags; +/* + * Do the below checks: + * - For file vma, check if the linear page offset of vma is + * HPAGE_PMD_NR aligned within the file. The hugepage is + * guaranteed to be hugepage-aligned within the file, but we must + * check that the PMD-aligned addresses in the VMA map to + * PMD-aligned offsets within the file, else the hugepage will + * not be PMD-mappable. + * - For all vmas, check if the haddr is in an aligned HPAGE_PMD_SIZE + * area. + */ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, unsigned long addr) { diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 392d34c3c59a..31ca8a7f78f4 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -51,16 +51,6 @@ static inline void khugepaged_exit(struct mm_struct *mm) if (test_bit(MMF_VM_HUGEPAGE, &mm->flags)) __khugepaged_exit(mm); } - -static inline void khugepaged_enter(struct vm_area_struct *vma, - unsigned long vm_flags) -{ - if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) && - khugepaged_enabled()) { - if (hugepage_vma_check(vma, vm_flags)) - __khugepaged_enter(vma->vm_mm); - } -} #else /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) { @@ -68,10 +58,6 @@ static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm static inline void khugepaged_exit(struct mm_struct *mm) { } -static inline void khugepaged_enter(struct vm_area_struct *vma, - unsigned long vm_flags) -{ -} static inline void khugepaged_enter_vma(struct vm_area_struct *vma, unsigned long vm_flags) { -- cgit v1.2.3 From 9fec51689ff60d9766b38051a0b1692f93d95364 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 16 Jun 2022 10:48:37 -0700 Subject: mm: thp: kill transparent_hugepage_active() The transparent_hugepage_active() was introduced to show THP eligibility bit in smaps in proc, smaps is the only user. But it actually does the similar check as hugepage_vma_check() which is used by khugepaged. We definitely don't have to maintain two similar checks, so kill transparent_hugepage_active(). This patch also fixed the wrong behavior for VM_NO_KHUGEPAGED vmas. Also move hugepage_vma_check() to huge_memory.c and huge_mm.h since it is not only for khugepaged anymore. [akpm@linux-foundation.org: check vma->vm_mm, per Zach] [akpm@linux-foundation.org: add comment to vdso check] Link: https://lkml.kernel.org/r/20220616174840.1202070-5-shy828301@gmail.com Signed-off-by: Yang Shi Reviewed-by: Zach O'Keefe Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 16 ++++++++++------ include/linux/khugepaged.h | 2 -- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 8a5a8bfce0f5..64487bcd0c7b 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -202,7 +202,9 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma) !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); } -bool transparent_hugepage_active(struct vm_area_struct *vma); +bool hugepage_vma_check(struct vm_area_struct *vma, + unsigned long vm_flags, + bool smaps); #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ @@ -351,11 +353,6 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) return false; } -static inline bool transparent_hugepage_active(struct vm_area_struct *vma) -{ - return false; -} - static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, unsigned long addr) { @@ -368,6 +365,13 @@ static inline bool transhuge_vma_enabled(struct vm_area_struct *vma, return false; } +static inline bool hugepage_vma_check(struct vm_area_struct *vma, + unsigned long vm_flags, + bool smaps) +{ + return false; +} + static inline void prep_transhuge_page(struct page *page) {} #define transparent_hugepage_flags 0UL diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 31ca8a7f78f4..ea5fd4c398f7 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -10,8 +10,6 @@ extern struct attribute_group khugepaged_attr_group; extern int khugepaged_init(void); extern void khugepaged_destroy(void); extern int start_stop_khugepaged(void); -extern bool hugepage_vma_check(struct vm_area_struct *vma, - unsigned long vm_flags); extern void __khugepaged_enter(struct mm_struct *mm); extern void __khugepaged_exit(struct mm_struct *mm); extern void khugepaged_enter_vma(struct vm_area_struct *vma, -- cgit v1.2.3 From 7da4e2cb8b1ff8221759bfc7512d651ee69516dc Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 16 Jun 2022 10:48:38 -0700 Subject: mm: thp: kill __transhuge_page_enabled() The page fault path checks THP eligibility with __transhuge_page_enabled() which does the similar thing as hugepage_vma_check(), so use hugepage_vma_check() instead. However page fault allows DAX and !anon_vma cases, so added a new flag, in_pf, to hugepage_vma_check() to make page fault work correctly. The in_pf flag is also used to skip shmem and file THP for page fault since shmem handles THP in its own shmem_fault() and file THP allocation on fault is not supported yet. Also remove hugepage_vma_enabled() since hugepage_vma_check() is the only caller now, it is not necessary to have a helper function. Link: https://lkml.kernel.org/r/20220616174840.1202070-6-shy828301@gmail.com Signed-off-by: Yang Shi Reviewed-by: Zach O'Keefe Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 57 ++----------------------------------------------- 1 file changed, 2 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 64487bcd0c7b..cd8a6c5d9fe5 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -146,48 +146,6 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, return true; } -static inline bool transhuge_vma_enabled(struct vm_area_struct *vma, - unsigned long vm_flags) -{ - /* Explicitly disabled through madvise. */ - if ((vm_flags & VM_NOHUGEPAGE) || - test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) - return false; - return true; -} - -/* - * to be used on vmas which are known to support THP. - * Use transparent_hugepage_active otherwise - */ -static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) -{ - - /* - * If the hardware/firmware marked hugepage support disabled. - */ - if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX)) - return false; - - if (!transhuge_vma_enabled(vma, vma->vm_flags)) - return false; - - if (vma_is_temporary_stack(vma)) - return false; - - if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG)) - return true; - - if (vma_is_dax(vma)) - return true; - - if (transparent_hugepage_flags & - (1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)) - return !!(vma->vm_flags & VM_HUGEPAGE); - - return false; -} - static inline bool file_thp_enabled(struct vm_area_struct *vma) { struct inode *inode; @@ -204,7 +162,7 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma) bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, - bool smaps); + bool smaps, bool in_pf); #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ @@ -348,26 +306,15 @@ static inline bool folio_test_pmd_mappable(struct folio *folio) return false; } -static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) -{ - return false; -} - static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, unsigned long addr) { return false; } -static inline bool transhuge_vma_enabled(struct vm_area_struct *vma, - unsigned long vm_flags) -{ - return false; -} - static inline bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, - bool smaps) + bool smaps, bool in_pf) { return false; } -- cgit v1.2.3 From 1064026bab9f011bdea1251d44d66bbbcee04f6e Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 16 Jun 2022 10:48:39 -0700 Subject: mm: khugepaged: reorg some khugepaged helpers The khugepaged_{enabled|always|req_madv} are not khugepaged only anymore, move them to huge_mm.h and rename to hugepage_flags_xxx, and remove khugepaged_req_madv due to no users. Also move khugepaged_defrag to khugepaged.c since its only caller is in that file, it doesn't have to be in a header file. Link: https://lkml.kernel.org/r/20220616174840.1202070-7-shy828301@gmail.com Signed-off-by: Yang Shi Reviewed-by: Zach O'Keefe Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 8 ++++++++ include/linux/khugepaged.h | 14 -------------- 2 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index cd8a6c5d9fe5..ae3d8e2fd9e2 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -116,6 +116,14 @@ extern struct kobj_attribute shmem_enabled_attr; extern unsigned long transparent_hugepage_flags; +#define hugepage_flags_enabled() \ + (transparent_hugepage_flags & \ + ((1<flags)) -- cgit v1.2.3 From e95a9851787bbb3cd4deb40fe8bab03f731852d1 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 21 Jun 2022 16:56:17 -0700 Subject: hugetlb: skip to end of PT page mapping when pte not present Patch series "hugetlb: speed up linear address scanning", v2. At unmap, fork and remap time hugetlb address ranges are linearly scanned. We can optimize these scans if the ranges are sparsely populated. Also, enable page table "Lazy copy" for hugetlb at fork. NOTE: Architectures not defining CONFIG_ARCH_WANT_GENERAL_HUGETLB need to add an arch specific version hugetlb_mask_last_page() to take advantage of sparse address scanning improvements. Baolin Wang added the routine for arm64. Other architectures which could be optimized are: ia64, mips, parisc, powerpc, s390, sh and sparc. This patch (of 4): HugeTLB address ranges are linearly scanned during fork, unmap and remap operations. If a non-present entry is encountered, the code currently continues to the next huge page aligned address. However, a non-present entry implies that the page table page for that entry is not present. Therefore, the linear scan can skip to the end of range mapped by the page table page. This can speed operations on large sparsely populated hugetlb mappings. Create a new routine hugetlb_mask_last_page() that will return an address mask. When the mask is ORed with an address, the result will be the address of the last huge page mapped by the associated page table page. Use this mask to update addresses in routines which linearly scan hugetlb address ranges when a non-present pte is encountered. hugetlb_mask_last_page is related to the implementation of huge_pte_offset as hugetlb_mask_last_page is called when huge_pte_offset returns NULL. This patch only provides a complete hugetlb_mask_last_page implementation when CONFIG_ARCH_WANT_GENERAL_HUGETLB is defined. Architectures which provide their own versions of huge_pte_offset can also provide their own version of hugetlb_mask_last_page. Link: https://lkml.kernel.org/r/20220621235620.291305-1-mike.kravetz@oracle.com Link: https://lkml.kernel.org/r/20220621235620.291305-2-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Tested-by: Baolin Wang Reviewed-by: Baolin Wang Acked-by: Muchun Song Reported-by: kernel test robot Cc: Michal Hocko Cc: Peter Xu Cc: Naoya Horiguchi Cc: James Houghton Cc: Mina Almasry Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Cc: Paul Walmsley Cc: Christian Borntraeger Cc: Catalin Marinas Cc: Will Deacon Cc: Rolf Eike Beer Cc: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c6cccfaf8708..ce30fad5fd13 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -194,6 +194,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); +unsigned long hugetlb_mask_last_page(struct hstate *h); int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long *addr, pte_t *ptep); void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, -- cgit v1.2.3 From 4ddb4d91b82f4b64458fe35bc8e395c7c082ea2b Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 21 Jun 2022 16:56:19 -0700 Subject: hugetlb: do not update address in huge_pmd_unshare As an optimization for loops sequentially processing hugetlb address ranges, huge_pmd_unshare would update a passed address if it unshared a pmd. Updating a loop control variable outside the loop like this is generally a bad idea. These loops are now using hugetlb_mask_last_page to optimize scanning when non-present ptes are discovered. The same can be done when huge_pmd_unshare returns 1 indicating a pmd was unshared. Remove address update from huge_pmd_unshare. Change the passed argument type and update all callers. In loops sequentially processing addresses use hugetlb_mask_last_page to update address if pmd is unshared. [sfr@canb.auug.org.au: fix an unused variable warning/error] Link: https://lkml.kernel.org/r/20220622171117.70850960@canb.auug.org.au Link: https://lkml.kernel.org/r/20220621235620.291305-4-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Signed-off-by: Stephen Rothwell Acked-by: Muchun Song Reviewed-by: Baolin Wang Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Christian Borntraeger Cc: David Hildenbrand Cc: James Houghton Cc: kernel test robot Cc: Michal Hocko Cc: Mina Almasry Cc: Naoya Horiguchi Cc: Paul Walmsley Cc: Peter Xu Cc: Rolf Eike Beer Cc: Will Deacon Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ce30fad5fd13..75ee739d815b 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -196,7 +196,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); unsigned long hugetlb_mask_last_page(struct hstate *h); int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long *addr, pte_t *ptep); + unsigned long addr, pte_t *ptep); void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, @@ -243,7 +243,7 @@ static inline struct address_space *hugetlb_page_mapping_lock_write( static inline int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long *addr, pte_t *ptep) + unsigned long addr, pte_t *ptep) { return 0; } -- cgit v1.2.3 From bf75f200569dd05ac2112797f44548beb6b4be26 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 24 Jun 2022 13:54:17 +0100 Subject: mm/page_alloc: add page->buddy_list and page->pcp_list Patch series "Drain remote per-cpu directly", v5. Some setups, notably NOHZ_FULL CPUs, may be running realtime or latency-sensitive applications that cannot tolerate interference due to per-cpu drain work queued by __drain_all_pages(). Introduce a new mechanism to remotely drain the per-cpu lists. It is made possible by remotely locking 'struct per_cpu_pages' new per-cpu spinlocks. This has two advantages, the time to drain is more predictable and other unrelated tasks are not interrupted. This series has the same intent as Nicolas' series "mm/page_alloc: Remote per-cpu lists drain support" -- avoid interference of a high priority task due to a workqueue item draining per-cpu page lists. While many workloads can tolerate a brief interruption, it may cause a real-time task running on a NOHZ_FULL CPU to miss a deadline and at minimum, the draining is non-deterministic. Currently an IRQ-safe local_lock protects the page allocator per-cpu lists. The local_lock on its own prevents migration and the IRQ disabling protects from corruption due to an interrupt arriving while a page allocation is in progress. This series adjusts the locking. A spinlock is added to struct per_cpu_pages to protect the list contents while local_lock_irq is ultimately replaced by just the spinlock in the final patch. This allows a remote CPU to safely. Follow-on work should allow the spin_lock_irqsave to be converted to spin_lock to avoid IRQs being disabled/enabled in most cases. The follow-on patch will be one kernel release later as it is relatively high risk and it'll make bisections more clear if there are any problems. Patch 1 is a cosmetic patch to clarify when page->lru is storing buddy pages and when it is storing per-cpu pages. Patch 2 shrinks per_cpu_pages to make room for a spin lock. Strictly speaking this is not necessary but it avoids per_cpu_pages consuming another cache line. Patch 3 is a preparation patch to avoid code duplication. Patch 4 is a minor correction. Patch 5 uses a spin_lock to protect the per_cpu_pages contents while still relying on local_lock to prevent migration, stabilise the pcp lookup and prevent IRQ reentrancy. Patch 6 remote drains per-cpu pages directly instead of using a workqueue. Patch 7 uses a normal spinlock instead of local_lock for remote draining This patch (of 7): The page allocator uses page->lru for storing pages on either buddy or PCP lists. Create page->buddy_list and page->pcp_list as a union with page->lru. This is simply to clarify what type of list a page is on in the page allocator. No functional change intended. [minchan@kernel.org: fix page lru fields in macros] Link: https://lkml.kernel.org/r/20220624125423.6126-2-mgorman@techsingularity.net Signed-off-by: Mel Gorman Tested-by: Minchan Kim Acked-by: Minchan Kim Reviewed-by: Nicolas Saenz Julienne Acked-by: Vlastimil Babka Tested-by: Yu Zhao Cc: Marcelo Tosatti Cc: Michal Hocko Cc: Hugh Dickins Cc: Marek Szyprowski Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6b961a29bf26..cf97f3884fda 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -87,6 +87,7 @@ struct page { */ union { struct list_head lru; + /* Or, for the Unevictable "LRU list" slot */ struct { /* Always even, to negate PageTail */ @@ -94,6 +95,10 @@ struct page { /* Count page's or folio's mlocks */ unsigned int mlock_count; }; + + /* Or, free page */ + struct list_head buddy_list; + struct list_head pcp_list; }; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; -- cgit v1.2.3 From 5d0a661d808fc8ddc26940b1a12b82ae356f3ae2 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 24 Jun 2022 13:54:18 +0100 Subject: mm/page_alloc: use only one PCP list for THP-sized allocations The per_cpu_pages is cache-aligned on a standard x86-64 distribution configuration but a later patch will add a new field which would push the structure into the next cache line. Use only one list to store THP-sized pages on the per-cpu list. This assumes that the vast majority of THP-sized allocations are GFP_MOVABLE but even if it was another type, it would not contribute to serious fragmentation that potentially causes a later THP allocation failure. Align per_cpu_pages on the cacheline boundary to ensure there is no false cache sharing. After this patch, the structure sizing is; struct per_cpu_pages { int count; /* 0 4 */ int high; /* 4 4 */ int batch; /* 8 4 */ short int free_factor; /* 12 2 */ short int expire; /* 14 2 */ struct list_head lists[13]; /* 16 208 */ /* size: 256, cachelines: 4, members: 6 */ /* padding: 32 */ } __attribute__((__aligned__(64))); Link: https://lkml.kernel.org/r/20220624125423.6126-3-mgorman@techsingularity.net Signed-off-by: Mel Gorman Tested-by: Minchan Kim Acked-by: Minchan Kim Acked-by: Vlastimil Babka Tested-by: Yu Zhao Cc: Hugh Dickins Cc: Marcelo Tosatti Cc: Marek Szyprowski Cc: Michal Hocko Cc: Nicolas Saenz Julienne Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5da1135e6755..041136b5628a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -355,15 +355,18 @@ enum zone_watermarks { }; /* - * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER plus one additional - * for pageblock size for THP if configured. + * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. One additional list + * for THP which will usually be GFP_MOVABLE. Even if it is another type, + * it should not contribute to serious fragmentation causing THP allocation + * failures. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define NR_PCP_THP 1 #else #define NR_PCP_THP 0 #endif -#define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP)) +#define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1)) +#define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP) /* * Shift to encode migratetype and order in the same integer, with order @@ -389,7 +392,7 @@ struct per_cpu_pages { /* Lists of pages, one per migrate type stored on the pcp-lists */ struct list_head lists[NR_PCP_LISTS]; -}; +} ____cacheline_aligned_in_smp; struct per_cpu_zonestat { #ifdef CONFIG_SMP -- cgit v1.2.3 From 4b23a68f953628eb4e4b7fe1294ebf93d4b8ceee Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 24 Jun 2022 13:54:21 +0100 Subject: mm/page_alloc: protect PCP lists with a spinlock Currently the PCP lists are protected by using local_lock_irqsave to prevent migration and IRQ reentrancy but this is inconvenient. Remote draining of the lists is impossible and a workqueue is required and every task allocation/free must disable then enable interrupts which is expensive. As preparation for dealing with both of those problems, protect the lists with a spinlock. The IRQ-unsafe version of the lock is used because IRQs are already disabled by local_lock_irqsave. spin_trylock is used in combination with local_lock_irqsave() but later will be replaced with a spin_trylock_irqsave when the local_lock is removed. The per_cpu_pages still fits within the same number of cache lines after this patch relative to before the series. struct per_cpu_pages { spinlock_t lock; /* 0 4 */ int count; /* 4 4 */ int high; /* 8 4 */ int batch; /* 12 4 */ short int free_factor; /* 16 2 */ short int expire; /* 18 2 */ /* XXX 4 bytes hole, try to pack */ struct list_head lists[13]; /* 24 208 */ /* size: 256, cachelines: 4, members: 7 */ /* sum members: 228, holes: 1, sum holes: 4 */ /* padding: 24 */ } __attribute__((__aligned__(64))); There is overhead in the fast path due to acquiring the spinlock even though the spinlock is per-cpu and uncontended in the common case. Page Fault Test (PFT) running on a 1-socket reported the following results on a 1 socket machine. 5.19.0-rc3 5.19.0-rc3 vanilla mm-pcpspinirq-v5r16 Hmean faults/sec-1 869275.7381 ( 0.00%) 874597.5167 * 0.61%* Hmean faults/sec-3 2370266.6681 ( 0.00%) 2379802.0362 * 0.40%* Hmean faults/sec-5 2701099.7019 ( 0.00%) 2664889.7003 * -1.34%* Hmean faults/sec-7 3517170.9157 ( 0.00%) 3491122.8242 * -0.74%* Hmean faults/sec-8 3965729.6187 ( 0.00%) 3939727.0243 * -0.66%* There is a small hit in the number of faults per second but given that the results are more stable, it's borderline noise. [akpm@linux-foundation.org: add missing local_unlock_irqrestore() on contention path] Link: https://lkml.kernel.org/r/20220624125423.6126-6-mgorman@techsingularity.net Signed-off-by: Mel Gorman Tested-by: Yu Zhao Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne Acked-by: Vlastimil Babka Cc: Hugh Dickins Cc: Marcelo Tosatti Cc: Marek Szyprowski Cc: Michal Hocko Cc: Minchan Kim Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 041136b5628a..578247a341b2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -382,6 +382,7 @@ enum zone_watermarks { /* Fields and list protected by pagesets local_lock in page_alloc.c */ struct per_cpu_pages { + spinlock_t lock; /* Protects lists field */ int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ int batch; /* chunk size for buddy add/remove */ -- cgit v1.2.3 From 840532711d7299d7e937952482ec899d4622c452 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 11 Jul 2022 12:35:35 +0530 Subject: mm/mmap: build protect protection_map[] with __P000 Patch series "mm/mmap: Drop __SXXX/__PXXX macros from across platforms", v7. __SXXX/__PXXX macros are unnecessary abstraction layer in creating the generic protection_map[] array which is used for vm_get_page_prot(). This abstraction layer can be avoided, if the platforms just define the array protection_map[] for all possible vm_flags access permission combinations and also export vm_get_page_prot() implementation. This series drops __SXXX/__PXXX macros from across platforms in the tree. First it build protects generic protection_map[] array with '#ifdef __P000' and moves it inside platforms which enable ARCH_HAS_VM_GET_PAGE_PROT. Later this build protects same array with '#ifdef ARCH_HAS_VM_GET_PAGE_PROT' and moves inside remaining platforms while enabling ARCH_HAS_VM_GET_PAGE_PROT. This adds a new macro DECLARE_VM_GET_PAGE_PROT defining the current generic vm_get_page_prot(), in order for it to be reused on platforms that do not require custom implementation. Finally, ARCH_HAS_VM_GET_PAGE_PROT can just be dropped, as all platforms now define and export vm_get_page_prot(), via looking up a private and static protection_map[] array. protection_map[] data type has been changed as 'static const' on all platforms that do not change it during boot. This patch (of 26): Build protect generic protection_map[] array with __P000, so that it can be moved inside all the platforms one after the other. Otherwise there will be build failures during this process. CONFIG_ARCH_HAS_VM_GET_PAGE_PROT cannot be used for this purpose as only certain platforms enable this config now. Link: https://lkml.kernel.org/r/20220711070600.2378316-1-anshuman.khandual@arm.com Link: https://lkml.kernel.org/r/20220711070600.2378316-2-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: Christoph Hellwig Reviewed-by: Christophe Leroy Suggested-by: Christophe Leroy Cc: Arnd Bergmann Cc: Brian Cain Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Chris Zankel Cc: "David S. Miller" Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: Jonas Bonn Cc: Michael Ellerman Cc: Michal Simek Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sam Ravnborg Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: WANG Xuerui Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index d4ebfc206e2b..1a435ce146a2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -425,7 +425,9 @@ extern unsigned int kobjsize(const void *objp); * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. */ +#ifdef __P000 extern pgprot_t protection_map[16]; +#endif /* * The default fault flags that should be used by most of the -- cgit v1.2.3 From 43957b5d11037a651d162f65c682ec3c76777fc8 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 11 Jul 2022 12:35:36 +0530 Subject: mm/mmap: define DECLARE_VM_GET_PAGE_PROT This just converts the generic vm_get_page_prot() implementation into a new macro i.e DECLARE_VM_GET_PAGE_PROT which later can be used across platforms when enabling them with ARCH_HAS_VM_GET_PAGE_PROT. This does not create any functional change. Link: https://lkml.kernel.org/r/20220711070600.2378316-3-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: Christophe Leroy Suggested-by: Christoph Hellwig Cc: Arnd Bergmann Cc: Brian Cain Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Chris Zankel Cc: "David S. Miller" Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: Jonas Bonn Cc: Michael Ellerman Cc: Michal Simek Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sam Ravnborg Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: WANG Xuerui Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 3cdc16cfd867..014ee8f0fbaa 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1689,4 +1689,32 @@ typedef unsigned int pgtbl_mod_mask; #define MAX_PTRS_PER_P4D PTRS_PER_P4D #endif +/* description of effects of mapping type and prot in current implementation. + * this is due to the limited x86 page protection hardware. The expected + * behavior is in parens: + * + * map_type prot + * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC + * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes + * w: (no) no w: (no) no w: (yes) yes w: (no) no + * x: (no) no x: (no) yes x: (no) yes x: (yes) yes + * + * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes + * w: (no) no w: (no) no w: (copy) copy w: (no) no + * x: (no) no x: (no) yes x: (no) yes x: (yes) yes + * + * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and + * MAP_PRIVATE (with Enhanced PAN supported): + * r: (no) no + * w: (no) no + * x: (yes) yes + */ +#define DECLARE_VM_GET_PAGE_PROT \ +pgprot_t vm_get_page_prot(unsigned long vm_flags) \ +{ \ + return protection_map[vm_flags & \ + (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)]; \ +} \ +EXPORT_SYMBOL(vm_get_page_prot); + #endif /* _LINUX_PGTABLE_H */ -- cgit v1.2.3 From 09095f74130dfb2110ef2bcdd9ad0d42addaa1d5 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 11 Jul 2022 12:35:41 +0530 Subject: mm/mmap: build protect protection_map[] with ARCH_HAS_VM_GET_PAGE_PROT Now that protection_map[] has been moved inside those platforms that enable ARCH_HAS_VM_GET_PAGE_PROT. Hence generic protection_map[] array now can be protected with CONFIG_ARCH_HAS_VM_GET_PAGE_PROT intead of __P000. Link: https://lkml.kernel.org/r/20220711070600.2378316-8-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: Christophe Leroy Cc: Arnd Bergmann Cc: Brian Cain Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Christoph Hellwig Cc: Chris Zankel Cc: "David S. Miller" Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: Jonas Bonn Cc: Michael Ellerman Cc: Michal Simek Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sam Ravnborg Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: WANG Xuerui Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1a435ce146a2..4b4dc93f9bc3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -425,7 +425,7 @@ extern unsigned int kobjsize(const void *objp); * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. */ -#ifdef __P000 +#ifndef CONFIG_ARCH_HAS_VM_GET_PAGE_PROT extern pgprot_t protection_map[16]; #endif -- cgit v1.2.3 From 3d923c5f1e21ad491acd4c0d62bf2481ce94016c Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 11 Jul 2022 12:36:00 +0530 Subject: mm/mmap: drop ARCH_HAS_VM_GET_PAGE_PROT Now all the platforms enable ARCH_HAS_GET_PAGE_PROT. They define and export own vm_get_page_prot() whether custom or standard DECLARE_VM_GET_PAGE_PROT. Hence there is no need for default generic fallback for vm_get_page_prot(). Just drop this fallback and also ARCH_HAS_GET_PAGE_PROT mechanism. Link: https://lkml.kernel.org/r/20220711070600.2378316-27-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Reviewed-by: Christophe Leroy Acked-by: Geert Uytterhoeven Cc: Arnd Bergmann Cc: Brian Cain Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Chris Zankel Cc: "David S. Miller" Cc: Dinh Nguyen Cc: Guo Ren Cc: Heiko Carstens Cc: Huacai Chen Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: Jonas Bonn Cc: Michael Ellerman Cc: Michal Simek Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sam Ravnborg Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: WANG Xuerui Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4b4dc93f9bc3..61e3101c44ea 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -425,9 +425,6 @@ extern unsigned int kobjsize(const void *objp); * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. */ -#ifndef CONFIG_ARCH_HAS_VM_GET_PAGE_PROT -extern pgprot_t protection_map[16]; -#endif /* * The default fault flags that should be used by most of the -- cgit v1.2.3 From 3ce4fee4401206cf5a2c476ec0ee6c90191dfade Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 4 Jul 2022 21:21:55 +0800 Subject: mm/huge_memory: check pmd_present first in is_huge_zero_pmd When pmd is non-present, pmd_pfn returns an insane value. So we should check pmd_present first to avoid acquiring such insane value and also avoid touching possible cold huge_zero_pfn cache line when pmd isn't present. Link: https://lkml.kernel.org/r/20220704132201.14611-11-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: Muchun Song Cc: Matthew Wilcox Cc: Yang Shi Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ae3d8e2fd9e2..12b297f9951d 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -273,7 +273,7 @@ static inline bool is_huge_zero_page(struct page *page) static inline bool is_huge_zero_pmd(pmd_t pmd) { - return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd); + return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd); } static inline bool is_huge_zero_pud(pud_t pud) -- cgit v1.2.3 From 121c1781aeb00475d163246d9ae7d8746e377040 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 4 Jul 2022 21:21:58 +0800 Subject: mm/huge_memory: fix comment of page_deferred_list The current comment is confusing because if global or memcg deferred list in the second tail page is occupied by compound_head, why we still use page[2].deferred_list here? I think it wants to say that Global or memcg deferred list in the first tail page is occupied by compound_mapcount and compound_pincount so we use the second tail page's deferred_list instead. Link: https://lkml.kernel.org/r/20220704132201.14611-14-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: Muchun Song Cc: Matthew Wilcox Cc: Yang Shi Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 12b297f9951d..37f2f11a6d7e 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -294,8 +294,8 @@ static inline bool thp_migration_supported(void) static inline struct list_head *page_deferred_list(struct page *page) { /* - * Global or memcg deferred list in the second tail pages is - * occupied by compound_head. + * See organization of tail pages of compound page in + * "struct page" definition. */ return &page[2].deferred_list; } -- cgit v1.2.3 From dcadcf1c30619ead2f3280bfb7f74de8304be2bb Mon Sep 17 00:00:00 2001 From: Gang Li Date: Wed, 6 Jul 2022 11:46:54 +0800 Subject: mm, hugetlb: skip irrelevant nodes in show_free_areas() show_free_areas() allows to filter out node specific data which is irrelevant to the allocation request. But hugetlb_show_meminfo() still shows hugetlb on all nodes, which is redundant and unnecessary. Use show_mem_node_skip() to skip irrelevant nodes. And replace hugetlb_show_meminfo() with hugetlb_show_meminfo_node(nid). before-and-after sample output of OOM: before: ``` [ 214.362453] Node 1 active_anon:148kB inactive_anon:4050920kB active_file:112kB inactive_file:100kB [ 214.375429] Node 1 Normal free:45100kB boost:0kB min:45576kB low:56968kB high:68360kB reserved_hig [ 214.388334] lowmem_reserve[]: 0 0 0 0 0 [ 214.390251] Node 1 Normal: 423*4kB (UE) 320*8kB (UME) 187*16kB (UE) 117*32kB (UE) 57*64kB (UME) 20 [ 214.397626] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB [ 214.401518] Node 1 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB ``` after: ``` [ 145.069705] Node 1 active_anon:128kB inactive_anon:4049412kB active_file:56kB inactive_file:84kB u [ 145.110319] Node 1 Normal free:45424kB boost:0kB min:45576kB low:56968kB high:68360kB reserved_hig [ 145.152315] lowmem_reserve[]: 0 0 0 0 0 [ 145.155244] Node 1 Normal: 470*4kB (UME) 373*8kB (UME) 247*16kB (UME) 168*32kB (UE) 86*64kB (UME) [ 145.164119] Node 1 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB ``` Link: https://lkml.kernel.org/r/20220706034655.1834-1-ligang.bdlg@bytedance.com Signed-off-by: Gang Li Reviewed-by: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 75ee739d815b..4cdfce976644 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -152,7 +152,7 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, struct page *ref_page, zap_flags_t zap_flags); void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(char *buf, int len, int nid); -void hugetlb_show_meminfo(void); +void hugetlb_show_meminfo_node(int nid); unsigned long hugetlb_total_pages(void); vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); @@ -298,7 +298,7 @@ static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid) return 0; } -static inline void hugetlb_show_meminfo(void) +static inline void hugetlb_show_meminfo_node(int nid) { } -- cgit v1.2.3 From 0d8bc0b10aeab543bdccb86180f58db1f79f7cee Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Wed, 13 Jul 2022 20:53:14 +0800 Subject: writeback: cleanup bdi_sched_wait() bdi_sched_wait() is no longer used since commit 839a8e8660b6 ("writeback: replace custom worker pool implementation with unbound workqueue"), so remove it. Link: https://lkml.kernel.org/r/20220713125314.171345-1-xiujianfeng@huawei.com Signed-off-by: Xiu Jianfeng Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Acked-by: Jens Axboe Signed-off-by: Andrew Morton --- include/linux/backing-dev.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index d452071db572..e84b745a6811 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -140,12 +140,6 @@ static inline bool mapping_can_writeback(struct address_space *mapping) return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK; } -static inline int bdi_sched_wait(void *word) -{ - schedule(); - return 0; -} - #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi, -- cgit v1.2.3 From 62df90b53e6f332bb69b73621998826c49a17323 Mon Sep 17 00:00:00 2001 From: wuchi Date: Sun, 19 Jun 2022 15:46:41 +0800 Subject: net, lib/once: remove {net_}get_random_once_wait macro DO_ONCE(func, ...) will call func with spinlock which acquired by spin_lock_irqsave in __do_once_start. But the get_random_once_wait will sleep in get_random_bytes_wait -> wait_for_random_bytes. Fortunately, there is no place to use {net_}get_random_once_wait, so we could remove them simply. Link: https://lkml.kernel.org/r/20220619074641.40916-1-wuchi.zero@gmail.com Signed-off-by: wuchi Acked-by: Jakub Kicinski Cc: David S. Miller Cc: Eric Dumazet Cc: Paolo Abeni Signed-off-by: Andrew Morton --- include/linux/net.h | 2 -- include/linux/once.h | 2 -- 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 12093f4db50c..8613772a1f58 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -303,8 +303,6 @@ do { \ #define net_get_random_once(buf, nbytes) \ get_random_once((buf), (nbytes)) -#define net_get_random_once_wait(buf, nbytes) \ - get_random_once_wait((buf), (nbytes)) /* * E.g. XFS meta- & log-data is in slab pages, or bcache meta diff --git a/include/linux/once.h b/include/linux/once.h index f54523052bbc..b14d8b309d52 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -54,7 +54,5 @@ void __do_once_done(bool *done, struct static_key_true *once_key, #define get_random_once(buf, nbytes) \ DO_ONCE(get_random_bytes, (buf), (nbytes)) -#define get_random_once_wait(buf, nbytes) \ - DO_ONCE(get_random_bytes_wait, (buf), (nbytes)) \ #endif /* _LINUX_ONCE_H */ -- cgit v1.2.3 From 43c249ea0b1e10baac4a1264a25d69723ce5d2c2 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 24 Jun 2022 16:14:12 +0200 Subject: compiler-gcc.h: remove ancient workaround for gcc PR 58670 The workaround for 'asm goto' miscompilation introduces a compiler barrier quirk that inhibits many useful compiler optimizations. For example, __try_cmpxchg_user compiles to: 11375: 41 8b 4d 00 mov 0x0(%r13),%ecx 11379: 41 8b 02 mov (%r10),%eax 1137c: f0 0f b1 0a lock cmpxchg %ecx,(%rdx) 11380: 0f 94 c2 sete %dl 11383: 84 d2 test %dl,%dl 11385: 75 c4 jne 1134b <...> 11387: 41 89 02 mov %eax,(%r10) where the barrier inhibits flags propagation from asm when compiled with gcc-12. When the mentioned quirk is removed, the following code is generated: 11553: 41 8b 4d 00 mov 0x0(%r13),%ecx 11557: 41 8b 02 mov (%r10),%eax 1155a: f0 0f b1 0a lock cmpxchg %ecx,(%rdx) 1155e: 74 c9 je 11529 <...> 11560: 41 89 02 mov %eax,(%r10) The refered compiler bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 was fixed for gcc-4.8.2. Current minimum required version of GCC is version 5.1 which has the above 'asm goto' miscompilation fixed, so remove the workaround. Link: https://lkml.kernel.org/r/20220624141412.72274-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/compiler-gcc.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index a0c55eeaeaf1..9b157b71036f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,17 +66,6 @@ __builtin_unreachable(); \ } while (0) -/* - * GCC 'asm goto' miscompiles certain code sequences: - * - * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 - * - * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. - * - * (asm goto is automatically volatile - the naming reflects this.) - */ -#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) - #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ -- cgit v1.2.3 From 045ed31e23aea840648c290dbde04797064960db Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 24 Jun 2022 08:30:04 +0300 Subject: kfifo: fix kfifo_to_user() return type The kfifo_to_user() macro is supposed to return zero for success or negative error codes. Unfortunately, there is a signedness bug so it returns unsigned int. This only affects callers which try to save the result in ssize_t and as far as I can see the only place which does that is line6_hwdep_read(). TL;DR: s/_uint/_int/. Link: https://lkml.kernel.org/r/YrVL3OJVLlNhIMFs@kili Fixes: 144ecf310eb5 ("kfifo: fix kfifo_alloc() to return a signed int value") Signed-off-by: Dan Carpenter Cc: Stefani Seibold Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/kfifo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 86249476b57f..0b35a41440ff 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -688,7 +688,7 @@ __kfifo_uint_must_check_helper( \ * writer, you don't need extra locking to use these macro. */ #define kfifo_to_user(fifo, to, len, copied) \ -__kfifo_uint_must_check_helper( \ +__kfifo_int_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ void __user *__to = (to); \ -- cgit v1.2.3 From 4f09903078eeb9138cddce8db06100b82f8620e8 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Sat, 2 Jul 2022 18:08:27 +0200 Subject: cpumask: add UP optimised for_each_*_cpu versions On uniprocessor builds, the following loops will always run over a mask that contains one enabled CPU (cpu0): - for_each_possible_cpu - for_each_online_cpu - for_each_present_cpu Provide uniprocessor-specific macros for these loops, that always run exactly once. Link: https://lkml.kernel.org/r/3a92869b902a075b97be5d1452c9c6badbbff0df.1656777646.git.sander@svanheule.net Signed-off-by: Sander Vanheule Acked-by: Yury Norov Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Marco Elver Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Valentin Schneider Signed-off-by: Andrew Morton --- include/linux/cpumask.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index fe29ac7cc469..533612770bc0 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -811,9 +811,16 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); /* First bits of cpu_bit_bitmap are in fact unset. */ #define cpu_none_mask to_cpumask(cpu_bit_bitmap[0]) +#if NR_CPUS == 1 +/* Uniprocessor: the possible/online/present masks are always "1" */ +#define for_each_possible_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) +#define for_each_online_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) +#define for_each_present_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) +#else #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) +#endif /* Wrappers for arch boot code to manipulate normally-constant masks */ void init_cpu_present(const struct cpumask *src); -- cgit v1.2.3 From b81dce77cedcea6f00292f02d4b1ebbfc2c5988d Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Sat, 2 Jul 2022 18:08:25 +0200 Subject: cpumask: Fix invalid uniprocessor mask assumption On uniprocessor builds, any CPU mask is assumed to contain exactly one CPU (cpu0). This assumption ignores the existence of empty masks, resulting in incorrect behaviour. cpumask_first_zero(), cpumask_next_zero(), and for_each_cpu_not() don't provide behaviour matching the assumption that a UP mask is always "1", and instead provide behaviour matching the empty mask. Drop the incorrectly optimised code and use the generic implementations in all cases. Link: https://lkml.kernel.org/r/86bf3f005abba2d92120ddd0809235cab4f759a6.1656777646.git.sander@svanheule.net Signed-off-by: Sander Vanheule Suggested-by: Yury Norov Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Marco Elver Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Valentin Schneider Signed-off-by: Andrew Morton --- include/linux/cpumask.h | 99 ++++++++++--------------------------------------- 1 file changed, 19 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 533612770bc0..6c5b4ee000f2 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -116,85 +116,6 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) return cpu; } -#if NR_CPUS == 1 -/* Uniprocessor. Assume all masks are "1". */ -static inline unsigned int cpumask_first(const struct cpumask *srcp) -{ - return 0; -} - -static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) -{ - return 0; -} - -static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, - const struct cpumask *srcp2) -{ - return 0; -} - -static inline unsigned int cpumask_last(const struct cpumask *srcp) -{ - return 0; -} - -/* Valid inputs for n are -1 and 0. */ -static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) -{ - return n+1; -} - -static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) -{ - return n+1; -} - -static inline unsigned int cpumask_next_and(int n, - const struct cpumask *srcp, - const struct cpumask *andp) -{ - return n+1; -} - -static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, - int start, bool wrap) -{ - /* cpu0 unless stop condition, wrap and at cpu0, then nr_cpumask_bits */ - return (wrap && n == 0); -} - -/* cpu must be a valid cpu, ie 0, so there's no other choice. */ -static inline unsigned int cpumask_any_but(const struct cpumask *mask, - unsigned int cpu) -{ - return 1; -} - -static inline unsigned int cpumask_local_spread(unsigned int i, int node) -{ - return 0; -} - -static inline int cpumask_any_and_distribute(const struct cpumask *src1p, - const struct cpumask *src2p) { - return cpumask_first_and(src1p, src2p); -} - -static inline int cpumask_any_distribute(const struct cpumask *srcp) -{ - return cpumask_first(srcp); -} - -#define for_each_cpu(cpu, mask) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) -#define for_each_cpu_not(cpu, mask) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) -#define for_each_cpu_wrap(cpu, mask, start) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start)) -#define for_each_cpu_and(cpu, mask1, mask2) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2) -#else /** * cpumask_first - get the first cpu in a cpumask * @srcp: the cpumask pointer @@ -260,10 +181,29 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int __pure cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int __pure cpumask_any_but(const struct cpumask *mask, unsigned int cpu); + +#if NR_CPUS == 1 +/* Uniprocessor: there is only one valid CPU */ +static inline unsigned int cpumask_local_spread(unsigned int i, int node) +{ + return 0; +} + +static inline int cpumask_any_and_distribute(const struct cpumask *src1p, + const struct cpumask *src2p) { + return cpumask_first_and(src1p, src2p); +} + +static inline int cpumask_any_distribute(const struct cpumask *srcp) +{ + return cpumask_first(srcp); +} +#else unsigned int cpumask_local_spread(unsigned int i, int node); int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p); int cpumask_any_distribute(const struct cpumask *srcp); +#endif /* NR_CPUS */ /** * for_each_cpu - iterate over every cpu in a mask @@ -324,7 +264,6 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool for ((cpu) = -1; \ (cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \ (cpu) < nr_cpu_ids;) -#endif /* SMP */ #define CPU_BITS_NONE \ { \ -- cgit v1.2.3 From 953257a9252a9b3c58ca68fc5bf26fc65e5b1cb8 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Sat, 2 Jul 2022 18:08:28 +0200 Subject: cpumask: update cpumask_next_wrap() signature The extern specifier is not needed for this declaration, so drop it. The function also depends only on the input parameters, and has no side effects, so it can be marked __pure like other functions in cpumask.h. Link: https://lkml.kernel.org/r/72ab755695b74bb5fbaa756ae4c0edd708d172f1.1656777646.git.sander@svanheule.net Signed-off-by: Sander Vanheule Reviewed-by: Andy Shevchenko Cc: Borislav Petkov Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Marco Elver Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Valentin Schneider Cc: Yury Norov Signed-off-by: Andrew Morton --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 6c5b4ee000f2..523857884ae4 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -229,7 +229,7 @@ int cpumask_any_distribute(const struct cpumask *srcp); (cpu) = cpumask_next_zero((cpu), (mask)), \ (cpu) < nr_cpu_ids;) -extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); +int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); /** * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location -- cgit v1.2.3 From 91561d4ecb755f056f8ff04f9dcaec210140e55c Mon Sep 17 00:00:00 2001 From: Chao Gao Date: Fri, 15 Jul 2022 18:45:33 +0800 Subject: swiotlb: remove unused fields in io_tlb_mem Commit 20347fca71a3 ("swiotlb: split up the global swiotlb lock") splits io_tlb_mem into multiple areas. Each area has its own lock and index. The global ones are not used so remove them. Signed-off-by: Chao Gao Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index f65ff1930120..d3ae03edbbd2 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -79,11 +79,8 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * @used: The number of used IO TLB block. * @list: The free list describing the number of free entries available * from each index. - * @index: The index to start searching in the next round. * @orig_addr: The original address corresponding to a mapped entry. * @alloc_size: Size of the allocated buffer. - * @lock: The lock to protect the above data structures in the map and - * unmap calls. * @debugfs: The dentry to debugfs. * @late_alloc: %true if allocated using the page allocator * @force_bounce: %true if swiotlb bouncing is forced @@ -97,8 +94,6 @@ struct io_tlb_mem { void *vaddr; unsigned long nslabs; unsigned long used; - unsigned int index; - spinlock_t lock; struct dentry *debugfs; bool late_alloc; bool force_bounce; -- cgit v1.2.3 From 942a8186eb4451700dadd1d60a2e43ce67605991 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Jul 2022 08:43:07 +0200 Subject: swiotlb: move struct io_tlb_slot to swiotlb.c No need to expose this structure definition in the header. Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index d3ae03edbbd2..35bc4e281c21 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -101,11 +101,7 @@ struct io_tlb_mem { unsigned int nareas; unsigned int area_nslabs; struct io_tlb_area *areas; - struct io_tlb_slot { - phys_addr_t orig_addr; - size_t alloc_size; - unsigned int list; - } *slots; + struct io_tlb_slot *slots; }; extern struct io_tlb_mem io_tlb_default_mem; -- cgit v1.2.3 From 76c16d3e19446deea98b7883f261758b96b8781a Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Thu, 14 Jul 2022 15:54:27 +0800 Subject: net: stmmac: switch to use interrupt for hw crosstimestamping Using current implementation of polling mode, there is high chances we will hit into timeout error when running phc2sys. Hence, update the implementation of hardware crosstimestamping to use the MAC interrupt service routine instead of polling for TSIS bit in the MAC Timestamp Interrupt Status register to be set. Cc: Richard Cochran Signed-off-by: Wong Vee Khee Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 29917850f079..8df475db88c0 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -260,6 +260,7 @@ struct plat_stmmacenet_data { bool has_crossts; int int_snapshot_num; int ext_snapshot_num; + bool int_snapshot_en; bool ext_snapshot_en; bool multi_msi_en; int msi_mac_vec; -- cgit v1.2.3 From 9592eef7c16ec5fb9f36c4d9abe8eeffc2e1d2f3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 5 Jul 2022 20:48:41 +0200 Subject: random: remove CONFIG_ARCH_RANDOM When RDRAND was introduced, there was much discussion on whether it should be trusted and how the kernel should handle that. Initially, two mechanisms cropped up, CONFIG_ARCH_RANDOM, a compile time switch, and "nordrand", a boot-time switch. Later the thinking evolved. With a properly designed RNG, using RDRAND values alone won't harm anything, even if the outputs are malicious. Rather, the issue is whether those values are being *trusted* to be good or not. And so a new set of options were introduced as the real ones that people use -- CONFIG_RANDOM_TRUST_CPU and "random.trust_cpu". With these options, RDRAND is used, but it's not always credited. So in the worst case, it does nothing, and in the best case, maybe it helps. Along the way, CONFIG_ARCH_RANDOM's meaning got sort of pulled into the center and became something certain platforms force-select. The old options don't really help with much, and it's a bit odd to have special handling for these instructions when the kernel can deal fine with the existence or untrusted existence or broken existence or non-existence of that CPU capability. Simplify the situation by removing CONFIG_ARCH_RANDOM and using the ordinary asm-generic fallback pattern instead, keeping the two options that are actually used. For now it leaves "nordrand" for now, as the removal of that will take a different route. Acked-by: Michael Ellerman Acked-by: Catalin Marinas Acked-by: Borislav Petkov Acked-by: Heiko Carstens Acked-by: Greg Kroah-Hartman Signed-off-by: Jason A. Donenfeld --- include/linux/random.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 20e389a14e5c..865770e29f3e 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -106,14 +106,7 @@ declare_get_random_var_wait(long, unsigned long) */ #include -#ifdef CONFIG_ARCH_RANDOM -# include -#else -static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; } -static inline bool __must_check arch_get_random_int(unsigned int *v) { return false; } -static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { return false; } -static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { return false; } -#endif +#include /* * Called from the boot CPU during startup; not valid to call once -- cgit v1.2.3 From 0b4ae3f6d1210c11f9baf159009c7227eacf90f2 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 11 Jul 2022 07:47:16 -0700 Subject: iio: cros: Register FIFO callback after sensor is registered Instead of registering callback to process sensor events right at initialization time, wait for the sensor to be register in the iio subsystem. Events can come at probe time (in case the kernel rebooted abruptly without switching the sensor off for instance), and be sent to IIO core before the sensor is fully registered. Fixes: aa984f1ba4a4 ("iio: cros_ec: Register to cros_ec_sensorhub when EC supports FIFO") Reported-by: Douglas Anderson Signed-off-by: Gwendal Grignou Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20220711144716.642617-1-gwendal@chromium.org Signed-off-by: Jonathan Cameron --- include/linux/iio/common/cros_ec_sensors_core.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h index a8259c8822f5..e72167b96d27 100644 --- a/include/linux/iio/common/cros_ec_sensors_core.h +++ b/include/linux/iio/common/cros_ec_sensors_core.h @@ -93,8 +93,11 @@ int cros_ec_sensors_read_cmd(struct iio_dev *indio_dev, unsigned long scan_mask, struct platform_device; int cros_ec_sensors_core_init(struct platform_device *pdev, struct iio_dev *indio_dev, bool physical_device, - cros_ec_sensors_capture_t trigger_capture, - cros_ec_sensorhub_push_data_cb_t push_data); + cros_ec_sensors_capture_t trigger_capture); + +int cros_ec_sensors_core_register(struct device *dev, + struct iio_dev *indio_dev, + cros_ec_sensorhub_push_data_cb_t push_data); irqreturn_t cros_ec_sensors_capture(int irq, void *p); int cros_ec_sensors_push_data(struct iio_dev *indio_dev, -- cgit v1.2.3 From f4f451a16dd1f478fdb966bcbb612c1e4ce6b962 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 5 Jul 2022 20:35:32 +0800 Subject: mm: fix missing wake-up event for FSDAX pages FSDAX page refcounts are 1-based, rather than 0-based: if refcount is 1, then the page is freed. The FSDAX pages can be pinned through GUP, then they will be unpinned via unpin_user_page() using a folio variant to put the page, however, folio variants did not consider this special case, the result will be to miss a wakeup event (like the user of __fuse_dax_break_layouts()). This results in a task being permanently stuck in TASK_INTERRUPTIBLE state. Since FSDAX pages are only possibly obtained by GUP users, so fix GUP instead of folio_put() to lower overhead. Link: https://lkml.kernel.org/r/20220705123532.283-1-songmuchun@bytedance.com Fixes: d8ddc099c6b3 ("mm/gup: Add gup_put_folio()") Signed-off-by: Muchun Song Suggested-by: Matthew Wilcox Cc: Jason Gunthorpe Cc: John Hubbard Cc: William Kucharski Cc: Dan Williams Cc: Jan Kara Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index cf3d0d673f6b..7898e29bcfb5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1130,23 +1130,27 @@ static inline bool is_zone_movable_page(const struct page *page) #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) DECLARE_STATIC_KEY_FALSE(devmap_managed_key); -bool __put_devmap_managed_page(struct page *page); -static inline bool put_devmap_managed_page(struct page *page) +bool __put_devmap_managed_page_refs(struct page *page, int refs); +static inline bool put_devmap_managed_page_refs(struct page *page, int refs) { if (!static_branch_unlikely(&devmap_managed_key)) return false; if (!is_zone_device_page(page)) return false; - return __put_devmap_managed_page(page); + return __put_devmap_managed_page_refs(page, refs); } - #else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ -static inline bool put_devmap_managed_page(struct page *page) +static inline bool put_devmap_managed_page_refs(struct page *page, int refs) { return false; } #endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ +static inline bool put_devmap_managed_page(struct page *page) +{ + return put_devmap_managed_page_refs(page, 1); +} + /* 127: arbitrary random number, small enough to assemble well */ #define folio_ref_zero_or_close_to_overflow(folio) \ ((unsigned int) folio_ref_count(folio) + 127u <= 127u) -- cgit v1.2.3 From 2e07a521e1e424787af3bfc59615de4220856c35 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:28 +0100 Subject: skbuff: add SKBFL_DONT_ORPHAN flag We don't want to list every single ubuf_info callback in skb_orphan_frags(), add a flag controlling the behaviour. Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d3d10556f0fa..8e12b3b9ad6c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -686,10 +686,13 @@ enum { * charged to the kernel memory. */ SKBFL_PURE_ZEROCOPY = BIT(2), + + SKBFL_DONT_ORPHAN = BIT(3), }; #define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG) -#define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY) +#define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \ + SKBFL_DONT_ORPHAN) /* * The callback notifies userspace to release buffers when skb DMA is done in @@ -3182,8 +3185,7 @@ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) { if (likely(!skb_zcopy(skb))) return 0; - if (!skb_zcopy_is_nouarg(skb) && - skb_uarg(skb)->callback == msg_zerocopy_callback) + if (skb_shinfo(skb)->flags & SKBFL_DONT_ORPHAN) return 0; return skb_copy_ubufs(skb, gfp_mask); } -- cgit v1.2.3 From a229cc14f3395311b899e5e582b71efa8dd01df0 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 14 Jul 2022 19:15:24 +0800 Subject: dma-mapping: add dma_opt_mapping_size() Streaming DMA mapping involving an IOMMU may be much slower for larger total mapping size. This is because every IOMMU DMA mapping requires an IOVA to be allocated and freed. IOVA sizes above a certain limit are not cached, which can have a big impact on DMA mapping performance. Provide an API for device drivers to know this "optimal" limit, such that they may try to produce mapping which don't exceed it. Signed-off-by: John Garry Reviewed-by: Damien Le Moal Acked-by: Martin K. Petersen Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 1 + include/linux/dma-mapping.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 0d5b06b3a4a6..98ceba6fa848 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -69,6 +69,7 @@ struct dma_map_ops { int (*dma_supported)(struct device *dev, u64 mask); u64 (*get_required_mask)(struct device *dev); size_t (*max_mapping_size)(struct device *dev); + size_t (*opt_mapping_size)(void); unsigned long (*get_merge_boundary)(struct device *dev); }; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index dca2b1355bb1..fe3849434b2a 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -144,6 +144,7 @@ int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); +size_t dma_opt_mapping_size(struct device *dev); bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, @@ -266,6 +267,10 @@ static inline size_t dma_max_mapping_size(struct device *dev) { return 0; } +static inline size_t dma_opt_mapping_size(struct device *dev) +{ + return 0; +} static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return false; -- cgit v1.2.3 From 6d9870b7e5def2450e21316515b9efc0529204dd Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 14 Jul 2022 19:15:25 +0800 Subject: dma-iommu: add iommu_dma_opt_mapping_size() Add the IOMMU callback for DMA mapping API dma_opt_mapping_size(), which allows the drivers to know the optimal mapping limit and thus limit the requested IOVA lengths. This value is based on the IOVA rcache range limit, as IOVAs allocated above this limit must always be newly allocated, which may be quite slow. Signed-off-by: John Garry Reviewed-by: Damien Le Moal Acked-by: Robin Murphy Acked-by: Martin K. Petersen Signed-off-by: Christoph Hellwig --- include/linux/iova.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index 320a70e40233..c6ba6d95d79c 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -79,6 +79,8 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) int iova_cache_get(void); void iova_cache_put(void); +unsigned long iova_rcache_range(void); + void free_iova(struct iova_domain *iovad, unsigned long pfn); void __free_iova(struct iova_domain *iovad, struct iova *iova); struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, -- cgit v1.2.3 From 2b038e786f8338a3bc22d791000753e0ec113e00 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Jun 2022 20:28:42 +0300 Subject: gpiolib: devres: Get rid of unused devm_gpio_free() The last user, which in fact was a dead code, has gone a year ago, previous one 3 years ago. On top of that we want to drop away the legacy GPIO APIs in the kernel, so take a chance to get rid of unused devm_gpio_free() and accompanying stuff. Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 008ad3ee56b7..a370387fa406 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -95,7 +95,6 @@ struct device; int devm_gpio_request(struct device *dev, unsigned gpio, const char *label); int devm_gpio_request_one(struct device *dev, unsigned gpio, unsigned long flags, const char *label); -void devm_gpio_free(struct device *dev, unsigned int gpio); #else /* ! CONFIG_GPIOLIB */ @@ -240,11 +239,6 @@ static inline int devm_gpio_request_one(struct device *dev, unsigned gpio, return -EINVAL; } -static inline void devm_gpio_free(struct device *dev, unsigned int gpio) -{ - WARN_ON(1); -} - #endif /* ! CONFIG_GPIOLIB */ #endif /* __LINUX_GPIO_H */ -- cgit v1.2.3 From 2a1192ff0835cb4b0ccd6f6e85c93aa0dc6f66b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 14 Jun 2022 17:23:38 +0200 Subject: gpio: twl4030: Drop platform teardown callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no machine providing a teardown callback, so drop the unused code. This is a preparation for making platform remove callbacks return void. Signed-off-by: Uwe Kleine-König Signed-off-by: Bartosz Golaszewski --- include/linux/mfd/twl.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/twl.h b/include/linux/mfd/twl.h index 8871cc5188a0..c8cd31756037 100644 --- a/include/linux/mfd/twl.h +++ b/include/linux/mfd/twl.h @@ -594,8 +594,6 @@ struct twl4030_gpio_platform_data { int (*setup)(struct device *dev, unsigned gpio, unsigned ngpio); - int (*teardown)(struct device *dev, - unsigned gpio, unsigned ngpio); }; struct twl4030_madc_platform_data { -- cgit v1.2.3 From 7e55b33d3f18fde5c7a57b6c52d80499485c737f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 14 Jun 2022 21:48:02 +0200 Subject: gpio: ucb1400: Remove platform setup and teardown support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no user of these callbacks. The motivation for this change is to stop returning an error code from the remove callback. This is a preparation for making platform remove callbacks return void. Signed-off-by: Uwe Kleine-König Signed-off-by: Bartosz Golaszewski --- include/linux/ucb1400.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h index 0968ef458447..22345391350b 100644 --- a/include/linux/ucb1400.h +++ b/include/linux/ucb1400.h @@ -84,8 +84,6 @@ struct ucb1400_gpio { struct gpio_chip gc; struct snd_ac97 *ac97; int gpio_offset; - int (*gpio_setup)(struct device *dev, int ngpio); - int (*gpio_teardown)(struct device *dev, int ngpio); }; struct ucb1400_ts { -- cgit v1.2.3 From c269df8c5ad316eac47a9078e7a2339e1956637a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Wed, 13 Jul 2022 15:14:18 +0200 Subject: gpiolib: add support for bias pull disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change prepares the gpio core to look at firmware flags and set 'FLAG_BIAS_DISABLE' if necessary. It works in similar way to 'GPIO_PULL_DOWN' and 'GPIO_PULL_UP'. Signed-off-by: Nuno Sá Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/machine.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index 4d55da28e664..0b619eb7ae83 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -14,6 +14,7 @@ enum gpio_lookup_flags { GPIO_TRANSITORY = (1 << 3), GPIO_PULL_UP = (1 << 4), GPIO_PULL_DOWN = (1 << 5), + GPIO_PULL_DISABLE = (1 << 6), GPIO_LOOKUP_FLAGS_DEFAULT = GPIO_ACTIVE_HIGH | GPIO_PERSISTENT, }; -- cgit v1.2.3 From 31bea23119cda87088c6bd4085a1e442c6c5974c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Wed, 13 Jul 2022 15:14:19 +0200 Subject: gpiolib: of: support bias pull disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On top of looking at PULL_UP and PULL_DOWN flags, also look at PULL_DISABLE and set the appropriate GPIO flag. The GPIO core will then pass down this to controllers that support it. Signed-off-by: Nuno Sá Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- include/linux/of_gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 8bf2ea859653..a5166eb93437 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -29,6 +29,7 @@ enum of_gpio_flags { OF_GPIO_TRANSITORY = 0x8, OF_GPIO_PULL_UP = 0x10, OF_GPIO_PULL_DOWN = 0x20, + OF_GPIO_PULL_DISABLE = 0x40, }; #ifdef CONFIG_OF_GPIO -- cgit v1.2.3 From 62fa5c9800a0b9aecf9ce0743f12a16057c9400d Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 3 Jun 2022 17:57:25 +0200 Subject: mfd: max77714: Update Luca Ceresoli's e-mail address My Bootlin address is preferred from now on. Signed-off-by: Luca Ceresoli Signed-off-by: Luca Ceresoli Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220603155727.1232061-4-luca@lucaceresoli.net --- include/linux/mfd/max77714.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max77714.h b/include/linux/mfd/max77714.h index a970dc455426..7947e0d697a5 100644 --- a/include/linux/mfd/max77714.h +++ b/include/linux/mfd/max77714.h @@ -3,7 +3,7 @@ * Maxim MAX77714 Register and data structures definition. * * Copyright (C) 2022 Luca Ceresoli - * Author: Luca Ceresoli + * Author: Luca Ceresoli */ #ifndef __LINUX_MFD_MAX77714_H_ -- cgit v1.2.3 From 128ac294e1b437cb8a7f2ff8ede1cde9082bddbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 30 May 2022 21:24:28 +0200 Subject: mfd: t7l66xb: Drop platform disable callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit None of the in-tree instantiations of struct t7l66xb_platform_data provides a disable callback. So better don't dereference this function pointer unconditionally. As there is no user, drop it completely instead of calling it conditional. This is a preparation for making platform remove callbacks return void. Fixes: 1f192015ca5b ("mfd: driver for the T7L66XB TMIO SoC") Signed-off-by: Uwe Kleine-König Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220530192430.2108217-3-u.kleine-koenig@pengutronix.de --- include/linux/mfd/t7l66xb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/t7l66xb.h b/include/linux/mfd/t7l66xb.h index 69632c1b07bd..ae3e7a5c5219 100644 --- a/include/linux/mfd/t7l66xb.h +++ b/include/linux/mfd/t7l66xb.h @@ -12,7 +12,6 @@ struct t7l66xb_platform_data { int (*enable)(struct platform_device *dev); - int (*disable)(struct platform_device *dev); int (*suspend)(struct platform_device *dev); int (*resume)(struct platform_device *dev); -- cgit v1.2.3 From 6e1f1b1c93ceec1d9bfa9213775abc19161de5f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 30 May 2022 21:24:29 +0200 Subject: mfd: tc6387xb: Drop disable callback that is never called MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver never calls the disable callback, so drop the member from the platform struct and all callbacks from the actual platform datas. Signed-off-by: Uwe Kleine-König Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220530192430.2108217-4-u.kleine-koenig@pengutronix.de --- include/linux/mfd/tc6387xb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tc6387xb.h b/include/linux/mfd/tc6387xb.h index b4888209494a..aacf1dcc86b9 100644 --- a/include/linux/mfd/tc6387xb.h +++ b/include/linux/mfd/tc6387xb.h @@ -12,7 +12,6 @@ struct tc6387xb_platform_data { int (*enable)(struct platform_device *dev); - int (*disable)(struct platform_device *dev); int (*suspend)(struct platform_device *dev); int (*resume)(struct platform_device *dev); }; -- cgit v1.2.3 From de58cee8c6b803dda3304eace346919fe880a40a Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Tue, 31 May 2022 14:49:56 +0200 Subject: mfd: mt6397-core: Add MT6357 PMIC support Adds support for PMIC keys, Regulator, and RTC for the MT6357 PMIC. Signed-off-by: Fabien Parent Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220531124959.202787-5-fparent@baylibre.com --- include/linux/mfd/mt6357/core.h | 119 +++ include/linux/mfd/mt6357/registers.h | 1574 ++++++++++++++++++++++++++++++++++ 2 files changed, 1693 insertions(+) create mode 100644 include/linux/mfd/mt6357/core.h create mode 100644 include/linux/mfd/mt6357/registers.h (limited to 'include/linux') diff --git a/include/linux/mfd/mt6357/core.h b/include/linux/mfd/mt6357/core.h new file mode 100644 index 000000000000..2441611264fd --- /dev/null +++ b/include/linux/mfd/mt6357/core.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 BayLibre, SAS + * Author: Fabien Parent + */ + +#ifndef __MFD_MT6357_CORE_H__ +#define __MFD_MT6357_CORE_H__ + +enum mt6357_irq_top_status_shift { + MT6357_BUCK_TOP = 0, + MT6357_LDO_TOP, + MT6357_PSC_TOP, + MT6357_SCK_TOP, + MT6357_BM_TOP, + MT6357_HK_TOP, + MT6357_XPP_TOP, + MT6357_AUD_TOP, + MT6357_MISC_TOP, +}; + +enum mt6357_irq_numbers { + MT6357_IRQ_VPROC_OC = 0, + MT6357_IRQ_VCORE_OC, + MT6357_IRQ_VMODEM_OC, + MT6357_IRQ_VS1_OC, + MT6357_IRQ_VPA_OC, + MT6357_IRQ_VCORE_PREOC, + MT6357_IRQ_VFE28_OC = 16, + MT6357_IRQ_VXO22_OC, + MT6357_IRQ_VRF18_OC, + MT6357_IRQ_VRF12_OC, + MT6357_IRQ_VEFUSE_OC, + MT6357_IRQ_VCN33_OC, + MT6357_IRQ_VCN28_OC, + MT6357_IRQ_VCN18_OC, + MT6357_IRQ_VCAMA_OC, + MT6357_IRQ_VCAMD_OC, + MT6357_IRQ_VCAMIO_OC, + MT6357_IRQ_VLDO28_OC, + MT6357_IRQ_VUSB33_OC, + MT6357_IRQ_VAUX18_OC, + MT6357_IRQ_VAUD28_OC, + MT6357_IRQ_VIO28_OC, + MT6357_IRQ_VIO18_OC, + MT6357_IRQ_VSRAM_PROC_OC, + MT6357_IRQ_VSRAM_OTHERS_OC, + MT6357_IRQ_VIBR_OC, + MT6357_IRQ_VDRAM_OC, + MT6357_IRQ_VMC_OC, + MT6357_IRQ_VMCH_OC, + MT6357_IRQ_VEMC_OC, + MT6357_IRQ_VSIM1_OC, + MT6357_IRQ_VSIM2_OC, + MT6357_IRQ_PWRKEY = 48, + MT6357_IRQ_HOMEKEY, + MT6357_IRQ_PWRKEY_R, + MT6357_IRQ_HOMEKEY_R, + MT6357_IRQ_NI_LBAT_INT, + MT6357_IRQ_CHRDET, + MT6357_IRQ_CHRDET_EDGE, + MT6357_IRQ_VCDT_HV_DET, + MT6357_IRQ_WATCHDOG, + MT6357_IRQ_VBATON_UNDET, + MT6357_IRQ_BVALID_DET, + MT6357_IRQ_OV, + MT6357_IRQ_RTC = 64, + MT6357_IRQ_FG_BAT0_H = 80, + MT6357_IRQ_FG_BAT0_L, + MT6357_IRQ_FG_CUR_H, + MT6357_IRQ_FG_CUR_L, + MT6357_IRQ_FG_ZCV, + MT6357_IRQ_BATON_LV = 96, + MT6357_IRQ_BATON_HT, + MT6357_IRQ_BAT_H = 112, + MT6357_IRQ_BAT_L, + MT6357_IRQ_AUXADC_IMP, + MT6357_IRQ_NAG_C_DLTV, + MT6357_IRQ_AUDIO = 128, + MT6357_IRQ_ACCDET = 133, + MT6357_IRQ_ACCDET_EINT0, + MT6357_IRQ_ACCDET_EINT1, + MT6357_IRQ_SPI_CMD_ALERT = 144, + MT6357_IRQ_NR, +}; + +#define MT6357_IRQ_BUCK_BASE MT6357_IRQ_VPROC_OC +#define MT6357_IRQ_LDO_BASE MT6357_IRQ_VFE28_OC +#define MT6357_IRQ_PSC_BASE MT6357_IRQ_PWRKEY +#define MT6357_IRQ_SCK_BASE MT6357_IRQ_RTC +#define MT6357_IRQ_BM_BASE MT6357_IRQ_FG_BAT0_H +#define MT6357_IRQ_HK_BASE MT6357_IRQ_BAT_H +#define MT6357_IRQ_AUD_BASE MT6357_IRQ_AUDIO +#define MT6357_IRQ_MISC_BASE MT6357_IRQ_SPI_CMD_ALERT + +#define MT6357_IRQ_BUCK_BITS (MT6357_IRQ_VCORE_PREOC - MT6357_IRQ_BUCK_BASE + 1) +#define MT6357_IRQ_LDO_BITS (MT6357_IRQ_VSIM2_OC - MT6357_IRQ_LDO_BASE + 1) +#define MT6357_IRQ_PSC_BITS (MT6357_IRQ_VCDT_HV_DET - MT6357_IRQ_PSC_BASE + 1) +#define MT6357_IRQ_SCK_BITS (MT6357_IRQ_RTC - MT6357_IRQ_SCK_BASE + 1) +#define MT6357_IRQ_BM_BITS (MT6357_IRQ_BATON_HT - MT6357_IRQ_BM_BASE + 1) +#define MT6357_IRQ_HK_BITS (MT6357_IRQ_NAG_C_DLTV - MT6357_IRQ_HK_BASE + 1) +#define MT6357_IRQ_AUD_BITS (MT6357_IRQ_ACCDET_EINT1 - MT6357_IRQ_AUD_BASE + 1) +#define MT6357_IRQ_MISC_BITS \ + (MT6357_IRQ_SPI_CMD_ALERT - MT6357_IRQ_MISC_BASE + 1) + +#define MT6357_TOP_GEN(sp) \ +{ \ + .hwirq_base = MT6357_IRQ_##sp##_BASE, \ + .num_int_regs = \ + ((MT6357_IRQ_##sp##_BITS - 1) / \ + MTK_PMIC_REG_WIDTH) + 1, \ + .en_reg = MT6357_##sp##_TOP_INT_CON0, \ + .en_reg_shift = 0x6, \ + .sta_reg = MT6357_##sp##_TOP_INT_STATUS0, \ + .sta_reg_shift = 0x2, \ + .top_offset = MT6357_##sp##_TOP, \ +} + +#endif /* __MFD_MT6357_CORE_H__ */ diff --git a/include/linux/mfd/mt6357/registers.h b/include/linux/mfd/mt6357/registers.h new file mode 100644 index 000000000000..e24af83b618d --- /dev/null +++ b/include/linux/mfd/mt6357/registers.h @@ -0,0 +1,1574 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 MediaTek Inc. + */ + +#ifndef __MFD_MT6357_REGISTERS_H__ +#define __MFD_MT6357_REGISTERS_H__ + +/* PMIC Registers */ +#define MT6357_TOP0_ID 0x0 +#define MT6357_TOP0_REV0 0x2 +#define MT6357_TOP0_DSN_DBI 0x4 +#define MT6357_TOP0_DSN_DXI 0x6 +#define MT6357_HWCID 0x8 +#define MT6357_SWCID 0xa +#define MT6357_PONSTS 0xc +#define MT6357_POFFSTS 0xe +#define MT6357_PSTSCTL 0x10 +#define MT6357_PG_DEB_STS0 0x12 +#define MT6357_PG_SDN_STS0 0x14 +#define MT6357_OC_SDN_STS0 0x16 +#define MT6357_THERMALSTATUS 0x18 +#define MT6357_TOP_CON 0x1a +#define MT6357_TEST_OUT 0x1c +#define MT6357_TEST_CON0 0x1e +#define MT6357_TEST_CON1 0x20 +#define MT6357_TESTMODE_SW 0x22 +#define MT6357_TOPSTATUS 0x24 +#define MT6357_TDSEL_CON 0x26 +#define MT6357_RDSEL_CON 0x28 +#define MT6357_SMT_CON0 0x2a +#define MT6357_SMT_CON1 0x2c +#define MT6357_TOP_RSV0 0x2e +#define MT6357_TOP_RSV1 0x30 +#define MT6357_DRV_CON0 0x32 +#define MT6357_DRV_CON1 0x34 +#define MT6357_DRV_CON2 0x36 +#define MT6357_DRV_CON3 0x38 +#define MT6357_FILTER_CON0 0x3a +#define MT6357_FILTER_CON1 0x3c +#define MT6357_FILTER_CON2 0x3e +#define MT6357_FILTER_CON3 0x40 +#define MT6357_TOP_STATUS 0x42 +#define MT6357_TOP_STATUS_SET 0x44 +#define MT6357_TOP_STATUS_CLR 0x46 +#define MT6357_TOP_TRAP 0x48 +#define MT6357_TOP1_ID 0x80 +#define MT6357_TOP1_REV0 0x82 +#define MT6357_TOP1_DSN_DBI 0x84 +#define MT6357_TOP1_DSN_DXI 0x86 +#define MT6357_GPIO_DIR0 0x88 +#define MT6357_GPIO_DIR0_SET 0x8a +#define MT6357_GPIO_DIR0_CLR 0x8c +#define MT6357_GPIO_PULLEN0 0x8e +#define MT6357_GPIO_PULLEN0_SET 0x90 +#define MT6357_GPIO_PULLEN0_CLR 0x92 +#define MT6357_GPIO_PULLSEL0 0x94 +#define MT6357_GPIO_PULLSEL0_SET 0x96 +#define MT6357_GPIO_PULLSEL0_CLR 0x98 +#define MT6357_GPIO_DINV0 0x9a +#define MT6357_GPIO_DINV0_SET 0x9c +#define MT6357_GPIO_DINV0_CLR 0x9e +#define MT6357_GPIO_DOUT0 0xa0 +#define MT6357_GPIO_DOUT0_SET 0xa2 +#define MT6357_GPIO_DOUT0_CLR 0xa4 +#define MT6357_GPIO_PI0 0xa6 +#define MT6357_GPIO_POE0 0xa8 +#define MT6357_GPIO_MODE0 0xaa +#define MT6357_GPIO_MODE0_SET 0xac +#define MT6357_GPIO_MODE0_CLR 0xae +#define MT6357_GPIO_MODE1 0xb0 +#define MT6357_GPIO_MODE1_SET 0xb2 +#define MT6357_GPIO_MODE1_CLR 0xb4 +#define MT6357_GPIO_MODE2 0xb6 +#define MT6357_GPIO_MODE2_SET 0xb8 +#define MT6357_GPIO_MODE2_CLR 0xba +#define MT6357_GPIO_MODE3 0xbc +#define MT6357_GPIO_MODE3_SET 0xbe +#define MT6357_GPIO_MODE3_CLR 0xc0 +#define MT6357_GPIO_RSV 0xc2 +#define MT6357_TOP2_ID 0x100 +#define MT6357_TOP2_REV0 0x102 +#define MT6357_TOP2_DSN_DBI 0x104 +#define MT6357_TOP2_DSN_DXI 0x106 +#define MT6357_TOP_PAM0 0x108 +#define MT6357_TOP_PAM1 0x10a +#define MT6357_TOP_CKPDN_CON0 0x10c +#define MT6357_TOP_CKPDN_CON0_SET 0x10e +#define MT6357_TOP_CKPDN_CON0_CLR 0x110 +#define MT6357_TOP_CKPDN_CON1 0x112 +#define MT6357_TOP_CKPDN_CON1_SET 0x114 +#define MT6357_TOP_CKPDN_CON1_CLR 0x116 +#define MT6357_TOP_CKSEL_CON0 0x118 +#define MT6357_TOP_CKSEL_CON0_SET 0x11a +#define MT6357_TOP_CKSEL_CON0_CLR 0x11c +#define MT6357_TOP_CKSEL_CON1 0x11e +#define MT6357_TOP_CKSEL_CON1_SET 0x120 +#define MT6357_TOP_CKSEL_CON1_CLR 0x122 +#define MT6357_TOP_CKDIVSEL_CON0 0x124 +#define MT6357_TOP_CKDIVSEL_CON0_SET 0x126 +#define MT6357_TOP_CKDIVSEL_CON0_CLR 0x128 +#define MT6357_TOP_CKHWEN_CON0 0x12a +#define MT6357_TOP_CKHWEN_CON0_SET 0x12c +#define MT6357_TOP_CKHWEN_CON0_CLR 0x12e +#define MT6357_TOP_CKTST_CON0 0x130 +#define MT6357_TOP_CKTST_CON1 0x132 +#define MT6357_TOP_CLK_CON0 0x134 +#define MT6357_TOP_CLK_CON0_SET 0x136 +#define MT6357_TOP_CLK_CON0_CLR 0x138 +#define MT6357_TOP_DCM_CON0 0x13a +#define MT6357_TOP_HANDOVER_DEBUG0 0x13c +#define MT6357_TOP_RST_CON0 0x13e +#define MT6357_TOP_RST_CON0_SET 0x140 +#define MT6357_TOP_RST_CON0_CLR 0x142 +#define MT6357_TOP_RST_CON1 0x144 +#define MT6357_TOP_RST_CON1_SET 0x146 +#define MT6357_TOP_RST_CON1_CLR 0x148 +#define MT6357_TOP_RST_CON2 0x14a +#define MT6357_TOP_RST_MISC 0x14c +#define MT6357_TOP_RST_MISC_SET 0x14e +#define MT6357_TOP_RST_MISC_CLR 0x150 +#define MT6357_TOP_RST_STATUS 0x152 +#define MT6357_TOP_RST_STATUS_SET 0x154 +#define MT6357_TOP_RST_STATUS_CLR 0x156 +#define MT6357_TOP2_ELR_NUM 0x158 +#define MT6357_TOP2_ELR0 0x15a +#define MT6357_TOP2_ELR1 0x15c +#define MT6357_TOP3_ID 0x180 +#define MT6357_TOP3_REV0 0x182 +#define MT6357_TOP3_DSN_DBI 0x184 +#define MT6357_TOP3_DSN_DXI 0x186 +#define MT6357_MISC_TOP_INT_CON0 0x188 +#define MT6357_MISC_TOP_INT_CON0_SET 0x18a +#define MT6357_MISC_TOP_INT_CON0_CLR 0x18c +#define MT6357_MISC_TOP_INT_MASK_CON0 0x18e +#define MT6357_MISC_TOP_INT_MASK_CON0_SET 0x190 +#define MT6357_MISC_TOP_INT_MASK_CON0_CLR 0x192 +#define MT6357_MISC_TOP_INT_STATUS0 0x194 +#define MT6357_MISC_TOP_INT_RAW_STATUS0 0x196 +#define MT6357_TOP_INT_MASK_CON0 0x198 +#define MT6357_TOP_INT_MASK_CON0_SET 0x19a +#define MT6357_TOP_INT_MASK_CON0_CLR 0x19c +#define MT6357_TOP_INT_STATUS0 0x19e +#define MT6357_TOP_INT_RAW_STATUS0 0x1a0 +#define MT6357_TOP_INT_CON0 0x1a2 +#define MT6357_PLT0_ID 0x380 +#define MT6357_PLT0_REV0 0x382 +#define MT6357_PLT0_REV1 0x384 +#define MT6357_PLT0_DSN_DXI 0x386 +#define MT6357_FQMTR_CON0 0x388 +#define MT6357_FQMTR_CON1 0x38a +#define MT6357_FQMTR_CON2 0x38c +#define MT6357_TOP_CLK_TRIM 0x38e +#define MT6357_OTP_CON0 0x390 +#define MT6357_OTP_CON1 0x392 +#define MT6357_OTP_CON2 0x394 +#define MT6357_OTP_CON3 0x396 +#define MT6357_OTP_CON4 0x398 +#define MT6357_OTP_CON5 0x39a +#define MT6357_OTP_CON6 0x39c +#define MT6357_OTP_CON7 0x39e +#define MT6357_OTP_CON8 0x3a0 +#define MT6357_OTP_CON9 0x3a2 +#define MT6357_OTP_CON10 0x3a4 +#define MT6357_OTP_CON11 0x3a6 +#define MT6357_OTP_CON12 0x3a8 +#define MT6357_OTP_CON13 0x3aa +#define MT6357_OTP_CON14 0x3ac +#define MT6357_TOP_TMA_KEY 0x3ae +#define MT6357_TOP_MDB_CONF0 0x3b0 +#define MT6357_TOP_MDB_CONF1 0x3b2 +#define MT6357_TOP_MDB_CONF2 0x3b4 +#define MT6357_PLT0_ELR_NUM 0x3b6 +#define MT6357_PLT0_ELR0 0x3b8 +#define MT6357_PLT0_ELR1 0x3ba +#define MT6357_SPISLV_ID 0x400 +#define MT6357_SPISLV_REV0 0x402 +#define MT6357_SPISLV_REV1 0x404 +#define MT6357_SPISLV_DSN_DXI 0x406 +#define MT6357_RG_SPI_CON0 0x408 +#define MT6357_DEW_DIO_EN 0x40a +#define MT6357_DEW_READ_TEST 0x40c +#define MT6357_DEW_WRITE_TEST 0x40e +#define MT6357_DEW_CRC_SWRST 0x410 +#define MT6357_DEW_CRC_EN 0x412 +#define MT6357_DEW_CRC_VAL 0x414 +#define MT6357_DEW_DBG_MON_SEL 0x416 +#define MT6357_DEW_CIPHER_KEY_SEL 0x418 +#define MT6357_DEW_CIPHER_IV_SEL 0x41a +#define MT6357_DEW_CIPHER_EN 0x41c +#define MT6357_DEW_CIPHER_RDY 0x41e +#define MT6357_DEW_CIPHER_MODE 0x420 +#define MT6357_DEW_CIPHER_SWRST 0x422 +#define MT6357_DEW_RDDMY_NO 0x424 +#define MT6357_INT_TYPE_CON0 0x426 +#define MT6357_INT_TYPE_CON0_SET 0x428 +#define MT6357_INT_TYPE_CON0_CLR 0x42a +#define MT6357_INT_STA 0x42c +#define MT6357_RG_SPI_CON1 0x42e +#define MT6357_RG_SPI_CON2 0x430 +#define MT6357_RG_SPI_CON3 0x432 +#define MT6357_RG_SPI_CON4 0x434 +#define MT6357_RG_SPI_CON5 0x436 +#define MT6357_RG_SPI_CON6 0x438 +#define MT6357_RG_SPI_CON7 0x43a +#define MT6357_RG_SPI_CON8 0x43c +#define MT6357_RG_SPI_CON9 0x43e +#define MT6357_RG_SPI_CON10 0x440 +#define MT6357_RG_SPI_CON11 0x442 +#define MT6357_RG_SPI_CON12 0x444 +#define MT6357_RG_SPI_CON13 0x446 +#define MT6357_TOP_SPI_CON0 0x448 +#define MT6357_TOP_SPI_CON1 0x44a +#define MT6357_SCK_TOP_DSN_ID 0x500 +#define MT6357_SCK_TOP_DSN_REV0 0x502 +#define MT6357_SCK_TOP_DBI 0x504 +#define MT6357_SCK_TOP_DXI 0x506 +#define MT6357_SCK_TOP_TPM0 0x508 +#define MT6357_SCK_TOP_TPM1 0x50a +#define MT6357_SCK_TOP_CON0 0x50c +#define MT6357_SCK_TOP_CON1 0x50e +#define MT6357_SCK_TOP_TEST_OUT 0x510 +#define MT6357_SCK_TOP_TEST_CON0 0x512 +#define MT6357_SCK_TOP_CKPDN_CON0 0x514 +#define MT6357_SCK_TOP_CKPDN_CON0_SET 0x516 +#define MT6357_SCK_TOP_CKPDN_CON0_CLR 0x518 +#define MT6357_SCK_TOP_CKHWEN_CON0 0x51a +#define MT6357_SCK_TOP_CKHWEN_CON0_SET 0x51c +#define MT6357_SCK_TOP_CKHWEN_CON0_CLR 0x51e +#define MT6357_SCK_TOP_CKTST_CON 0x520 +#define MT6357_SCK_TOP_RST_CON0 0x522 +#define MT6357_SCK_TOP_RST_CON0_SET 0x524 +#define MT6357_SCK_TOP_RST_CON0_CLR 0x526 +#define MT6357_SCK_TOP_INT_CON0 0x528 +#define MT6357_SCK_TOP_INT_CON0_SET 0x52a +#define MT6357_SCK_TOP_INT_CON0_CLR 0x52c +#define MT6357_SCK_TOP_INT_MASK_CON0 0x52e +#define MT6357_SCK_TOP_INT_MASK_CON0_SET 0x530 +#define MT6357_SCK_TOP_INT_MASK_CON0_CLR 0x532 +#define MT6357_SCK_TOP_INT_STATUS0 0x534 +#define MT6357_SCK_TOP_INT_RAW_STATUS0 0x536 +#define MT6357_SCK_TOP_INT_MISC_CON 0x538 +#define MT6357_EOSC_CALI_CON0 0x53a +#define MT6357_EOSC_CALI_CON1 0x53c +#define MT6357_RTC_MIX_CON0 0x53e +#define MT6357_RTC_MIX_CON1 0x540 +#define MT6357_RTC_MIX_CON2 0x542 +#define MT6357_RTC_DSN_ID 0x580 +#define MT6357_RTC_DSN_REV0 0x582 +#define MT6357_RTC_DBI 0x584 +#define MT6357_RTC_DXI 0x586 +#define MT6357_RTC_BBPU 0x588 +#define MT6357_RTC_IRQ_STA 0x58a +#define MT6357_RTC_IRQ_EN 0x58c +#define MT6357_RTC_CII_EN 0x58e +#define MT6357_RTC_AL_MASK 0x590 +#define MT6357_RTC_TC_SEC 0x592 +#define MT6357_RTC_TC_MIN 0x594 +#define MT6357_RTC_TC_HOU 0x596 +#define MT6357_RTC_TC_DOM 0x598 +#define MT6357_RTC_TC_DOW 0x59a +#define MT6357_RTC_TC_MTH 0x59c +#define MT6357_RTC_TC_YEA 0x59e +#define MT6357_RTC_AL_SEC 0x5a0 +#define MT6357_RTC_AL_MIN 0x5a2 +#define MT6357_RTC_AL_HOU 0x5a4 +#define MT6357_RTC_AL_DOM 0x5a6 +#define MT6357_RTC_AL_DOW 0x5a8 +#define MT6357_RTC_AL_MTH 0x5aa +#define MT6357_RTC_AL_YEA 0x5ac +#define MT6357_RTC_OSC32CON 0x5ae +#define MT6357_RTC_POWERKEY1 0x5b0 +#define MT6357_RTC_POWERKEY2 0x5b2 +#define MT6357_RTC_PDN1 0x5b4 +#define MT6357_RTC_PDN2 0x5b6 +#define MT6357_RTC_SPAR0 0x5b8 +#define MT6357_RTC_SPAR1 0x5ba +#define MT6357_RTC_PROT 0x5bc +#define MT6357_RTC_DIFF 0x5be +#define MT6357_RTC_CALI 0x5c0 +#define MT6357_RTC_WRTGR 0x5c2 +#define MT6357_RTC_CON 0x5c4 +#define MT6357_RTC_SEC_CTRL 0x5c6 +#define MT6357_RTC_INT_CNT 0x5c8 +#define MT6357_RTC_SEC_DAT0 0x5ca +#define MT6357_RTC_SEC_DAT1 0x5cc +#define MT6357_RTC_SEC_DAT2 0x5ce +#define MT6357_RTC_SEC_DSN_ID 0x600 +#define MT6357_RTC_SEC_DSN_REV0 0x602 +#define MT6357_RTC_SEC_DBI 0x604 +#define MT6357_RTC_SEC_DXI 0x606 +#define MT6357_RTC_TC_SEC_SEC 0x608 +#define MT6357_RTC_TC_MIN_SEC 0x60a +#define MT6357_RTC_TC_HOU_SEC 0x60c +#define MT6357_RTC_TC_DOM_SEC 0x60e +#define MT6357_RTC_TC_DOW_SEC 0x610 +#define MT6357_RTC_TC_MTH_SEC 0x612 +#define MT6357_RTC_TC_YEA_SEC 0x614 +#define MT6357_RTC_SEC_CK_PDN 0x616 +#define MT6357_RTC_SEC_WRTGR 0x618 +#define MT6357_DCXO_DSN_ID 0x780 +#define MT6357_DCXO_DSN_REV0 0x782 +#define MT6357_DCXO_DSN_DBI 0x784 +#define MT6357_DCXO_DSN_DXI 0x786 +#define MT6357_DCXO_CW00 0x788 +#define MT6357_DCXO_CW00_SET 0x78a +#define MT6357_DCXO_CW00_CLR 0x78c +#define MT6357_DCXO_CW01 0x78e +#define MT6357_DCXO_CW02 0x790 +#define MT6357_DCXO_CW03 0x792 +#define MT6357_DCXO_CW04 0x794 +#define MT6357_DCXO_CW05 0x796 +#define MT6357_DCXO_CW06 0x798 +#define MT6357_DCXO_CW07 0x79a +#define MT6357_DCXO_CW08 0x79c +#define MT6357_DCXO_CW09 0x79e +#define MT6357_DCXO_CW10 0x7a0 +#define MT6357_DCXO_CW11 0x7a2 +#define MT6357_DCXO_CW11_SET 0x7a4 +#define MT6357_DCXO_CW11_CLR 0x7a6 +#define MT6357_DCXO_CW12 0x7a8 +#define MT6357_DCXO_CW13 0x7aa +#define MT6357_DCXO_CW14 0x7ac +#define MT6357_DCXO_CW15 0x7ae +#define MT6357_DCXO_CW16 0x7b0 +#define MT6357_DCXO_CW17 0x7b2 +#define MT6357_DCXO_CW18 0x7b4 +#define MT6357_DCXO_CW19 0x7b6 +#define MT6357_DCXO_CW20 0x7b8 +#define MT6357_DCXO_CW21 0x7ba +#define MT6357_DCXO_CW22 0x7bc +#define MT6357_DCXO_ELR_NUM 0x7be +#define MT6357_DCXO_ELR0 0x7c0 +#define MT6357_PSC_TOP_ID 0x900 +#define MT6357_PSC_TOP_REV0 0x902 +#define MT6357_PSC_TOP_DBI 0x904 +#define MT6357_PSC_TOP_DXI 0x906 +#define MT6357_PSC_TPM0 0x908 +#define MT6357_PSC_TPM1 0x90a +#define MT6357_PSC_TOP_RSTCTL_0 0x90c +#define MT6357_PSC_TOP_INT_CON0 0x90e +#define MT6357_PSC_TOP_INT_CON0_SET 0x910 +#define MT6357_PSC_TOP_INT_CON0_CLR 0x912 +#define MT6357_PSC_TOP_INT_MASK_CON0 0x914 +#define MT6357_PSC_TOP_INT_MASK_CON0_SET 0x916 +#define MT6357_PSC_TOP_INT_MASK_CON0_CLR 0x918 +#define MT6357_PSC_TOP_INT_STATUS0 0x91a +#define MT6357_PSC_TOP_INT_RAW_STATUS0 0x91c +#define MT6357_PSC_TOP_INT_MISC_CON 0x91e +#define MT6357_PSC_TOP_INT_MISC_CON_SET 0x920 +#define MT6357_PSC_TOP_INT_MISC_CON_CLR 0x922 +#define MT6357_PSC_TOP_MON_CTL 0x924 +#define MT6357_STRUP_ID 0x980 +#define MT6357_STRUP_REV0 0x982 +#define MT6357_STRUP_DBI 0x984 +#define MT6357_STRUP_DXI 0x986 +#define MT6357_STRUP_ANA_CON0 0x988 +#define MT6357_STRUP_ANA_CON1 0x98a +#define MT6357_STRUP_ANA_CON2 0x98c +#define MT6357_STRUP_ELR_NUM 0x98e +#define MT6357_STRUP_ELR_0 0x990 +#define MT6357_PSEQ_ID 0xa00 +#define MT6357_PSEQ_REV0 0xa02 +#define MT6357_PSEQ_DBI 0xa04 +#define MT6357_PSEQ_DXI 0xa06 +#define MT6357_PPCCTL0 0xa08 +#define MT6357_PPCCTL1 0xa0a +#define MT6357_PPCCTL2 0xa0c +#define MT6357_PPCCFG0 0xa0e +#define MT6357_PPCTST0 0xa10 +#define MT6357_PORFLAG 0xa12 +#define MT6357_STRUP_CON0 0xa14 +#define MT6357_STRUP_CON1 0xa16 +#define MT6357_STRUP_CON2 0xa18 +#define MT6357_STRUP_CON3 0xa1a +#define MT6357_STRUP_CON4 0xa1c +#define MT6357_STRUP_CON5 0xa1e +#define MT6357_STRUP_CON6 0xa20 +#define MT6357_STRUP_CON7 0xa22 +#define MT6357_CPSCFG0 0xa24 +#define MT6357_STRUP_CON9 0xa26 +#define MT6357_STRUP_CON10 0xa28 +#define MT6357_STRUP_CON11 0xa2a +#define MT6357_STRUP_CON12 0xa2c +#define MT6357_STRUP_CON13 0xa2e +#define MT6357_STRUP_CON14 0xa30 +#define MT6357_STRUP_CON15 0xa32 +#define MT6357_STRUP_CON16 0xa34 +#define MT6357_STRUP_CON19 0xa36 +#define MT6357_PSEQ_ELR_NUM 0xa38 +#define MT6357_PSEQ_ELR7 0xa3a +#define MT6357_PSEQ_ELR8 0xa3c +#define MT6357_PCHR_DIG_DSN_ID 0xa80 +#define MT6357_PCHR_DIG_DSN_REV0 0xa82 +#define MT6357_PCHR_DIG_DSN_DBI 0xa84 +#define MT6357_PCHR_DIG_DSN_DXI 0xa86 +#define MT6357_CHR_TOP_CON0 0xa88 +#define MT6357_CHR_TOP_CON1 0xa8a +#define MT6357_CHR_TOP_CON2 0xa8c +#define MT6357_CHR_TOP_CON3 0xa8e +#define MT6357_CHR_TOP_CON4 0xa90 +#define MT6357_CHR_TOP_CON5 0xa92 +#define MT6357_CHR_TOP_CON6 0xa94 +#define MT6357_PCHR_DIG_ELR_NUM 0xa96 +#define MT6357_PCHR_ELR0 0xa98 +#define MT6357_PCHR_ELR1 0xa9a +#define MT6357_PCHR_MACRO_DSN_ID 0xb80 +#define MT6357_PCHR_MACRO_DSN_REV0 0xb82 +#define MT6357_PCHR_MACRO_DSN_DBI 0xb84 +#define MT6357_PCHR_MACRO_DSN_DXI 0xb86 +#define MT6357_CHR_CON0 0xb88 +#define MT6357_CHR_CON1 0xb8a +#define MT6357_CHR_CON2 0xb8c +#define MT6357_CHR_CON3 0xb8e +#define MT6357_CHR_CON4 0xb90 +#define MT6357_CHR_CON5 0xb92 +#define MT6357_CHR_CON6 0xb94 +#define MT6357_CHR_CON7 0xb96 +#define MT6357_CHR_CON8 0xb98 +#define MT6357_CHR_CON9 0xb9a +#define MT6357_BM_TOP_DSN_ID 0xc00 +#define MT6357_BM_TOP_DSN_REV0 0xc02 +#define MT6357_BM_TOP_DBI 0xc04 +#define MT6357_BM_TOP_DXI 0xc06 +#define MT6357_BM_TPM0 0xc08 +#define MT6357_BM_TPM1 0xc0a +#define MT6357_BM_TOP_CKPDN_CON0 0xc0c +#define MT6357_BM_TOP_CKPDN_CON0_SET 0xc0e +#define MT6357_BM_TOP_CKPDN_CON0_CLR 0xc10 +#define MT6357_BM_TOP_CKSEL_CON0 0xc12 +#define MT6357_BM_TOP_CKSEL_CON0_SET 0xc14 +#define MT6357_BM_TOP_CKSEL_CON0_CLR 0xc16 +#define MT6357_BM_TOP_CKTST_CON0 0xc18 +#define MT6357_BM_TOP_RST_CON0 0xc1a +#define MT6357_BM_TOP_RST_CON0_SET 0xc1c +#define MT6357_BM_TOP_RST_CON0_CLR 0xc1e +#define MT6357_BM_TOP_INT_CON0 0xc20 +#define MT6357_BM_TOP_INT_CON0_SET 0xc22 +#define MT6357_BM_TOP_INT_CON0_CLR 0xc24 +#define MT6357_BM_TOP_INT_CON1 0xc26 +#define MT6357_BM_TOP_INT_CON1_SET 0xc28 +#define MT6357_BM_TOP_INT_CON1_CLR 0xc2a +#define MT6357_BM_TOP_INT_MASK_CON0 0xc2c +#define MT6357_BM_TOP_INT_MASK_CON0_SET 0xc2e +#define MT6357_BM_TOP_INT_MASK_CON0_CLR 0xc30 +#define MT6357_BM_TOP_INT_MASK_CON1 0xc32 +#define MT6357_BM_TOP_INT_MASK_CON1_SET 0xc34 +#define MT6357_BM_TOP_INT_MASK_CON1_CLR 0xc36 +#define MT6357_BM_TOP_INT_STATUS0 0xc38 +#define MT6357_BM_TOP_INT_STATUS1 0xc3a +#define MT6357_BM_TOP_INT_RAW_STATUS0 0xc3c +#define MT6357_BM_TOP_INT_RAW_STATUS1 0xc3e +#define MT6357_BM_TOP_INT_MISC_CON 0xc40 +#define MT6357_BM_TOP_DBG_CON 0xc42 +#define MT6357_BM_TOP_RSV0 0xc44 +#define MT6357_FGADC_ANA_DSN_ID 0xc80 +#define MT6357_FGADC_ANA_DSN_REV0 0xc82 +#define MT6357_FGADC_ANA_DSN_DBI 0xc84 +#define MT6357_FGADC_ANA_DSN_DXI 0xc86 +#define MT6357_FGADC_ANA_CON0 0xc88 +#define MT6357_FGADC_ANA_TEST_CON0 0xc8a +#define MT6357_FGADC_ANA_ELR_NUM 0xc8c +#define MT6357_FGADC_ANA_ELR0 0xc8e +#define MT6357_FGADC_ANA_ELR1 0xc90 +#define MT6357_FGADC0_DSN_ID 0xd00 +#define MT6357_FGADC0_DSN_REV0 0xd02 +#define MT6357_FGADC0_DSN_DBI 0xd04 +#define MT6357_FGADC0_DSN_DXI 0xd06 +#define MT6357_FGADC_CON0 0xd08 +#define MT6357_FGADC_CON1 0xd0a +#define MT6357_FGADC_CON2 0xd0c +#define MT6357_FGADC_CON3 0xd0e +#define MT6357_FGADC_CON4 0xd10 +#define MT6357_FGADC_CAR_CON0 0xd12 +#define MT6357_FGADC_CAR_CON1 0xd14 +#define MT6357_FGADC_CAR_CON2 0xd16 +#define MT6357_FGADC_CARTH_CON0 0xd18 +#define MT6357_FGADC_CARTH_CON1 0xd1a +#define MT6357_FGADC_CARTH_CON2 0xd1c +#define MT6357_FGADC_CARTH_CON3 0xd1e +#define MT6357_FGADC_NTER_CON0 0xd20 +#define MT6357_FGADC_NTER_CON1 0xd22 +#define MT6357_FGADC_NTER_CON2 0xd24 +#define MT6357_FGADC_SON_CON0 0xd26 +#define MT6357_FGADC_SON_CON1 0xd28 +#define MT6357_FGADC_SON_CON2 0xd2a +#define MT6357_FGADC_SON_CON3 0xd2c +#define MT6357_FGADC_ZCV_CON0 0xd2e +#define MT6357_FGADC_ZCV_CON1 0xd30 +#define MT6357_FGADC_ZCV_CON2 0xd32 +#define MT6357_FGADC_ZCV_CON3 0xd34 +#define MT6357_FGADC_ZCV_CON4 0xd36 +#define MT6357_FGADC_ZCVTH_CON0 0xd38 +#define MT6357_FGADC_ZCVTH_CON1 0xd3a +#define MT6357_FGADC_ZCVTH_CON2 0xd3c +#define MT6357_FGADC1_DSN_ID 0xd80 +#define MT6357_FGADC1_DSN_REV0 0xd82 +#define MT6357_FGADC1_DSN_DBI 0xd84 +#define MT6357_FGADC1_DSN_DXI 0xd86 +#define MT6357_FGADC_R_CON0 0xd88 +#define MT6357_FGADC_CUR_CON0 0xd8a +#define MT6357_FGADC_CUR_CON1 0xd8c +#define MT6357_FGADC_CUR_CON2 0xd8e +#define MT6357_FGADC_CUR_CON3 0xd90 +#define MT6357_FGADC_OFFSET_CON0 0xd92 +#define MT6357_FGADC_OFFSET_CON1 0xd94 +#define MT6357_FGADC_GAIN_CON0 0xd96 +#define MT6357_FGADC_TEST_CON0 0xd98 +#define MT6357_SYSTEM_INFO_CON0 0xd9a +#define MT6357_SYSTEM_INFO_CON1 0xd9c +#define MT6357_SYSTEM_INFO_CON2 0xd9e +#define MT6357_SYSTEM_INFO_CON3 0xda0 +#define MT6357_SYSTEM_INFO_CON4 0xda2 +#define MT6357_BATON_ANA_DSN_ID 0xe00 +#define MT6357_BATON_ANA_DSN_REV0 0xe02 +#define MT6357_BATON_ANA_DSN_DBI 0xe04 +#define MT6357_BATON_ANA_DSN_DXI 0xe06 +#define MT6357_BATON_ANA_CON0 0xe08 +#define MT6357_BATON_ANA_ELR_NUM 0xe0a +#define MT6357_BATON_ANA_ELR0 0xe0c +#define MT6357_HK_TOP_ID 0xf80 +#define MT6357_HK_TOP_REV0 0xf82 +#define MT6357_HK_TOP_DBI 0xf84 +#define MT6357_HK_TOP_DXI 0xf86 +#define MT6357_HK_TPM0 0xf88 +#define MT6357_HK_TPM1 0xf8a +#define MT6357_HK_TOP_CLK_CON0 0xf8c +#define MT6357_HK_TOP_CLK_CON1 0xf8e +#define MT6357_HK_TOP_RST_CON0 0xf90 +#define MT6357_HK_TOP_INT_CON0 0xf92 +#define MT6357_HK_TOP_INT_CON0_SET 0xf94 +#define MT6357_HK_TOP_INT_CON0_CLR 0xf96 +#define MT6357_HK_TOP_INT_MASK_CON0 0xf98 +#define MT6357_HK_TOP_INT_MASK_CON0_SET 0xf9a +#define MT6357_HK_TOP_INT_MASK_CON0_CLR 0xf9c +#define MT6357_HK_TOP_INT_STATUS0 0xf9e +#define MT6357_HK_TOP_INT_RAW_STATUS0 0xfa0 +#define MT6357_HK_TOP_MON_CON0 0xfa2 +#define MT6357_HK_TOP_MON_CON1 0xfa4 +#define MT6357_HK_TOP_MON_CON2 0xfa6 +#define MT6357_AUXADC_DSN_ID 0x1000 +#define MT6357_AUXADC_DSN_REV0 0x1002 +#define MT6357_AUXADC_DSN_DBI 0x1004 +#define MT6357_AUXADC_DSN_DXI 0x1006 +#define MT6357_AUXADC_ANA_CON0 0x1008 +#define MT6357_AUXADC_DIG_1_DSN_ID 0x1080 +#define MT6357_AUXADC_DIG_1_DSN_REV0 0x1082 +#define MT6357_AUXADC_DIG_1_DSN_DBI 0x1084 +#define MT6357_AUXADC_DIG_1_DSN_DXI 0x1086 +#define MT6357_AUXADC_ADC0 0x1088 +#define MT6357_AUXADC_ADC1 0x108a +#define MT6357_AUXADC_ADC2 0x108c +#define MT6357_AUXADC_ADC3 0x108e +#define MT6357_AUXADC_ADC4 0x1090 +#define MT6357_AUXADC_ADC5 0x1092 +#define MT6357_AUXADC_ADC6 0x1094 +#define MT6357_AUXADC_ADC7 0x1096 +#define MT6357_AUXADC_ADC8 0x1098 +#define MT6357_AUXADC_ADC9 0x109a +#define MT6357_AUXADC_ADC10 0x109c +#define MT6357_AUXADC_ADC11 0x109e +#define MT6357_AUXADC_ADC12 0x10a0 +#define MT6357_AUXADC_ADC14 0x10a2 +#define MT6357_AUXADC_ADC16 0x10a4 +#define MT6357_AUXADC_ADC17 0x10a6 +#define MT6357_AUXADC_ADC18 0x10a8 +#define MT6357_AUXADC_ADC19 0x10aa +#define MT6357_AUXADC_ADC20 0x10ac +#define MT6357_AUXADC_ADC21 0x10ae +#define MT6357_AUXADC_ADC22 0x10b0 +#define MT6357_AUXADC_ADC23 0x10b2 +#define MT6357_AUXADC_ADC24 0x10b4 +#define MT6357_AUXADC_ADC25 0x10b6 +#define MT6357_AUXADC_ADC26 0x10b8 +#define MT6357_AUXADC_ADC27 0x10ba +#define MT6357_AUXADC_ADC29 0x10bc +#define MT6357_AUXADC_ADC30 0x10be +#define MT6357_AUXADC_ADC31 0x10c0 +#define MT6357_AUXADC_ADC32 0x10c2 +#define MT6357_AUXADC_ADC33 0x10c4 +#define MT6357_AUXADC_ADC34 0x10c6 +#define MT6357_AUXADC_ADC35 0x10c8 +#define MT6357_AUXADC_ADC36 0x10ca +#define MT6357_AUXADC_ADC38 0x10cc +#define MT6357_AUXADC_ADC39 0x10ce +#define MT6357_AUXADC_ADC40 0x10d0 +#define MT6357_AUXADC_ADC41 0x10d2 +#define MT6357_AUXADC_ADC42 0x10d4 +#define MT6357_AUXADC_ADC43 0x10d6 +#define MT6357_AUXADC_ADC46 0x10d8 +#define MT6357_AUXADC_ADC47 0x10da +#define MT6357_AUXADC_DIG_1_ELR_NUM 0x10dc +#define MT6357_AUXADC_DIG_1_ELR0 0x10de +#define MT6357_AUXADC_DIG_1_ELR1 0x10e0 +#define MT6357_AUXADC_DIG_2_DSN_ID 0x1100 +#define MT6357_AUXADC_DIG_2_DSN_REV0 0x1102 +#define MT6357_AUXADC_DIG_2_DSN_DBI 0x1104 +#define MT6357_AUXADC_DIG_2_DSN_DXI 0x1106 +#define MT6357_AUXADC_STA0 0x1108 +#define MT6357_AUXADC_STA1 0x110a +#define MT6357_AUXADC_STA2 0x110c +#define MT6357_AUXADC_RQST0 0x110e +#define MT6357_AUXADC_RQST0_SET 0x1110 +#define MT6357_AUXADC_RQST0_CLR 0x1112 +#define MT6357_AUXADC_RQST2 0x1114 +#define MT6357_AUXADC_RQST2_SET 0x1116 +#define MT6357_AUXADC_RQST2_CLR 0x1118 +#define MT6357_AUXADC_RQST1 0x111a +#define MT6357_AUXADC_RQST1_SET 0x111c +#define MT6357_AUXADC_RQST1_CLR 0x111e +#define MT6357_AUXADC_CON0 0x1120 +#define MT6357_AUXADC_CON0_SET 0x1122 +#define MT6357_AUXADC_CON0_CLR 0x1124 +#define MT6357_AUXADC_CON1 0x1126 +#define MT6357_AUXADC_CON2 0x1128 +#define MT6357_AUXADC_CON3 0x112a +#define MT6357_AUXADC_CON4 0x112c +#define MT6357_AUXADC_CON5 0x112e +#define MT6357_AUXADC_CON6 0x1130 +#define MT6357_AUXADC_CON7 0x1132 +#define MT6357_AUXADC_CON8 0x1134 +#define MT6357_AUXADC_CON9 0x1136 +#define MT6357_AUXADC_CON10 0x1138 +#define MT6357_AUXADC_CON11 0x113a +#define MT6357_AUXADC_CON12 0x113c +#define MT6357_AUXADC_CON13 0x113e +#define MT6357_AUXADC_CON14 0x1140 +#define MT6357_AUXADC_CON15 0x1142 +#define MT6357_AUXADC_CON16 0x1144 +#define MT6357_AUXADC_CON17 0x1146 +#define MT6357_AUXADC_CON18 0x1148 +#define MT6357_AUXADC_CON19 0x114a +#define MT6357_AUXADC_CON20 0x114c +#define MT6357_AUXADC_DIG_3_DSN_ID 0x1180 +#define MT6357_AUXADC_DIG_3_DSN_REV0 0x1182 +#define MT6357_AUXADC_DIG_3_DSN_DBI 0x1184 +#define MT6357_AUXADC_DIG_3_DSN_DXI 0x1186 +#define MT6357_AUXADC_AUTORPT0 0x1188 +#define MT6357_AUXADC_LBAT0 0x118a +#define MT6357_AUXADC_LBAT1 0x118c +#define MT6357_AUXADC_LBAT2 0x118e +#define MT6357_AUXADC_LBAT3 0x1190 +#define MT6357_AUXADC_LBAT4 0x1192 +#define MT6357_AUXADC_LBAT5 0x1194 +#define MT6357_AUXADC_LBAT6 0x1196 +#define MT6357_AUXADC_ACCDET 0x1198 +#define MT6357_AUXADC_DBG0 0x119a +#define MT6357_AUXADC_IMP0 0x119c +#define MT6357_AUXADC_IMP1 0x119e +#define MT6357_AUXADC_DIG_3_ELR_NUM 0x11a0 +#define MT6357_AUXADC_DIG_3_ELR0 0x11a2 +#define MT6357_AUXADC_DIG_3_ELR1 0x11a4 +#define MT6357_AUXADC_DIG_3_ELR2 0x11a6 +#define MT6357_AUXADC_DIG_3_ELR3 0x11a8 +#define MT6357_AUXADC_DIG_3_ELR4 0x11aa +#define MT6357_AUXADC_DIG_3_ELR5 0x11ac +#define MT6357_AUXADC_DIG_3_ELR6 0x11ae +#define MT6357_AUXADC_DIG_3_ELR7 0x11b0 +#define MT6357_AUXADC_DIG_3_ELR8 0x11b2 +#define MT6357_AUXADC_DIG_3_ELR9 0x11b4 +#define MT6357_AUXADC_DIG_3_ELR10 0x11b6 +#define MT6357_AUXADC_DIG_3_ELR11 0x11b8 +#define MT6357_AUXADC_DIG_4_DSN_ID 0x1200 +#define MT6357_AUXADC_DIG_4_DSN_REV0 0x1202 +#define MT6357_AUXADC_DIG_4_DSN_DBI 0x1204 +#define MT6357_AUXADC_DIG_4_DSN_DXI 0x1206 +#define MT6357_AUXADC_MDRT_0 0x1208 +#define MT6357_AUXADC_MDRT_1 0x120a +#define MT6357_AUXADC_MDRT_2 0x120c +#define MT6357_AUXADC_MDRT_3 0x120e +#define MT6357_AUXADC_MDRT_4 0x1210 +#define MT6357_AUXADC_DCXO_MDRT_0 0x1212 +#define MT6357_AUXADC_DCXO_MDRT_1 0x1214 +#define MT6357_AUXADC_DCXO_MDRT_2 0x1216 +#define MT6357_AUXADC_NAG_0 0x1218 +#define MT6357_AUXADC_NAG_1 0x121a +#define MT6357_AUXADC_NAG_2 0x121c +#define MT6357_AUXADC_NAG_3 0x121e +#define MT6357_AUXADC_NAG_4 0x1220 +#define MT6357_AUXADC_NAG_5 0x1222 +#define MT6357_AUXADC_NAG_6 0x1224 +#define MT6357_AUXADC_NAG_7 0x1226 +#define MT6357_AUXADC_NAG_8 0x1228 +#define MT6357_AUXADC_RSV_1 0x122a +#define MT6357_AUXADC_ANA_0 0x122c +#define MT6357_AUXADC_IMP_CG0 0x122e +#define MT6357_AUXADC_LBAT_CG0 0x1230 +#define MT6357_AUXADC_NAG_CG0 0x1232 +#define MT6357_AUXADC_PRI_NEW 0x1234 +#define MT6357_AUXADC_CHR_TOP_CON2 0x1236 +#define MT6357_BUCK_TOP_DSN_ID 0x1400 +#define MT6357_BUCK_TOP_DSN_REV0 0x1402 +#define MT6357_BUCK_TOP_DBI 0x1404 +#define MT6357_BUCK_TOP_DXI 0x1406 +#define MT6357_BUCK_TOP_PAM0 0x1408 +#define MT6357_BUCK_TOP_PAM1 0x140a +#define MT6357_BUCK_TOP_CLK_CON0 0x140c +#define MT6357_BUCK_TOP_CLK_CON0_SET 0x140e +#define MT6357_BUCK_TOP_CLK_CON0_CLR 0x1410 +#define MT6357_BUCK_TOP_CLK_HWEN_CON0 0x1412 +#define MT6357_BUCK_TOP_CLK_HWEN_CON0_SET 0x1414 +#define MT6357_BUCK_TOP_CLK_HWEN_CON0_CLR 0x1416 +#define MT6357_BUCK_TOP_CLK_MISC_CON0 0x1418 +#define MT6357_BUCK_TOP_INT_CON0 0x141a +#define MT6357_BUCK_TOP_INT_CON0_SET 0x141c +#define MT6357_BUCK_TOP_INT_CON0_CLR 0x141e +#define MT6357_BUCK_TOP_INT_MASK_CON0 0x1420 +#define MT6357_BUCK_TOP_INT_MASK_CON0_SET 0x1422 +#define MT6357_BUCK_TOP_INT_MASK_CON0_CLR 0x1424 +#define MT6357_BUCK_TOP_INT_STATUS0 0x1426 +#define MT6357_BUCK_TOP_INT_RAW_STATUS0 0x1428 +#define MT6357_BUCK_TOP_STB_CON 0x142a +#define MT6357_BUCK_TOP_SLP_CON0 0x142c +#define MT6357_BUCK_TOP_SLP_CON1 0x142e +#define MT6357_BUCK_TOP_SLP_CON2 0x1430 +#define MT6357_BUCK_TOP_MINFREQ_CON 0x1432 +#define MT6357_BUCK_TOP_OC_CON0 0x1434 +#define MT6357_BUCK_TOP_K_CON0 0x1436 +#define MT6357_BUCK_TOP_K_CON1 0x1438 +#define MT6357_BUCK_TOP_K_CON2 0x143a +#define MT6357_BUCK_TOP_WDTDBG0 0x143c +#define MT6357_BUCK_TOP_WDTDBG1 0x143e +#define MT6357_BUCK_TOP_WDTDBG2 0x1440 +#define MT6357_BUCK_TOP_ELR_NUM 0x1442 +#define MT6357_BUCK_TOP_ELR0 0x1444 +#define MT6357_BUCK_TOP_ELR1 0x1446 +#define MT6357_BUCK_VPROC_DSN_ID 0x1480 +#define MT6357_BUCK_VPROC_DSN_REV0 0x1482 +#define MT6357_BUCK_VPROC_DSN_DBI 0x1484 +#define MT6357_BUCK_VPROC_DSN_DXI 0x1486 +#define MT6357_BUCK_VPROC_CON0 0x1488 +#define MT6357_BUCK_VPROC_CON1 0x148a +#define MT6357_BUCK_VPROC_CFG0 0x148c +#define MT6357_BUCK_VPROC_CFG1 0x148e +#define MT6357_BUCK_VPROC_OP_EN 0x1490 +#define MT6357_BUCK_VPROC_OP_EN_SET 0x1492 +#define MT6357_BUCK_VPROC_OP_EN_CLR 0x1494 +#define MT6357_BUCK_VPROC_OP_CFG 0x1496 +#define MT6357_BUCK_VPROC_OP_CFG_SET 0x1498 +#define MT6357_BUCK_VPROC_OP_CFG_CLR 0x149a +#define MT6357_BUCK_VPROC_SP_CON 0x149c +#define MT6357_BUCK_VPROC_SP_CFG 0x149e +#define MT6357_BUCK_VPROC_OC_CFG 0x14a0 +#define MT6357_BUCK_VPROC_DBG0 0x14a2 +#define MT6357_BUCK_VPROC_DBG1 0x14a4 +#define MT6357_BUCK_VPROC_DBG2 0x14a6 +#define MT6357_BUCK_VPROC_ELR_NUM 0x14a8 +#define MT6357_BUCK_VPROC_ELR0 0x14aa +#define MT6357_BUCK_VCORE_DSN_ID 0x1500 +#define MT6357_BUCK_VCORE_DSN_REV0 0x1502 +#define MT6357_BUCK_VCORE_DSN_DBI 0x1504 +#define MT6357_BUCK_VCORE_DSN_DXI 0x1506 +#define MT6357_BUCK_VCORE_CON0 0x1508 +#define MT6357_BUCK_VCORE_CON1 0x150a +#define MT6357_BUCK_VCORE_CFG0 0x150c +#define MT6357_BUCK_VCORE_CFG1 0x150e +#define MT6357_BUCK_VCORE_OP_EN 0x1510 +#define MT6357_BUCK_VCORE_OP_EN_SET 0x1512 +#define MT6357_BUCK_VCORE_OP_EN_CLR 0x1514 +#define MT6357_BUCK_VCORE_OP_CFG 0x1516 +#define MT6357_BUCK_VCORE_OP_CFG_SET 0x1518 +#define MT6357_BUCK_VCORE_OP_CFG_CLR 0x151a +#define MT6357_BUCK_VCORE_SP_CON 0x151c +#define MT6357_BUCK_VCORE_SP_CFG 0x151e +#define MT6357_BUCK_VCORE_OC_CFG 0x1520 +#define MT6357_BUCK_VCORE_DBG0 0x1522 +#define MT6357_BUCK_VCORE_DBG1 0x1524 +#define MT6357_BUCK_VCORE_DBG2 0x1526 +#define MT6357_BUCK_VCORE_ELR_NUM 0x1528 +#define MT6357_BUCK_VCORE_ELR0 0x152a +#define MT6357_BUCK_VMODEM_DSN_ID 0x1580 +#define MT6357_BUCK_VMODEM_DSN_REV0 0x1582 +#define MT6357_BUCK_VMODEM_DSN_DBI 0x1584 +#define MT6357_BUCK_VMODEM_DSN_DXI 0x1586 +#define MT6357_BUCK_VMODEM_CON0 0x1588 +#define MT6357_BUCK_VMODEM_CON1 0x158a +#define MT6357_BUCK_VMODEM_CFG0 0x158c +#define MT6357_BUCK_VMODEM_CFG1 0x158e +#define MT6357_BUCK_VMODEM_OP_EN 0x1590 +#define MT6357_BUCK_VMODEM_OP_EN_SET 0x1592 +#define MT6357_BUCK_VMODEM_OP_EN_CLR 0x1594 +#define MT6357_BUCK_VMODEM_OP_CFG 0x1596 +#define MT6357_BUCK_VMODEM_OP_CFG_SET 0x1598 +#define MT6357_BUCK_VMODEM_OP_CFG_CLR 0x159a +#define MT6357_BUCK_VMODEM_SP_CON 0x159c +#define MT6357_BUCK_VMODEM_SP_CFG 0x159e +#define MT6357_BUCK_VMODEM_OC_CFG 0x15a0 +#define MT6357_BUCK_VMODEM_DBG0 0x15a2 +#define MT6357_BUCK_VMODEM_DBG1 0x15a4 +#define MT6357_BUCK_VMODEM_DBG2 0x15a6 +#define MT6357_BUCK_VMODEM_ELR_NUM 0x15a8 +#define MT6357_BUCK_VMODEM_ELR0 0x15aa +#define MT6357_BUCK_VS1_DSN_ID 0x1600 +#define MT6357_BUCK_VS1_DSN_REV0 0x1602 +#define MT6357_BUCK_VS1_DSN_DBI 0x1604 +#define MT6357_BUCK_VS1_DSN_DXI 0x1606 +#define MT6357_BUCK_VS1_CON0 0x1608 +#define MT6357_BUCK_VS1_CON1 0x160a +#define MT6357_BUCK_VS1_CFG0 0x160c +#define MT6357_BUCK_VS1_CFG1 0x160e +#define MT6357_BUCK_VS1_OP_EN 0x1610 +#define MT6357_BUCK_VS1_OP_EN_SET 0x1612 +#define MT6357_BUCK_VS1_OP_EN_CLR 0x1614 +#define MT6357_BUCK_VS1_OP_CFG 0x1616 +#define MT6357_BUCK_VS1_OP_CFG_SET 0x1618 +#define MT6357_BUCK_VS1_OP_CFG_CLR 0x161a +#define MT6357_BUCK_VS1_SP_CON 0x161c +#define MT6357_BUCK_VS1_SP_CFG 0x161e +#define MT6357_BUCK_VS1_OC_CFG 0x1620 +#define MT6357_BUCK_VS1_DBG0 0x1622 +#define MT6357_BUCK_VS1_DBG1 0x1624 +#define MT6357_BUCK_VS1_DBG2 0x1626 +#define MT6357_BUCK_VS1_VOTER 0x1628 +#define MT6357_BUCK_VS1_VOTER_SET 0x162a +#define MT6357_BUCK_VS1_VOTER_CLR 0x162c +#define MT6357_BUCK_VS1_VOTER_CFG 0x162e +#define MT6357_BUCK_VS1_ELR_NUM 0x1630 +#define MT6357_BUCK_VS1_ELR0 0x1632 +#define MT6357_BUCK_VPA_DSN_ID 0x1680 +#define MT6357_BUCK_VPA_DSN_REV0 0x1682 +#define MT6357_BUCK_VPA_DSN_DBI 0x1684 +#define MT6357_BUCK_VPA_DSN_DXI 0x1686 +#define MT6357_BUCK_VPA_CON0 0x1688 +#define MT6357_BUCK_VPA_CON1 0x168a +#define MT6357_BUCK_VPA_CFG0 0x168c +#define MT6357_BUCK_VPA_CFG1 0x168e +#define MT6357_BUCK_VPA_OC_CFG 0x1690 +#define MT6357_BUCK_VPA_DBG0 0x1692 +#define MT6357_BUCK_VPA_DBG1 0x1694 +#define MT6357_BUCK_VPA_DBG2 0x1696 +#define MT6357_BUCK_VPA_DLC_CON0 0x1698 +#define MT6357_BUCK_VPA_DLC_CON1 0x169a +#define MT6357_BUCK_VPA_DLC_CON2 0x169c +#define MT6357_BUCK_VPA_MSFG_CON0 0x169e +#define MT6357_BUCK_VPA_MSFG_CON1 0x16a0 +#define MT6357_BUCK_VPA_MSFG_RRATE0 0x16a2 +#define MT6357_BUCK_VPA_MSFG_RRATE1 0x16a4 +#define MT6357_BUCK_VPA_MSFG_RRATE2 0x16a6 +#define MT6357_BUCK_VPA_MSFG_RTHD0 0x16a8 +#define MT6357_BUCK_VPA_MSFG_RTHD1 0x16aa +#define MT6357_BUCK_VPA_MSFG_RTHD2 0x16ac +#define MT6357_BUCK_VPA_MSFG_FRATE0 0x16ae +#define MT6357_BUCK_VPA_MSFG_FRATE1 0x16b0 +#define MT6357_BUCK_VPA_MSFG_FRATE2 0x16b2 +#define MT6357_BUCK_VPA_MSFG_FTHD0 0x16b4 +#define MT6357_BUCK_VPA_MSFG_FTHD1 0x16b6 +#define MT6357_BUCK_VPA_MSFG_FTHD2 0x16b8 +#define MT6357_BUCK_ANA_DSN_ID 0x1700 +#define MT6357_BUCK_ANA_DSN_REV0 0x1702 +#define MT6357_BUCK_ANA_DSN_DBI 0x1704 +#define MT6357_BUCK_ANA_DSN_FPI 0x1706 +#define MT6357_SMPS_ANA_CON0 0x1708 +#define MT6357_SMPS_ANA_CON1 0x170a +#define MT6357_SMPS_ANA_CON2 0x170c +#define MT6357_VCORE_VPROC_ANA_CON0 0x170e +#define MT6357_VCORE_VPROC_ANA_CON1 0x1710 +#define MT6357_VCORE_VPROC_ANA_CON2 0x1712 +#define MT6357_VCORE_VPROC_ANA_CON3 0x1714 +#define MT6357_VCORE_VPROC_ANA_CON4 0x1716 +#define MT6357_VCORE_VPROC_ANA_CON5 0x1718 +#define MT6357_VCORE_VPROC_ANA_CON6 0x171a +#define MT6357_VCORE_VPROC_ANA_CON7 0x171c +#define MT6357_VCORE_VPROC_ANA_CON8 0x171e +#define MT6357_VCORE_VPROC_ANA_CON9 0x1720 +#define MT6357_VCORE_VPROC_ANA_CON10 0x1722 +#define MT6357_VCORE_VPROC_ANA_CON11 0x1724 +#define MT6357_VMODEM_ANA_CON0 0x1726 +#define MT6357_VMODEM_ANA_CON1 0x1728 +#define MT6357_VMODEM_ANA_CON2 0x172a +#define MT6357_VMODEM_ANA_CON3 0x172c +#define MT6357_VMODEM_ANA_CON4 0x172e +#define MT6357_VMODEM_ANA_CON5 0x1730 +#define MT6357_VS1_ANA_CON0 0x1732 +#define MT6357_VS1_ANA_CON1 0x1734 +#define MT6357_VS1_ANA_CON2 0x1736 +#define MT6357_VS1_ANA_CON3 0x1738 +#define MT6357_VS1_ANA_CON4 0x173a +#define MT6357_VS1_ANA_CON5 0x173c +#define MT6357_VPA_ANA_CON0 0x173e +#define MT6357_VPA_ANA_CON1 0x1740 +#define MT6357_VPA_ANA_CON2 0x1742 +#define MT6357_VPA_ANA_CON3 0x1744 +#define MT6357_VPA_ANA_CON4 0x1746 +#define MT6357_VPA_ANA_CON5 0x1748 +#define MT6357_BUCK_ANA_ELR_NUM 0x174a +#define MT6357_SMPS_ELR_0 0x174c +#define MT6357_SMPS_ELR_1 0x174e +#define MT6357_SMPS_ELR_2 0x1750 +#define MT6357_SMPS_ELR_3 0x1752 +#define MT6357_SMPS_ELR_4 0x1754 +#define MT6357_SMPS_ELR_5 0x1756 +#define MT6357_VCORE_VPROC_ELR_0 0x1758 +#define MT6357_VCORE_VPROC_ELR_1 0x175a +#define MT6357_VCORE_VPROC_ELR_2 0x175c +#define MT6357_VCORE_VPROC_ELR_3 0x175e +#define MT6357_VCORE_VPROC_ELR_4 0x1760 +#define MT6357_VMODEM_ELR_0 0x1762 +#define MT6357_VMODEM_ELR_1 0x1764 +#define MT6357_VMODEM_ELR_2 0x1766 +#define MT6357_VS1_ELR_0 0x1768 +#define MT6357_VS1_ELR_1 0x176a +#define MT6357_VPA_ELR_0 0x176c +#define MT6357_LDO_TOP_ID 0x1880 +#define MT6357_LDO_TOP_REV0 0x1882 +#define MT6357_LDO_TOP_DBI 0x1884 +#define MT6357_LDO_TOP_DXI 0x1886 +#define MT6357_LDO_TPM0 0x1888 +#define MT6357_LDO_TPM1 0x188a +#define MT6357_LDO_TOP_CLK_DCM_CON0 0x188c +#define MT6357_LDO_TOP_CLK_VIO28_CON0 0x188e +#define MT6357_LDO_TOP_CLK_VIO18_CON0 0x1890 +#define MT6357_LDO_TOP_CLK_VAUD28_CON0 0x1892 +#define MT6357_LDO_TOP_CLK_VDRAM_CON0 0x1894 +#define MT6357_LDO_TOP_CLK_VSRAM_PROC_CON0 0x1896 +#define MT6357_LDO_TOP_CLK_VSRAM_OTHERS_CON0 0x1898 +#define MT6357_LDO_TOP_CLK_VAUX18_CON0 0x189a +#define MT6357_LDO_TOP_CLK_VUSB33_CON0 0x189c +#define MT6357_LDO_TOP_CLK_VEMC_CON0 0x189e +#define MT6357_LDO_TOP_CLK_VXO22_CON0 0x18a0 +#define MT6357_LDO_TOP_CLK_VSIM1_CON0 0x18a2 +#define MT6357_LDO_TOP_CLK_VSIM2_CON0 0x18a4 +#define MT6357_LDO_TOP_CLK_VCAMD_CON0 0x18a6 +#define MT6357_LDO_TOP_CLK_VCAMIO_CON0 0x18a8 +#define MT6357_LDO_TOP_CLK_VEFUSE_CON0 0x18aa +#define MT6357_LDO_TOP_CLK_VCN33_CON0 0x18ac +#define MT6357_LDO_TOP_CLK_VCN18_CON0 0x18ae +#define MT6357_LDO_TOP_CLK_VCN28_CON0 0x18b0 +#define MT6357_LDO_TOP_CLK_VIBR_CON0 0x18b2 +#define MT6357_LDO_TOP_CLK_VFE28_CON0 0x18b4 +#define MT6357_LDO_TOP_CLK_VMCH_CON0 0x18b6 +#define MT6357_LDO_TOP_CLK_VMC_CON0 0x18b8 +#define MT6357_LDO_TOP_CLK_VRF18_CON0 0x18ba +#define MT6357_LDO_TOP_CLK_VLDO28_CON0 0x18bc +#define MT6357_LDO_TOP_CLK_VRF12_CON0 0x18be +#define MT6357_LDO_TOP_CLK_VCAMA_CON0 0x18c0 +#define MT6357_LDO_TOP_CLK_TREF_CON0 0x18c2 +#define MT6357_LDO_TOP_INT_CON0 0x18c4 +#define MT6357_LDO_TOP_INT_CON0_SET 0x18c6 +#define MT6357_LDO_TOP_INT_CON0_CLR 0x18c8 +#define MT6357_LDO_TOP_INT_CON1 0x18ca +#define MT6357_LDO_TOP_INT_CON1_SET 0x18cc +#define MT6357_LDO_TOP_INT_CON1_CLR 0x18ce +#define MT6357_LDO_TOP_INT_MASK_CON0 0x18d0 +#define MT6357_LDO_TOP_INT_MASK_CON0_SET 0x18d2 +#define MT6357_LDO_TOP_INT_MASK_CON0_CLR 0x18d4 +#define MT6357_LDO_TOP_INT_MASK_CON1 0x18d6 +#define MT6357_LDO_TOP_INT_MASK_CON1_SET 0x18d8 +#define MT6357_LDO_TOP_INT_MASK_CON1_CLR 0x18da +#define MT6357_LDO_TOP_INT_STATUS0 0x18dc +#define MT6357_LDO_TOP_INT_STATUS1 0x18de +#define MT6357_LDO_TOP_INT_RAW_STATUS0 0x18e0 +#define MT6357_LDO_TOP_INT_RAW_STATUS1 0x18e2 +#define MT6357_LDO_TEST_CON0 0x18e4 +#define MT6357_LDO_TOP_WDT_CON0 0x18e6 +#define MT6357_LDO_TOP_RSV_CON0 0x18e8 +#define MT6357_LDO_TOP_RSV_CON1 0x18ea +#define MT6357_LDO_OCFB0 0x18ec +#define MT6357_LDO_LP_PROTECTION 0x18ee +#define MT6357_LDO_DUMMY_LOAD_GATED 0x18f0 +#define MT6357_LDO_GON0_DSN_ID 0x1900 +#define MT6357_LDO_GON0_DSN_REV0 0x1902 +#define MT6357_LDO_GON0_DSN_DBI 0x1904 +#define MT6357_LDO_GON0_DSN_DXI 0x1906 +#define MT6357_LDO_VXO22_CON0 0x1908 +#define MT6357_LDO_VXO22_OP_EN 0x190a +#define MT6357_LDO_VXO22_OP_EN_SET 0x190c +#define MT6357_LDO_VXO22_OP_EN_CLR 0x190e +#define MT6357_LDO_VXO22_OP_CFG 0x1910 +#define MT6357_LDO_VXO22_OP_CFG_SET 0x1912 +#define MT6357_LDO_VXO22_OP_CFG_CLR 0x1914 +#define MT6357_LDO_VXO22_CON1 0x1916 +#define MT6357_LDO_VXO22_CON2 0x1918 +#define MT6357_LDO_VXO22_CON3 0x191a +#define MT6357_LDO_VAUX18_CON0 0x191c +#define MT6357_LDO_VAUX18_OP_EN 0x191e +#define MT6357_LDO_VAUX18_OP_EN_SET 0x1920 +#define MT6357_LDO_VAUX18_OP_EN_CLR 0x1922 +#define MT6357_LDO_VAUX18_OP_CFG 0x1924 +#define MT6357_LDO_VAUX18_OP_CFG_SET 0x1926 +#define MT6357_LDO_VAUX18_OP_CFG_CLR 0x1928 +#define MT6357_LDO_VAUX18_CON1 0x192a +#define MT6357_LDO_VAUX18_CON2 0x192c +#define MT6357_LDO_VAUX18_CON3 0x192e +#define MT6357_LDO_VAUD28_CON0 0x1930 +#define MT6357_LDO_VAUD28_OP_EN 0x1932 +#define MT6357_LDO_VAUD28_OP_EN_SET 0x1934 +#define MT6357_LDO_VAUD28_OP_EN_CLR 0x1936 +#define MT6357_LDO_VAUD28_OP_CFG 0x1938 +#define MT6357_LDO_VAUD28_OP_CFG_SET 0x193a +#define MT6357_LDO_VAUD28_OP_CFG_CLR 0x193c +#define MT6357_LDO_VAUD28_CON1 0x193e +#define MT6357_LDO_VAUD28_CON2 0x1940 +#define MT6357_LDO_VAUD28_CON3 0x1942 +#define MT6357_LDO_VIO28_CON0 0x1944 +#define MT6357_LDO_VIO28_OP_EN 0x1946 +#define MT6357_LDO_VIO28_OP_EN_SET 0x1948 +#define MT6357_LDO_VIO28_OP_EN_CLR 0x194a +#define MT6357_LDO_VIO28_OP_CFG 0x194c +#define MT6357_LDO_VIO28_OP_CFG_SET 0x194e +#define MT6357_LDO_VIO28_OP_CFG_CLR 0x1950 +#define MT6357_LDO_VIO28_CON1 0x1952 +#define MT6357_LDO_VIO28_CON2 0x1954 +#define MT6357_LDO_VIO28_CON3 0x1956 +#define MT6357_LDO_VIO18_CON0 0x1958 +#define MT6357_LDO_VIO18_OP_EN 0x195a +#define MT6357_LDO_VIO18_OP_EN_SET 0x195c +#define MT6357_LDO_VIO18_OP_EN_CLR 0x195e +#define MT6357_LDO_VIO18_OP_CFG 0x1960 +#define MT6357_LDO_VIO18_OP_CFG_SET 0x1962 +#define MT6357_LDO_VIO18_OP_CFG_CLR 0x1964 +#define MT6357_LDO_VIO18_CON1 0x1966 +#define MT6357_LDO_VIO18_CON2 0x1968 +#define MT6357_LDO_VIO18_CON3 0x196a +#define MT6357_LDO_VDRAM_CON0 0x196c +#define MT6357_LDO_VDRAM_OP_EN 0x196e +#define MT6357_LDO_VDRAM_OP_EN_SET 0x1970 +#define MT6357_LDO_VDRAM_OP_EN_CLR 0x1972 +#define MT6357_LDO_VDRAM_OP_CFG 0x1974 +#define MT6357_LDO_VDRAM_OP_CFG_SET 0x1976 +#define MT6357_LDO_VDRAM_OP_CFG_CLR 0x1978 +#define MT6357_LDO_VDRAM_CON1 0x197a +#define MT6357_LDO_VDRAM_CON2 0x197c +#define MT6357_LDO_VDRAM_CON3 0x197e +#define MT6357_LDO_GON1_DSN_ID 0x1980 +#define MT6357_LDO_GON1_DSN_REV0 0x1982 +#define MT6357_LDO_GON1_DSN_DBI 0x1984 +#define MT6357_LDO_GON1_DSN_DXI 0x1986 +#define MT6357_LDO_VEMC_CON0 0x1988 +#define MT6357_LDO_VEMC_OP_EN 0x198a +#define MT6357_LDO_VEMC_OP_EN_SET 0x198c +#define MT6357_LDO_VEMC_OP_EN_CLR 0x198e +#define MT6357_LDO_VEMC_OP_CFG 0x1990 +#define MT6357_LDO_VEMC_OP_CFG_SET 0x1992 +#define MT6357_LDO_VEMC_OP_CFG_CLR 0x1994 +#define MT6357_LDO_VEMC_CON1 0x1996 +#define MT6357_LDO_VEMC_CON2 0x1998 +#define MT6357_LDO_VEMC_CON3 0x199a +#define MT6357_LDO_VUSB33_CON0_0 0x199c +#define MT6357_LDO_VUSB33_OP_EN 0x199e +#define MT6357_LDO_VUSB33_OP_EN_SET 0x19a0 +#define MT6357_LDO_VUSB33_OP_EN_CLR 0x19a2 +#define MT6357_LDO_VUSB33_OP_CFG 0x19a4 +#define MT6357_LDO_VUSB33_OP_CFG_SET 0x19a6 +#define MT6357_LDO_VUSB33_OP_CFG_CLR 0x19a8 +#define MT6357_LDO_VUSB33_CON0_1 0x19aa +#define MT6357_LDO_VUSB33_CON1 0x19ac +#define MT6357_LDO_VUSB33_CON2 0x19ae +#define MT6357_LDO_VUSB33_CON3 0x19b0 +#define MT6357_LDO_VSRAM_PROC_CON0 0x19b2 +#define MT6357_LDO_VSRAM_PROC_CON2 0x19b4 +#define MT6357_LDO_VSRAM_PROC_CFG0 0x19b6 +#define MT6357_LDO_VSRAM_PROC_CFG1 0x19b8 +#define MT6357_LDO_VSRAM_PROC_OP_EN 0x19ba +#define MT6357_LDO_VSRAM_PROC_OP_EN_SET 0x19bc +#define MT6357_LDO_VSRAM_PROC_OP_EN_CLR 0x19be +#define MT6357_LDO_VSRAM_PROC_OP_CFG 0x19c0 +#define MT6357_LDO_VSRAM_PROC_OP_CFG_SET 0x19c2 +#define MT6357_LDO_VSRAM_PROC_OP_CFG_CLR 0x19c4 +#define MT6357_LDO_VSRAM_PROC_CON3 0x19c6 +#define MT6357_LDO_VSRAM_PROC_CON4 0x19c8 +#define MT6357_LDO_VSRAM_PROC_CON5 0x19ca +#define MT6357_LDO_VSRAM_PROC_DBG0 0x19cc +#define MT6357_LDO_VSRAM_PROC_DBG1 0x19ce +#define MT6357_LDO_VSRAM_OTHERS_CON0 0x19d0 +#define MT6357_LDO_VSRAM_OTHERS_CON2 0x19d2 +#define MT6357_LDO_VSRAM_OTHERS_CFG0 0x19d4 +#define MT6357_LDO_VSRAM_OTHERS_CFG1 0x19d6 +#define MT6357_LDO_VSRAM_OTHERS_OP_EN 0x19d8 +#define MT6357_LDO_VSRAM_OTHERS_OP_EN_SET 0x19da +#define MT6357_LDO_VSRAM_OTHERS_OP_EN_CLR 0x19dc +#define MT6357_LDO_VSRAM_OTHERS_OP_CFG 0x19de +#define MT6357_LDO_VSRAM_OTHERS_OP_CFG_SET 0x19e0 +#define MT6357_LDO_VSRAM_OTHERS_OP_CFG_CLR 0x19e2 +#define MT6357_LDO_VSRAM_OTHERS_CON3 0x19e4 +#define MT6357_LDO_VSRAM_OTHERS_CON4 0x19e6 +#define MT6357_LDO_VSRAM_OTHERS_CON5 0x19e8 +#define MT6357_LDO_VSRAM_OTHERS_DBG0 0x19ea +#define MT6357_LDO_VSRAM_OTHERS_DBG1 0x19ec +#define MT6357_LDO_VSRAM_PROC_SP 0x19ee +#define MT6357_LDO_VSRAM_OTHERS_SP 0x19f0 +#define MT6357_LDO_VSRAM_PROC_R2R_PDN_DIS 0x19f2 +#define MT6357_LDO_VSRAM_OTHERS_R2R_PDN_DIS 0x19f4 +#define MT6357_LDO_VSRAM_WDT_DBG0 0x19f6 +#define MT6357_LDO_GON1_ELR_NUM 0x19f8 +#define MT6357_LDO_VSRAM_CON0 0x19fa +#define MT6357_LDO_VSRAM_CON1 0x19fc +#define MT6357_LDO_VSRAM_CON2 0x19fe +#define MT6357_LDO_GOFF0_DSN_ID 0x1a00 +#define MT6357_LDO_GOFF0_DSN_REV0 0x1a02 +#define MT6357_LDO_GOFF0_DSN_DBI 0x1a04 +#define MT6357_LDO_GOFF0_DSN_DXI 0x1a06 +#define MT6357_LDO_VFE28_CON0 0x1a08 +#define MT6357_LDO_VFE28_OP_EN 0x1a0a +#define MT6357_LDO_VFE28_OP_EN_SET 0x1a0c +#define MT6357_LDO_VFE28_OP_EN_CLR 0x1a0e +#define MT6357_LDO_VFE28_OP_CFG 0x1a10 +#define MT6357_LDO_VFE28_OP_CFG_SET 0x1a12 +#define MT6357_LDO_VFE28_OP_CFG_CLR 0x1a14 +#define MT6357_LDO_VFE28_CON1 0x1a16 +#define MT6357_LDO_VFE28_CON2 0x1a18 +#define MT6357_LDO_VFE28_CON3 0x1a1a +#define MT6357_LDO_VRF18_CON0 0x1a1c +#define MT6357_LDO_VRF18_OP_EN 0x1a1e +#define MT6357_LDO_VRF18_OP_EN_SET 0x1a20 +#define MT6357_LDO_VRF18_OP_EN_CLR 0x1a22 +#define MT6357_LDO_VRF18_OP_CFG 0x1a24 +#define MT6357_LDO_VRF18_OP_CFG_SET 0x1a26 +#define MT6357_LDO_VRF18_OP_CFG_CLR 0x1a28 +#define MT6357_LDO_VRF18_CON1 0x1a2a +#define MT6357_LDO_VRF18_CON2 0x1a2c +#define MT6357_LDO_VRF18_CON3 0x1a2e +#define MT6357_LDO_VRF12_CON0 0x1a30 +#define MT6357_LDO_VRF12_OP_EN 0x1a32 +#define MT6357_LDO_VRF12_OP_EN_SET 0x1a34 +#define MT6357_LDO_VRF12_OP_EN_CLR 0x1a36 +#define MT6357_LDO_VRF12_OP_CFG 0x1a38 +#define MT6357_LDO_VRF12_OP_CFG_SET 0x1a3a +#define MT6357_LDO_VRF12_OP_CFG_CLR 0x1a3c +#define MT6357_LDO_VRF12_CON1 0x1a3e +#define MT6357_LDO_VRF12_CON2 0x1a40 +#define MT6357_LDO_VRF12_CON3 0x1a42 +#define MT6357_LDO_VEFUSE_CON0 0x1a44 +#define MT6357_LDO_VEFUSE_OP_EN 0x1a46 +#define MT6357_LDO_VEFUSE_OP_EN_SET 0x1a48 +#define MT6357_LDO_VEFUSE_OP_EN_CLR 0x1a4a +#define MT6357_LDO_VEFUSE_OP_CFG 0x1a4c +#define MT6357_LDO_VEFUSE_OP_CFG_SET 0x1a4e +#define MT6357_LDO_VEFUSE_OP_CFG_CLR 0x1a50 +#define MT6357_LDO_VEFUSE_CON1 0x1a52 +#define MT6357_LDO_VEFUSE_CON2 0x1a54 +#define MT6357_LDO_VEFUSE_CON3 0x1a56 +#define MT6357_LDO_VCN18_CON0 0x1a58 +#define MT6357_LDO_VCN18_OP_EN 0x1a5a +#define MT6357_LDO_VCN18_OP_EN_SET 0x1a5c +#define MT6357_LDO_VCN18_OP_EN_CLR 0x1a5e +#define MT6357_LDO_VCN18_OP_CFG 0x1a60 +#define MT6357_LDO_VCN18_OP_CFG_SET 0x1a62 +#define MT6357_LDO_VCN18_OP_CFG_CLR 0x1a64 +#define MT6357_LDO_VCN18_CON1 0x1a66 +#define MT6357_LDO_VCN18_CON2 0x1a68 +#define MT6357_LDO_VCN18_CON3 0x1a6a +#define MT6357_LDO_VCAMA_CON0 0x1a6c +#define MT6357_LDO_VCAMA_OP_EN 0x1a6e +#define MT6357_LDO_VCAMA_OP_EN_SET 0x1a70 +#define MT6357_LDO_VCAMA_OP_EN_CLR 0x1a72 +#define MT6357_LDO_VCAMA_OP_CFG 0x1a74 +#define MT6357_LDO_VCAMA_OP_CFG_SET 0x1a76 +#define MT6357_LDO_VCAMA_OP_CFG_CLR 0x1a78 +#define MT6357_LDO_VCAMA_CON1 0x1a7a +#define MT6357_LDO_VCAMA_CON2 0x1a7c +#define MT6357_LDO_VCAMA_CON3 0x1a7e +#define MT6357_LDO_GOFF1_DSN_ID 0x1a80 +#define MT6357_LDO_GOFF1_DSN_REV0 0x1a82 +#define MT6357_LDO_GOFF1_DSN_DBI 0x1a84 +#define MT6357_LDO_GOFF1_DSN_DXI 0x1a86 +#define MT6357_LDO_VCAMD_CON0 0x1a88 +#define MT6357_LDO_VCAMD_OP_EN 0x1a8a +#define MT6357_LDO_VCAMD_OP_EN_SET 0x1a8c +#define MT6357_LDO_VCAMD_OP_EN_CLR 0x1a8e +#define MT6357_LDO_VCAMD_OP_CFG 0x1a90 +#define MT6357_LDO_VCAMD_OP_CFG_SET 0x1a92 +#define MT6357_LDO_VCAMD_OP_CFG_CLR 0x1a94 +#define MT6357_LDO_VCAMD_CON1 0x1a96 +#define MT6357_LDO_VCAMD_CON2 0x1a98 +#define MT6357_LDO_VCAMD_CON3 0x1a9a +#define MT6357_LDO_VCAMIO_CON0 0x1a9c +#define MT6357_LDO_VCAMIO_OP_EN 0x1a9e +#define MT6357_LDO_VCAMIO_OP_EN_SET 0x1aa0 +#define MT6357_LDO_VCAMIO_OP_EN_CLR 0x1aa2 +#define MT6357_LDO_VCAMIO_OP_CFG 0x1aa4 +#define MT6357_LDO_VCAMIO_OP_CFG_SET 0x1aa6 +#define MT6357_LDO_VCAMIO_OP_CFG_CLR 0x1aa8 +#define MT6357_LDO_VCAMIO_CON1 0x1aaa +#define MT6357_LDO_VCAMIO_CON2 0x1aac +#define MT6357_LDO_VCAMIO_CON3 0x1aae +#define MT6357_LDO_VMC_CON0 0x1ab0 +#define MT6357_LDO_VMC_OP_EN 0x1ab2 +#define MT6357_LDO_VMC_OP_EN_SET 0x1ab4 +#define MT6357_LDO_VMC_OP_EN_CLR 0x1ab6 +#define MT6357_LDO_VMC_OP_CFG 0x1ab8 +#define MT6357_LDO_VMC_OP_CFG_SET 0x1aba +#define MT6357_LDO_VMC_OP_CFG_CLR 0x1abc +#define MT6357_LDO_VMC_CON1 0x1abe +#define MT6357_LDO_VMC_CON2 0x1ac0 +#define MT6357_LDO_VMC_CON3 0x1ac2 +#define MT6357_LDO_VMCH_CON0 0x1ac4 +#define MT6357_LDO_VMCH_OP_EN 0x1ac6 +#define MT6357_LDO_VMCH_OP_EN_SET 0x1ac8 +#define MT6357_LDO_VMCH_OP_EN_CLR 0x1aca +#define MT6357_LDO_VMCH_OP_CFG 0x1acc +#define MT6357_LDO_VMCH_OP_CFG_SET 0x1ace +#define MT6357_LDO_VMCH_OP_CFG_CLR 0x1ad0 +#define MT6357_LDO_VMCH_CON1 0x1ad2 +#define MT6357_LDO_VMCH_CON2 0x1ad4 +#define MT6357_LDO_VMCH_CON3 0x1ad6 +#define MT6357_LDO_VSIM1_CON0 0x1ad8 +#define MT6357_LDO_VSIM1_OP_EN 0x1ada +#define MT6357_LDO_VSIM1_OP_EN_SET 0x1adc +#define MT6357_LDO_VSIM1_OP_EN_CLR 0x1ade +#define MT6357_LDO_VSIM1_OP_CFG 0x1ae0 +#define MT6357_LDO_VSIM1_OP_CFG_SET 0x1ae2 +#define MT6357_LDO_VSIM1_OP_CFG_CLR 0x1ae4 +#define MT6357_LDO_VSIM1_CON1 0x1ae6 +#define MT6357_LDO_VSIM1_CON2 0x1ae8 +#define MT6357_LDO_VSIM1_CON3 0x1aea +#define MT6357_LDO_VSIM2_CON0 0x1aec +#define MT6357_LDO_VSIM2_OP_EN 0x1aee +#define MT6357_LDO_VSIM2_OP_EN_SET 0x1af0 +#define MT6357_LDO_VSIM2_OP_EN_CLR 0x1af2 +#define MT6357_LDO_VSIM2_OP_CFG 0x1af4 +#define MT6357_LDO_VSIM2_OP_CFG_SET 0x1af6 +#define MT6357_LDO_VSIM2_OP_CFG_CLR 0x1af8 +#define MT6357_LDO_VSIM2_CON1 0x1afa +#define MT6357_LDO_VSIM2_CON2 0x1afc +#define MT6357_LDO_VSIM2_CON3 0x1afe +#define MT6357_LDO_GOFF2_DSN_ID 0x1b00 +#define MT6357_LDO_GOFF2_DSN_REV0 0x1b02 +#define MT6357_LDO_GOFF2_DSN_DBI 0x1b04 +#define MT6357_LDO_GOFF2_DSN_DXI 0x1b06 +#define MT6357_LDO_VIBR_CON0 0x1b08 +#define MT6357_LDO_VIBR_OP_EN 0x1b0a +#define MT6357_LDO_VIBR_OP_EN_SET 0x1b0c +#define MT6357_LDO_VIBR_OP_EN_CLR 0x1b0e +#define MT6357_LDO_VIBR_OP_CFG 0x1b10 +#define MT6357_LDO_VIBR_OP_CFG_SET 0x1b12 +#define MT6357_LDO_VIBR_OP_CFG_CLR 0x1b14 +#define MT6357_LDO_VIBR_CON1 0x1b16 +#define MT6357_LDO_VIBR_CON2 0x1b18 +#define MT6357_LDO_VIBR_CON3 0x1b1a +#define MT6357_LDO_VCN33_CON0_0 0x1b1c +#define MT6357_LDO_VCN33_OP_EN 0x1b1e +#define MT6357_LDO_VCN33_OP_EN_SET 0x1b20 +#define MT6357_LDO_VCN33_OP_EN_CLR 0x1b22 +#define MT6357_LDO_VCN33_OP_CFG 0x1b24 +#define MT6357_LDO_VCN33_OP_CFG_SET 0x1b26 +#define MT6357_LDO_VCN33_OP_CFG_CLR 0x1b28 +#define MT6357_LDO_VCN33_CON0_1 0x1b2a +#define MT6357_LDO_VCN33_CON1 0x1b2c +#define MT6357_LDO_VCN33_CON2 0x1b2e +#define MT6357_LDO_VCN33_CON3 0x1b30 +#define MT6357_LDO_VLDO28_CON0_0 0x1b32 +#define MT6357_LDO_VLDO28_OP_EN 0x1b34 +#define MT6357_LDO_VLDO28_OP_EN_SET 0x1b36 +#define MT6357_LDO_VLDO28_OP_EN_CLR 0x1b38 +#define MT6357_LDO_VLDO28_OP_CFG 0x1b3a +#define MT6357_LDO_VLDO28_OP_CFG_SET 0x1b3c +#define MT6357_LDO_VLDO28_OP_CFG_CLR 0x1b3e +#define MT6357_LDO_VLDO28_CON0_1 0x1b40 +#define MT6357_LDO_VLDO28_CON1 0x1b42 +#define MT6357_LDO_VLDO28_CON2 0x1b44 +#define MT6357_LDO_VLDO28_CON3 0x1b46 +#define MT6357_LDO_GOFF2_RSV_CON0 0x1b48 +#define MT6357_LDO_GOFF2_RSV_CON1 0x1b4a +#define MT6357_LDO_GOFF3_DSN_ID 0x1b80 +#define MT6357_LDO_GOFF3_DSN_REV0 0x1b82 +#define MT6357_LDO_GOFF3_DSN_DBI 0x1b84 +#define MT6357_LDO_GOFF3_DSN_DXI 0x1b86 +#define MT6357_LDO_VCN28_CON0 0x1b88 +#define MT6357_LDO_VCN28_OP_EN 0x1b8a +#define MT6357_LDO_VCN28_OP_EN_SET 0x1b8c +#define MT6357_LDO_VCN28_OP_EN_CLR 0x1b8e +#define MT6357_LDO_VCN28_OP_CFG 0x1b90 +#define MT6357_LDO_VCN28_OP_CFG_SET 0x1b92 +#define MT6357_LDO_VCN28_OP_CFG_CLR 0x1b94 +#define MT6357_LDO_VCN28_CON1 0x1b96 +#define MT6357_LDO_VCN28_CON2 0x1b98 +#define MT6357_LDO_VCN28_CON3 0x1b9a +#define MT6357_VRTC_CON0 0x1b9c +#define MT6357_LDO_TREF_CON0 0x1b9e +#define MT6357_LDO_TREF_OP_EN 0x1ba0 +#define MT6357_LDO_TREF_OP_EN_SET 0x1ba2 +#define MT6357_LDO_TREF_OP_EN_CLR 0x1ba4 +#define MT6357_LDO_TREF_OP_CFG 0x1ba6 +#define MT6357_LDO_TREF_OP_CFG_SET 0x1ba8 +#define MT6357_LDO_TREF_OP_CFG_CLR 0x1baa +#define MT6357_LDO_TREF_CON1 0x1bac +#define MT6357_LDO_GOFF3_RSV_CON0 0x1bae +#define MT6357_LDO_GOFF3_RSV_CON1 0x1bb0 +#define MT6357_LDO_ANA0_DSN_ID 0x1c00 +#define MT6357_LDO_ANA0_DSN_REV0 0x1c02 +#define MT6357_LDO_ANA0_DSN_DBI 0x1c04 +#define MT6357_LDO_ANA0_DSN_DXI 0x1c06 +#define MT6357_VFE28_ANA_CON0 0x1c08 +#define MT6357_VFE28_ANA_CON1 0x1c0a +#define MT6357_VCN28_ANA_CON0 0x1c0c +#define MT6357_VCN28_ANA_CON1 0x1c0e +#define MT6357_VAUD28_ANA_CON0 0x1c10 +#define MT6357_VAUD28_ANA_CON1 0x1c12 +#define MT6357_VAUX18_ANA_CON0 0x1c14 +#define MT6357_VAUX18_ANA_CON1 0x1c16 +#define MT6357_VXO22_ANA_CON0 0x1c18 +#define MT6357_VXO22_ANA_CON1 0x1c1a +#define MT6357_VCN33_ANA_CON0 0x1c1c +#define MT6357_VCN33_ANA_CON1 0x1c1e +#define MT6357_VEMC_ANA_CON0 0x1c20 +#define MT6357_VEMC_ANA_CON1 0x1c22 +#define MT6357_VLDO28_ANA_CON0 0x1c24 +#define MT6357_VLDO28_ANA_CON1 0x1c26 +#define MT6357_VIO28_ANA_CON0 0x1c28 +#define MT6357_VIO28_ANA_CON1 0x1c2a +#define MT6357_VIBR_ANA_CON0 0x1c2c +#define MT6357_VIBR_ANA_CON1 0x1c2e +#define MT6357_VSIM1_ANA_CON0 0x1c30 +#define MT6357_VSIM1_ANA_CON1 0x1c32 +#define MT6357_VSIM2_ANA_CON0 0x1c34 +#define MT6357_VSIM2_ANA_CON1 0x1c36 +#define MT6357_VMCH_ANA_CON0 0x1c38 +#define MT6357_VMCH_ANA_CON1 0x1c3a +#define MT6357_VMC_ANA_CON0 0x1c3c +#define MT6357_VMC_ANA_CON1 0x1c3e +#define MT6357_VCAMIO_ANA_CON0 0x1c40 +#define MT6357_VCAMIO_ANA_CON1 0x1c42 +#define MT6357_VCN18_ANA_CON0 0x1c44 +#define MT6357_VCN18_ANA_CON1 0x1c46 +#define MT6357_VRF18_ANA_CON0 0x1c48 +#define MT6357_VRF18_ANA_CON1 0x1c4a +#define MT6357_VIO18_ANA_CON0 0x1c4c +#define MT6357_VIO18_ANA_CON1 0x1c4e +#define MT6357_VDRAM_ANA_CON1 0x1c50 +#define MT6357_VRF12_ANA_CON0 0x1c52 +#define MT6357_VRF12_ANA_CON1 0x1c54 +#define MT6357_VSRAM_PROC_ANA_CON0 0x1c56 +#define MT6357_VSRAM_OTHERS_ANA_CON0 0x1c58 +#define MT6357_LDO_ANA0_ELR_NUM 0x1c5a +#define MT6357_VFE28_ELR_0 0x1c5c +#define MT6357_VCN28_ELR_0 0x1c5e +#define MT6357_VAUD28_ELR_0 0x1c60 +#define MT6357_VAUX18_ELR_0 0x1c62 +#define MT6357_VXO22_ELR_0 0x1c64 +#define MT6357_VCN33_ELR_0 0x1c66 +#define MT6357_VEMC_ELR_0 0x1c68 +#define MT6357_VLDO28_ELR_0 0x1c6a +#define MT6357_VIO28_ELR_0 0x1c6c +#define MT6357_VIBR_ELR_0 0x1c6e +#define MT6357_VSIM1_ELR_0 0x1c70 +#define MT6357_VSIM2_ELR_0 0x1c72 +#define MT6357_VMCH_ELR_0 0x1c74 +#define MT6357_VMC_ELR_0 0x1c76 +#define MT6357_VCAMIO_ELR_0 0x1c78 +#define MT6357_VCN18_ELR_0 0x1c7a +#define MT6357_VRF18_ELR_0 0x1c7c +#define MT6357_LDO_ANA1_DSN_ID 0x1c80 +#define MT6357_LDO_ANA1_DSN_REV0 0x1c82 +#define MT6357_LDO_ANA1_DSN_DBI 0x1c84 +#define MT6357_LDO_ANA1_DSN_DXI 0x1c86 +#define MT6357_VUSB33_ANA_CON0 0x1c88 +#define MT6357_VUSB33_ANA_CON1 0x1c8a +#define MT6357_VCAMA_ANA_CON0 0x1c8c +#define MT6357_VCAMA_ANA_CON1 0x1c8e +#define MT6357_VEFUSE_ANA_CON0 0x1c90 +#define MT6357_VEFUSE_ANA_CON1 0x1c92 +#define MT6357_VCAMD_ANA_CON0 0x1c94 +#define MT6357_VCAMD_ANA_CON1 0x1c96 +#define MT6357_LDO_ANA1_ELR_NUM 0x1c98 +#define MT6357_VUSB33_ELR_0 0x1c9a +#define MT6357_VCAMA_ELR_0 0x1c9c +#define MT6357_VEFUSE_ELR_0 0x1c9e +#define MT6357_VCAMD_ELR_0 0x1ca0 +#define MT6357_VIO18_ELR_0 0x1ca2 +#define MT6357_VDRAM_ELR_0 0x1ca4 +#define MT6357_VRF12_ELR_0 0x1ca6 +#define MT6357_VRTC_ELR_0 0x1ca8 +#define MT6357_VDRAM_ELR_1 0x1caa +#define MT6357_VDRAM_ELR_2 0x1cac +#define MT6357_XPP_TOP_ID 0x1e00 +#define MT6357_XPP_TOP_REV0 0x1e02 +#define MT6357_XPP_TOP_DBI 0x1e04 +#define MT6357_XPP_TOP_DXI 0x1e06 +#define MT6357_XPP_TPM0 0x1e08 +#define MT6357_XPP_TPM1 0x1e0a +#define MT6357_XPP_TOP_TEST_OUT 0x1e0c +#define MT6357_XPP_TOP_TEST_CON0 0x1e0e +#define MT6357_XPP_TOP_CKPDN_CON0 0x1e10 +#define MT6357_XPP_TOP_CKPDN_CON0_SET 0x1e12 +#define MT6357_XPP_TOP_CKPDN_CON0_CLR 0x1e14 +#define MT6357_XPP_TOP_CKSEL_CON0 0x1e16 +#define MT6357_XPP_TOP_CKSEL_CON0_SET 0x1e18 +#define MT6357_XPP_TOP_CKSEL_CON0_CLR 0x1e1a +#define MT6357_XPP_TOP_RST_CON0 0x1e1c +#define MT6357_XPP_TOP_RST_CON0_SET 0x1e1e +#define MT6357_XPP_TOP_RST_CON0_CLR 0x1e20 +#define MT6357_XPP_TOP_RST_BANK_CON0 0x1e22 +#define MT6357_XPP_TOP_RST_BANK_CON0_SET 0x1e24 +#define MT6357_XPP_TOP_RST_BANK_CON0_CLR 0x1e26 +#define MT6357_DRIVER_BL_DSN_ID 0x1e80 +#define MT6357_DRIVER_BL_DSN_REV0 0x1e82 +#define MT6357_DRIVER_BL_DSN_DBI 0x1e84 +#define MT6357_DRIVER_BL_DSN_DXI 0x1e86 +#define MT6357_ISINK1_CON0 0x1e88 +#define MT6357_ISINK1_CON1 0x1e8a +#define MT6357_ISINK1_CON2 0x1e8c +#define MT6357_ISINK1_CON3 0x1e8e +#define MT6357_ISINK_ANA1 0x1e90 +#define MT6357_ISINK_PHASE_DLY 0x1e92 +#define MT6357_ISINK_SFSTR 0x1e94 +#define MT6357_ISINK_EN_CTRL 0x1e96 +#define MT6357_ISINK_MODE_CTRL 0x1e98 +#define MT6357_DRIVER_ANA_CON0 0x1e9a +#define MT6357_ISINK_ANA_CON0 0x1e9c +#define MT6357_ISINK_ANA_CON1 0x1e9e +#define MT6357_DRIVER_BL_ELR_NUM 0x1ea0 +#define MT6357_DRIVER_BL_ELR_0 0x1ea2 +#define MT6357_DRIVER_CI_DSN_ID 0x1f00 +#define MT6357_DRIVER_CI_DSN_REV0 0x1f02 +#define MT6357_DRIVER_CI_DSN_DBI 0x1f04 +#define MT6357_DRIVER_CI_DSN_DXI 0x1f06 +#define MT6357_CHRIND_CON0 0x1f08 +#define MT6357_CHRIND_CON1 0x1f0a +#define MT6357_CHRIND_CON2 0x1f0c +#define MT6357_CHRIND_CON3 0x1f0e +#define MT6357_CHRIND_CON4 0x1f10 +#define MT6357_CHRIND_EN_CTRL 0x1f12 +#define MT6357_CHRIND_ANA_CON0 0x1f14 +#define MT6357_DRIVER_DL_DSN_ID 0x1f80 +#define MT6357_DRIVER_DL_DSN_REV0 0x1f82 +#define MT6357_DRIVER_DL_DSN_DBI 0x1f84 +#define MT6357_DRIVER_DL_DSN_DXI 0x1f86 +#define MT6357_ISINK2_CON0 0x1f88 +#define MT6357_ISINK3_CON0 0x1f8a +#define MT6357_ISINK_EN_CTRL_SMPL 0x1f8c +#define MT6357_AUD_TOP_ID 0x2080 +#define MT6357_AUD_TOP_REV0 0x2082 +#define MT6357_AUD_TOP_DBI 0x2084 +#define MT6357_AUD_TOP_DXI 0x2086 +#define MT6357_AUD_TOP_CKPDN_TPM0 0x2088 +#define MT6357_AUD_TOP_CKPDN_TPM1 0x208a +#define MT6357_AUD_TOP_CKPDN_CON0 0x208c +#define MT6357_AUD_TOP_CKPDN_CON0_SET 0x208e +#define MT6357_AUD_TOP_CKPDN_CON0_CLR 0x2090 +#define MT6357_AUD_TOP_CKSEL_CON0 0x2092 +#define MT6357_AUD_TOP_CKSEL_CON0_SET 0x2094 +#define MT6357_AUD_TOP_CKSEL_CON0_CLR 0x2096 +#define MT6357_AUD_TOP_CKTST_CON0 0x2098 +#define MT6357_AUD_TOP_RST_CON0 0x209a +#define MT6357_AUD_TOP_RST_CON0_SET 0x209c +#define MT6357_AUD_TOP_RST_CON0_CLR 0x209e +#define MT6357_AUD_TOP_RST_BANK_CON0 0x20a0 +#define MT6357_AUD_TOP_INT_CON0 0x20a2 +#define MT6357_AUD_TOP_INT_CON0_SET 0x20a4 +#define MT6357_AUD_TOP_INT_CON0_CLR 0x20a6 +#define MT6357_AUD_TOP_INT_MASK_CON0 0x20a8 +#define MT6357_AUD_TOP_INT_MASK_CON0_SET 0x20aa +#define MT6357_AUD_TOP_INT_MASK_CON0_CLR 0x20ac +#define MT6357_AUD_TOP_INT_STATUS0 0x20ae +#define MT6357_AUD_TOP_INT_RAW_STATUS0 0x20b0 +#define MT6357_AUD_TOP_INT_MISC_CON0 0x20b2 +#define MT6357_AUDNCP_CLKDIV_CON0 0x20b4 +#define MT6357_AUDNCP_CLKDIV_CON1 0x20b6 +#define MT6357_AUDNCP_CLKDIV_CON2 0x20b8 +#define MT6357_AUDNCP_CLKDIV_CON3 0x20ba +#define MT6357_AUDNCP_CLKDIV_CON4 0x20bc +#define MT6357_AUD_TOP_MON_CON0 0x20be +#define MT6357_AUDIO_DIG_DSN_ID 0x2100 +#define MT6357_AUDIO_DIG_DSN_REV0 0x2102 +#define MT6357_AUDIO_DIG_DSN_DBI 0x2104 +#define MT6357_AUDIO_DIG_DSN_DXI 0x2106 +#define MT6357_AFE_UL_DL_CON0 0x2108 +#define MT6357_AFE_DL_SRC2_CON0_L 0x210a +#define MT6357_AFE_UL_SRC_CON0_H 0x210c +#define MT6357_AFE_UL_SRC_CON0_L 0x210e +#define MT6357_AFE_TOP_CON0 0x2110 +#define MT6357_AUDIO_TOP_CON0 0x2112 +#define MT6357_AFE_MON_DEBUG0 0x2114 +#define MT6357_AFUNC_AUD_CON0 0x2116 +#define MT6357_AFUNC_AUD_CON1 0x2118 +#define MT6357_AFUNC_AUD_CON2 0x211a +#define MT6357_AFUNC_AUD_CON3 0x211c +#define MT6357_AFUNC_AUD_CON4 0x211e +#define MT6357_AFUNC_AUD_CON5 0x2120 +#define MT6357_AFUNC_AUD_CON6 0x2122 +#define MT6357_AFUNC_AUD_MON0 0x2124 +#define MT6357_AUDRC_TUNE_MON0 0x2126 +#define MT6357_AFE_ADDA_MTKAIF_FIFO_CFG0 0x2128 +#define MT6357_AFE_ADDA_MTKAIF_FIFO_LOG_MON1 0x212a +#define MT6357_AFE_ADDA_MTKAIF_MON0 0x212c +#define MT6357_AFE_ADDA_MTKAIF_MON1 0x212e +#define MT6357_AFE_ADDA_MTKAIF_MON2 0x2130 +#define MT6357_AFE_ADDA_MTKAIF_MON3 0x2132 +#define MT6357_AFE_ADDA_MTKAIF_CFG0 0x2134 +#define MT6357_AFE_ADDA_MTKAIF_RX_CFG0 0x2136 +#define MT6357_AFE_ADDA_MTKAIF_RX_CFG1 0x2138 +#define MT6357_AFE_ADDA_MTKAIF_RX_CFG2 0x213a +#define MT6357_AFE_ADDA_MTKAIF_RX_CFG3 0x213c +#define MT6357_AFE_ADDA_MTKAIF_TX_CFG1 0x213e +#define MT6357_AFE_SGEN_CFG0 0x2140 +#define MT6357_AFE_SGEN_CFG1 0x2142 +#define MT6357_AFE_ADC_ASYNC_FIFO_CFG 0x2144 +#define MT6357_AFE_DCCLK_CFG0 0x2146 +#define MT6357_AFE_DCCLK_CFG1 0x2148 +#define MT6357_AUDIO_DIG_CFG 0x214a +#define MT6357_AFE_AUD_PAD_TOP 0x214c +#define MT6357_AFE_AUD_PAD_TOP_MON 0x214e +#define MT6357_AFE_AUD_PAD_TOP_MON1 0x2150 +#define MT6357_AUDENC_DSN_ID 0x2180 +#define MT6357_AUDENC_DSN_REV0 0x2182 +#define MT6357_AUDENC_DSN_DBI 0x2184 +#define MT6357_AUDENC_DSN_FPI 0x2186 +#define MT6357_AUDENC_ANA_CON0 0x2188 +#define MT6357_AUDENC_ANA_CON1 0x218a +#define MT6357_AUDENC_ANA_CON2 0x218c +#define MT6357_AUDENC_ANA_CON3 0x218e +#define MT6357_AUDENC_ANA_CON4 0x2190 +#define MT6357_AUDENC_ANA_CON5 0x2192 +#define MT6357_AUDENC_ANA_CON6 0x2194 +#define MT6357_AUDENC_ANA_CON7 0x2196 +#define MT6357_AUDENC_ANA_CON8 0x2198 +#define MT6357_AUDENC_ANA_CON9 0x219a +#define MT6357_AUDENC_ANA_CON10 0x219c +#define MT6357_AUDENC_ANA_CON11 0x219e +#define MT6357_AUDDEC_DSN_ID 0x2200 +#define MT6357_AUDDEC_DSN_REV0 0x2202 +#define MT6357_AUDDEC_DSN_DBI 0x2204 +#define MT6357_AUDDEC_DSN_FPI 0x2206 +#define MT6357_AUDDEC_ANA_CON0 0x2208 +#define MT6357_AUDDEC_ANA_CON1 0x220a +#define MT6357_AUDDEC_ANA_CON2 0x220c +#define MT6357_AUDDEC_ANA_CON3 0x220e +#define MT6357_AUDDEC_ANA_CON4 0x2210 +#define MT6357_AUDDEC_ANA_CON5 0x2212 +#define MT6357_AUDDEC_ANA_CON6 0x2214 +#define MT6357_AUDDEC_ANA_CON7 0x2216 +#define MT6357_AUDDEC_ANA_CON8 0x2218 +#define MT6357_AUDDEC_ANA_CON9 0x221a +#define MT6357_AUDDEC_ANA_CON10 0x221c +#define MT6357_AUDDEC_ANA_CON11 0x221e +#define MT6357_AUDDEC_ANA_CON12 0x2220 +#define MT6357_AUDDEC_ANA_CON13 0x2222 +#define MT6357_AUDDEC_ELR_NUM 0x2224 +#define MT6357_AUDDEC_ELR_0 0x2226 +#define MT6357_AUDZCD_DSN_ID 0x2280 +#define MT6357_AUDZCD_DSN_REV0 0x2282 +#define MT6357_AUDZCD_DSN_DBI 0x2284 +#define MT6357_AUDZCD_DSN_FPI 0x2286 +#define MT6357_ZCD_CON0 0x2288 +#define MT6357_ZCD_CON1 0x228a +#define MT6357_ZCD_CON2 0x228c +#define MT6357_ZCD_CON3 0x228e +#define MT6357_ZCD_CON4 0x2290 +#define MT6357_ZCD_CON5 0x2292 +#define MT6357_ACCDET_DSN_DIG_ID 0x2300 +#define MT6357_ACCDET_DSN_DIG_REV0 0x2302 +#define MT6357_ACCDET_DSN_DBI 0x2304 +#define MT6357_ACCDET_DSN_FPI 0x2306 +#define MT6357_ACCDET_CON0 0x2308 +#define MT6357_ACCDET_CON1 0x230a +#define MT6357_ACCDET_CON2 0x230c +#define MT6357_ACCDET_CON3 0x230e +#define MT6357_ACCDET_CON4 0x2310 +#define MT6357_ACCDET_CON5 0x2312 +#define MT6357_ACCDET_CON6 0x2314 +#define MT6357_ACCDET_CON7 0x2316 +#define MT6357_ACCDET_CON8 0x2318 +#define MT6357_ACCDET_CON9 0x231a +#define MT6357_ACCDET_CON10 0x231c +#define MT6357_ACCDET_CON11 0x231e +#define MT6357_ACCDET_CON12 0x2320 +#define MT6357_ACCDET_CON13 0x2322 +#define MT6357_ACCDET_CON14 0x2324 +#define MT6357_ACCDET_CON15 0x2326 +#define MT6357_ACCDET_CON16 0x2328 +#define MT6357_ACCDET_CON17 0x232a +#define MT6357_ACCDET_CON18 0x232c +#define MT6357_ACCDET_CON19 0x232e +#define MT6357_ACCDET_CON20 0x2330 +#define MT6357_ACCDET_CON21 0x2332 +#define MT6357_ACCDET_CON22 0x2334 +#define MT6357_ACCDET_CON23 0x2336 +#define MT6357_ACCDET_CON24 0x2338 +#define MT6357_ACCDET_CON25 0x233a +#define MT6357_ACCDET_CON26 0x233c +#define MT6357_ACCDET_CON27 0x233e +#define MT6357_ACCDET_CON28 0x2340 + +#endif /* __MFD_MT6357_REGISTERS_H__ */ -- cgit v1.2.3 From 738654be3cf7af4a0a131bd92142db045f0660dc Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Tue, 31 May 2022 14:49:57 +0200 Subject: mfd: mt6358-irq: Add MT6357 PMIC support Add MT6357 PMIC IRQ support. Signed-off-by: Fabien Parent Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220531124959.202787-6-fparent@baylibre.com --- include/linux/mfd/mt6397/core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/mt6397/core.h b/include/linux/mfd/mt6397/core.h index 1cf78726503b..3fecaffe5019 100644 --- a/include/linux/mfd/mt6397/core.h +++ b/include/linux/mfd/mt6397/core.h @@ -12,6 +12,7 @@ enum chip_id { MT6323_CHIP_ID = 0x23, + MT6357_CHIP_ID = 0x57, MT6358_CHIP_ID = 0x58, MT6359_CHIP_ID = 0x59, MT6366_CHIP_ID = 0x66, -- cgit v1.2.3 From 66ee379d743c69c726b61d078119a34d5be96a35 Mon Sep 17 00:00:00 2001 From: Tinghan Shen Date: Wed, 1 Jun 2022 19:22:01 +0800 Subject: mfd: cros_ec: Add SCP Core-1 as a new CrOS EC MCU MT8195 System Companion Processors(SCP) is a dual-core RISC-V MCU. Add a new CrOS feature ID to represent the SCP's 2nd core. The 1st core is referred to as 'core 0', and the 2nd core is referred to as 'core 1'. Signed-off-by: Tinghan Shen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220601112201.15510-16-tinghan.shen@mediatek.com --- include/linux/platform_data/cros_ec_commands.h | 2 ++ include/linux/platform_data/cros_ec_proto.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 8cfa8cfca77e..9fbf1c5eb8d3 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1300,6 +1300,8 @@ enum ec_feature_code { * mux. */ EC_FEATURE_TYPEC_MUX_REQUIRE_AP_ACK = 43, + /* The MCU is a System Companion Processor (SCP) 2nd Core. */ + EC_FEATURE_SCP_C1 = 45, }; #define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32) diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 138fd912c808..da06dc7cf1cb 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -19,6 +19,7 @@ #define CROS_EC_DEV_ISH_NAME "cros_ish" #define CROS_EC_DEV_PD_NAME "cros_pd" #define CROS_EC_DEV_SCP_NAME "cros_scp" +#define CROS_EC_DEV_SCP_C1_NAME "cros_scp_c1" #define CROS_EC_DEV_TP_NAME "cros_tp" /* -- cgit v1.2.3 From 4a346a03a63cb45f7766d9d6559cf3fee783e926 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 14 Jun 2022 17:21:48 +0200 Subject: mfd: twl: Remove platform data support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no in-tree machine that provides a struct twl4030_platform_data since commit e92fc4f04a34 ("ARM: OMAP2+: Drop legacy board file for LDP"). So assume dev_get_platdata() returns NULL in twl_probe() and simplify accordingly. Signed-off-by: Uwe Kleine-König Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220614152148.252820-1-u.kleine-koenig@pengutronix.de --- include/linux/mfd/twl.h | 55 ------------------------------------------------- 1 file changed, 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/twl.h b/include/linux/mfd/twl.h index 8871cc5188a0..e426938eafd5 100644 --- a/include/linux/mfd/twl.h +++ b/include/linux/mfd/twl.h @@ -694,61 +694,6 @@ struct twl4030_audio_data { unsigned int irq_base; }; -struct twl4030_platform_data { - struct twl4030_clock_init_data *clock; - struct twl4030_bci_platform_data *bci; - struct twl4030_gpio_platform_data *gpio; - struct twl4030_madc_platform_data *madc; - struct twl4030_keypad_data *keypad; - struct twl4030_usb_data *usb; - struct twl4030_power_data *power; - struct twl4030_audio_data *audio; - - /* Common LDO regulators for TWL4030/TWL6030 */ - struct regulator_init_data *vdac; - struct regulator_init_data *vaux1; - struct regulator_init_data *vaux2; - struct regulator_init_data *vaux3; - struct regulator_init_data *vdd1; - struct regulator_init_data *vdd2; - struct regulator_init_data *vdd3; - /* TWL4030 LDO regulators */ - struct regulator_init_data *vpll1; - struct regulator_init_data *vpll2; - struct regulator_init_data *vmmc1; - struct regulator_init_data *vmmc2; - struct regulator_init_data *vsim; - struct regulator_init_data *vaux4; - struct regulator_init_data *vio; - struct regulator_init_data *vintana1; - struct regulator_init_data *vintana2; - struct regulator_init_data *vintdig; - /* TWL6030 LDO regulators */ - struct regulator_init_data *vmmc; - struct regulator_init_data *vpp; - struct regulator_init_data *vusim; - struct regulator_init_data *vana; - struct regulator_init_data *vcxio; - struct regulator_init_data *vusb; - struct regulator_init_data *clk32kg; - struct regulator_init_data *v1v8; - struct regulator_init_data *v2v1; - /* TWL6032 LDO regulators */ - struct regulator_init_data *ldo1; - struct regulator_init_data *ldo2; - struct regulator_init_data *ldo3; - struct regulator_init_data *ldo4; - struct regulator_init_data *ldo5; - struct regulator_init_data *ldo6; - struct regulator_init_data *ldo7; - struct regulator_init_data *ldoln; - struct regulator_init_data *ldousb; - /* TWL6032 DCDC regulators */ - struct regulator_init_data *smps3; - struct regulator_init_data *smps4; - struct regulator_init_data *vio6025; -}; - struct twl_regulator_driver_data { int (*set_voltage)(void *data, int target_uV); int (*get_voltage)(void *data); -- cgit v1.2.3 From c55333064d6ea9a26c7e8cfbe2ba1fa77c3a8e48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 19 Jun 2022 10:26:55 +0200 Subject: mfd: tc6393xb: Make disable callback return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All implementations return 0, so simplify accordingly. This is a preparation for making platform remove callbacks return void. Signed-off-by: Uwe Kleine-König Reported-by: kernel test robot Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220619082655.53728-1-u.kleine-koenig@pengutronix.de --- include/linux/mfd/tc6393xb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h index d336c541b7df..d17807f2d0c9 100644 --- a/include/linux/mfd/tc6393xb.h +++ b/include/linux/mfd/tc6393xb.h @@ -22,7 +22,7 @@ struct tc6393xb_platform_data { u16 scr_gper; /* GP Enable */ int (*enable)(struct platform_device *dev); - int (*disable)(struct platform_device *dev); + void (*disable)(struct platform_device *dev); int (*suspend)(struct platform_device *dev); int (*resume)(struct platform_device *dev); -- cgit v1.2.3 From 79f821b5a3bf46d2d5ee2dc0e2b2428b1062a040 Mon Sep 17 00:00:00 2001 From: Zhang Jiaming Date: Thu, 23 Jun 2022 15:33:33 +0800 Subject: mfd: ipaq-micro: Fix spelling mistake of "receive{d}" Change 'receieved' to 'received' and 'recieve' to 'receive'. Signed-off-by: Zhang Jiaming Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220623073333.5675-1-jiaming@nfschina.com --- include/linux/mfd/ipaq-micro.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/ipaq-micro.h b/include/linux/mfd/ipaq-micro.h index ee48a4321c57..d5caa4c86ecc 100644 --- a/include/linux/mfd/ipaq-micro.h +++ b/include/linux/mfd/ipaq-micro.h @@ -75,8 +75,8 @@ struct ipaq_micro_rxdev { * @id: 4-bit ID of the message * @tx_len: length of TX data * @tx_data: TX data to send - * @rx_len: length of receieved RX data - * @rx_data: RX data to recieve + * @rx_len: length of received RX data + * @rx_data: RX data to receive * @ack: a completion that will be completed when RX is complete * @node: list node if message gets queued */ -- cgit v1.2.3 From d9cd0bc604705eb01497c3e483f8820a9677c682 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 27 Jun 2022 14:39:54 +0200 Subject: mfd: mt6397: Add basic support for MT6331+MT6332 PMIC Add support for the MT6331 PMIC with MT6332 Companion PMIC, found in MT6795 Helio X10 smartphone platforms. This combo has support for multiple devices but, for a start, only the following have been implemented: - Regulators (two instances, one in MT6331, one in MT6332) - RTC (MT6331) - Keys (MT6331) - Interrupts (MT6331 also dispatches MT6332's interrupts) There's more to be implemented, especially for MT6332, which will come at a later stage. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20220627123954.64299-1-angelogioacchino.delregno@collabora.com --- include/linux/mfd/mt6331/core.h | 40 +++ include/linux/mfd/mt6331/registers.h | 584 +++++++++++++++++++++++++++++++ include/linux/mfd/mt6332/core.h | 65 ++++ include/linux/mfd/mt6332/registers.h | 642 +++++++++++++++++++++++++++++++++++ include/linux/mfd/mt6397/core.h | 2 + 5 files changed, 1333 insertions(+) create mode 100644 include/linux/mfd/mt6331/core.h create mode 100644 include/linux/mfd/mt6331/registers.h create mode 100644 include/linux/mfd/mt6332/core.h create mode 100644 include/linux/mfd/mt6332/registers.h (limited to 'include/linux') diff --git a/include/linux/mfd/mt6331/core.h b/include/linux/mfd/mt6331/core.h new file mode 100644 index 000000000000..df8e6b1e4bc1 --- /dev/null +++ b/include/linux/mfd/mt6331/core.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 AngeloGioacchino Del Regno + */ + +#ifndef __MFD_MT6331_CORE_H__ +#define __MFD_MT6331_CORE_H__ + +enum mt6331_irq_status_numbers { + MT6331_IRQ_STATUS_PWRKEY = 0, + MT6331_IRQ_STATUS_HOMEKEY, + MT6331_IRQ_STATUS_CHRDET, + MT6331_IRQ_STATUS_THR_H, + MT6331_IRQ_STATUS_THR_L, + MT6331_IRQ_STATUS_BAT_H, + MT6331_IRQ_STATUS_BAT_L, + MT6331_IRQ_STATUS_RTC, + MT6331_IRQ_STATUS_AUDIO, + MT6331_IRQ_STATUS_MAD, + MT6331_IRQ_STATUS_ACCDET, + MT6331_IRQ_STATUS_ACCDET_EINT, + MT6331_IRQ_STATUS_ACCDET_NEGV = 12, + MT6331_IRQ_STATUS_VDVFS11_OC = 16, + MT6331_IRQ_STATUS_VDVFS12_OC, + MT6331_IRQ_STATUS_VDVFS13_OC, + MT6331_IRQ_STATUS_VDVFS14_OC, + MT6331_IRQ_STATUS_GPU_OC, + MT6331_IRQ_STATUS_VCORE1_OC, + MT6331_IRQ_STATUS_VCORE2_OC, + MT6331_IRQ_STATUS_VIO18_OC, + MT6331_IRQ_STATUS_LDO_OC, + MT6331_IRQ_STATUS_NR, +}; + +#define MT6331_IRQ_CON0_BASE MT6331_IRQ_STATUS_PWRKEY +#define MT6331_IRQ_CON0_BITS (MT6331_IRQ_STATUS_ACCDET_NEGV + 1) +#define MT6331_IRQ_CON1_BASE MT6331_IRQ_STATUS_VDVFS11_OC +#define MT6331_IRQ_CON1_BITS (MT6331_IRQ_STATUS_LDO_OC - MT6331_IRQ_STATUS_VDFS11_OC + 1) + +#endif /* __MFD_MT6331_CORE_H__ */ diff --git a/include/linux/mfd/mt6331/registers.h b/include/linux/mfd/mt6331/registers.h new file mode 100644 index 000000000000..e2be6bccd1a7 --- /dev/null +++ b/include/linux/mfd/mt6331/registers.h @@ -0,0 +1,584 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 AngeloGioacchino Del Regno + */ + +#ifndef __MFD_MT6331_REGISTERS_H__ +#define __MFD_MT6331_REGISTERS_H__ + +/* PMIC Registers */ +#define MT6331_STRUP_CON0 0x0 +#define MT6331_STRUP_CON2 0x2 +#define MT6331_STRUP_CON3 0x4 +#define MT6331_STRUP_CON4 0x6 +#define MT6331_STRUP_CON5 0x8 +#define MT6331_STRUP_CON6 0xA +#define MT6331_STRUP_CON7 0xC +#define MT6331_STRUP_CON8 0xE +#define MT6331_STRUP_CON9 0x10 +#define MT6331_STRUP_CON10 0x12 +#define MT6331_STRUP_CON11 0x14 +#define MT6331_STRUP_CON12 0x16 +#define MT6331_STRUP_CON13 0x18 +#define MT6331_STRUP_CON14 0x1A +#define MT6331_STRUP_CON15 0x1C +#define MT6331_STRUP_CON16 0x1E +#define MT6331_STRUP_CON17 0x20 +#define MT6331_STRUP_CON18 0x22 +#define MT6331_HWCID 0x100 +#define MT6331_SWCID 0x102 +#define MT6331_EXT_PMIC_STATUS 0x104 +#define MT6331_TOP_CON 0x106 +#define MT6331_TEST_OUT 0x108 +#define MT6331_TEST_CON0 0x10A +#define MT6331_TEST_CON1 0x10C +#define MT6331_TESTMODE_SW 0x10E +#define MT6331_EN_STATUS0 0x110 +#define MT6331_EN_STATUS1 0x112 +#define MT6331_EN_STATUS2 0x114 +#define MT6331_OCSTATUS0 0x116 +#define MT6331_OCSTATUS1 0x118 +#define MT6331_OCSTATUS2 0x11A +#define MT6331_PGSTATUS 0x11C +#define MT6331_TOPSTATUS 0x11E +#define MT6331_TDSEL_CON 0x120 +#define MT6331_RDSEL_CON 0x122 +#define MT6331_SMT_CON0 0x124 +#define MT6331_SMT_CON1 0x126 +#define MT6331_SMT_CON2 0x128 +#define MT6331_DRV_CON0 0x12A +#define MT6331_DRV_CON1 0x12C +#define MT6331_DRV_CON2 0x12E +#define MT6331_DRV_CON3 0x130 +#define MT6331_TOP_STATUS 0x132 +#define MT6331_TOP_STATUS_SET 0x134 +#define MT6331_TOP_STATUS_CLR 0x136 +#define MT6331_TOP_CKPDN_CON0 0x138 +#define MT6331_TOP_CKPDN_CON0_SET 0x13A +#define MT6331_TOP_CKPDN_CON0_CLR 0x13C +#define MT6331_TOP_CKPDN_CON1 0x13E +#define MT6331_TOP_CKPDN_CON1_SET 0x140 +#define MT6331_TOP_CKPDN_CON1_CLR 0x142 +#define MT6331_TOP_CKPDN_CON2 0x144 +#define MT6331_TOP_CKPDN_CON2_SET 0x146 +#define MT6331_TOP_CKPDN_CON2_CLR 0x148 +#define MT6331_TOP_CKSEL_CON 0x14A +#define MT6331_TOP_CKSEL_CON_SET 0x14C +#define MT6331_TOP_CKSEL_CON_CLR 0x14E +#define MT6331_TOP_CKHWEN_CON 0x150 +#define MT6331_TOP_CKHWEN_CON_SET 0x152 +#define MT6331_TOP_CKHWEN_CON_CLR 0x154 +#define MT6331_TOP_CKTST_CON0 0x156 +#define MT6331_TOP_CKTST_CON1 0x158 +#define MT6331_TOP_CLKSQ 0x15A +#define MT6331_TOP_CLKSQ_SET 0x15C +#define MT6331_TOP_CLKSQ_CLR 0x15E +#define MT6331_TOP_RST_CON 0x160 +#define MT6331_TOP_RST_CON_SET 0x162 +#define MT6331_TOP_RST_CON_CLR 0x164 +#define MT6331_TOP_RST_MISC 0x166 +#define MT6331_TOP_RST_MISC_SET 0x168 +#define MT6331_TOP_RST_MISC_CLR 0x16A +#define MT6331_INT_CON0 0x16C +#define MT6331_INT_CON0_SET 0x16E +#define MT6331_INT_CON0_CLR 0x170 +#define MT6331_INT_CON1 0x172 +#define MT6331_INT_CON1_SET 0x174 +#define MT6331_INT_CON1_CLR 0x176 +#define MT6331_INT_MISC_CON 0x178 +#define MT6331_INT_MISC_CON_SET 0x17A +#define MT6331_INT_MISC_CON_CLR 0x17C +#define MT6331_INT_STATUS_CON0 0x17E +#define MT6331_INT_STATUS_CON1 0x180 +#define MT6331_OC_GEAR_0 0x182 +#define MT6331_FQMTR_CON0 0x184 +#define MT6331_FQMTR_CON1 0x186 +#define MT6331_FQMTR_CON2 0x188 +#define MT6331_RG_SPI_CON 0x18A +#define MT6331_DEW_DIO_EN 0x18C +#define MT6331_DEW_READ_TEST 0x18E +#define MT6331_DEW_WRITE_TEST 0x190 +#define MT6331_DEW_CRC_SWRST 0x192 +#define MT6331_DEW_CRC_EN 0x194 +#define MT6331_DEW_CRC_VAL 0x196 +#define MT6331_DEW_DBG_MON_SEL 0x198 +#define MT6331_DEW_CIPHER_KEY_SEL 0x19A +#define MT6331_DEW_CIPHER_IV_SEL 0x19C +#define MT6331_DEW_CIPHER_EN 0x19E +#define MT6331_DEW_CIPHER_RDY 0x1A0 +#define MT6331_DEW_CIPHER_MODE 0x1A2 +#define MT6331_DEW_CIPHER_SWRST 0x1A4 +#define MT6331_DEW_RDDMY_NO 0x1A6 +#define MT6331_INT_TYPE_CON0 0x1A8 +#define MT6331_INT_TYPE_CON0_SET 0x1AA +#define MT6331_INT_TYPE_CON0_CLR 0x1AC +#define MT6331_INT_TYPE_CON1 0x1AE +#define MT6331_INT_TYPE_CON1_SET 0x1B0 +#define MT6331_INT_TYPE_CON1_CLR 0x1B2 +#define MT6331_INT_STA 0x1B4 +#define MT6331_BUCK_ALL_CON0 0x200 +#define MT6331_BUCK_ALL_CON1 0x202 +#define MT6331_BUCK_ALL_CON2 0x204 +#define MT6331_BUCK_ALL_CON3 0x206 +#define MT6331_BUCK_ALL_CON4 0x208 +#define MT6331_BUCK_ALL_CON5 0x20A +#define MT6331_BUCK_ALL_CON6 0x20C +#define MT6331_BUCK_ALL_CON7 0x20E +#define MT6331_BUCK_ALL_CON8 0x210 +#define MT6331_BUCK_ALL_CON9 0x212 +#define MT6331_BUCK_ALL_CON10 0x214 +#define MT6331_BUCK_ALL_CON11 0x216 +#define MT6331_BUCK_ALL_CON12 0x218 +#define MT6331_BUCK_ALL_CON13 0x21A +#define MT6331_BUCK_ALL_CON14 0x21C +#define MT6331_BUCK_ALL_CON15 0x21E +#define MT6331_BUCK_ALL_CON16 0x220 +#define MT6331_BUCK_ALL_CON17 0x222 +#define MT6331_BUCK_ALL_CON18 0x224 +#define MT6331_BUCK_ALL_CON19 0x226 +#define MT6331_BUCK_ALL_CON20 0x228 +#define MT6331_BUCK_ALL_CON21 0x22A +#define MT6331_BUCK_ALL_CON22 0x22C +#define MT6331_BUCK_ALL_CON23 0x22E +#define MT6331_BUCK_ALL_CON24 0x230 +#define MT6331_BUCK_ALL_CON25 0x232 +#define MT6331_BUCK_ALL_CON26 0x234 +#define MT6331_VDVFS11_CON0 0x236 +#define MT6331_VDVFS11_CON1 0x238 +#define MT6331_VDVFS11_CON2 0x23A +#define MT6331_VDVFS11_CON3 0x23C +#define MT6331_VDVFS11_CON4 0x23E +#define MT6331_VDVFS11_CON5 0x240 +#define MT6331_VDVFS11_CON6 0x242 +#define MT6331_VDVFS11_CON7 0x244 +#define MT6331_VDVFS11_CON8 0x246 +#define MT6331_VDVFS11_CON9 0x248 +#define MT6331_VDVFS11_CON10 0x24A +#define MT6331_VDVFS11_CON11 0x24C +#define MT6331_VDVFS11_CON12 0x24E +#define MT6331_VDVFS11_CON13 0x250 +#define MT6331_VDVFS11_CON14 0x252 +#define MT6331_VDVFS11_CON18 0x25A +#define MT6331_VDVFS11_CON19 0x25C +#define MT6331_VDVFS11_CON20 0x25E +#define MT6331_VDVFS11_CON21 0x260 +#define MT6331_VDVFS11_CON22 0x262 +#define MT6331_VDVFS11_CON23 0x264 +#define MT6331_VDVFS11_CON24 0x266 +#define MT6331_VDVFS11_CON25 0x268 +#define MT6331_VDVFS11_CON26 0x26A +#define MT6331_VDVFS11_CON27 0x26C +#define MT6331_VDVFS12_CON0 0x26E +#define MT6331_VDVFS12_CON1 0x270 +#define MT6331_VDVFS12_CON2 0x272 +#define MT6331_VDVFS12_CON3 0x274 +#define MT6331_VDVFS12_CON4 0x276 +#define MT6331_VDVFS12_CON5 0x278 +#define MT6331_VDVFS12_CON6 0x27A +#define MT6331_VDVFS12_CON7 0x27C +#define MT6331_VDVFS12_CON8 0x27E +#define MT6331_VDVFS12_CON9 0x280 +#define MT6331_VDVFS12_CON10 0x282 +#define MT6331_VDVFS12_CON11 0x284 +#define MT6331_VDVFS12_CON12 0x286 +#define MT6331_VDVFS12_CON13 0x288 +#define MT6331_VDVFS12_CON14 0x28A +#define MT6331_VDVFS12_CON18 0x292 +#define MT6331_VDVFS12_CON19 0x294 +#define MT6331_VDVFS12_CON20 0x296 +#define MT6331_VDVFS13_CON0 0x298 +#define MT6331_VDVFS13_CON1 0x29A +#define MT6331_VDVFS13_CON2 0x29C +#define MT6331_VDVFS13_CON3 0x29E +#define MT6331_VDVFS13_CON4 0x2A0 +#define MT6331_VDVFS13_CON5 0x2A2 +#define MT6331_VDVFS13_CON6 0x2A4 +#define MT6331_VDVFS13_CON7 0x2A6 +#define MT6331_VDVFS13_CON8 0x2A8 +#define MT6331_VDVFS13_CON9 0x2AA +#define MT6331_VDVFS13_CON10 0x2AC +#define MT6331_VDVFS13_CON11 0x2AE +#define MT6331_VDVFS13_CON12 0x2B0 +#define MT6331_VDVFS13_CON13 0x2B2 +#define MT6331_VDVFS13_CON14 0x2B4 +#define MT6331_VDVFS13_CON18 0x2BC +#define MT6331_VDVFS13_CON19 0x2BE +#define MT6331_VDVFS13_CON20 0x2C0 +#define MT6331_VDVFS14_CON0 0x2C2 +#define MT6331_VDVFS14_CON1 0x2C4 +#define MT6331_VDVFS14_CON2 0x2C6 +#define MT6331_VDVFS14_CON3 0x2C8 +#define MT6331_VDVFS14_CON4 0x2CA +#define MT6331_VDVFS14_CON5 0x2CC +#define MT6331_VDVFS14_CON6 0x2CE +#define MT6331_VDVFS14_CON7 0x2D0 +#define MT6331_VDVFS14_CON8 0x2D2 +#define MT6331_VDVFS14_CON9 0x2D4 +#define MT6331_VDVFS14_CON10 0x2D6 +#define MT6331_VDVFS14_CON11 0x2D8 +#define MT6331_VDVFS14_CON12 0x2DA +#define MT6331_VDVFS14_CON13 0x2DC +#define MT6331_VDVFS14_CON14 0x2DE +#define MT6331_VDVFS14_CON18 0x2E6 +#define MT6331_VDVFS14_CON19 0x2E8 +#define MT6331_VDVFS14_CON20 0x2EA +#define MT6331_VGPU_CON0 0x300 +#define MT6331_VGPU_CON1 0x302 +#define MT6331_VGPU_CON2 0x304 +#define MT6331_VGPU_CON3 0x306 +#define MT6331_VGPU_CON4 0x308 +#define MT6331_VGPU_CON5 0x30A +#define MT6331_VGPU_CON6 0x30C +#define MT6331_VGPU_CON7 0x30E +#define MT6331_VGPU_CON8 0x310 +#define MT6331_VGPU_CON9 0x312 +#define MT6331_VGPU_CON10 0x314 +#define MT6331_VGPU_CON11 0x316 +#define MT6331_VGPU_CON12 0x318 +#define MT6331_VGPU_CON13 0x31A +#define MT6331_VGPU_CON14 0x31C +#define MT6331_VGPU_CON15 0x31E +#define MT6331_VGPU_CON16 0x320 +#define MT6331_VGPU_CON17 0x322 +#define MT6331_VGPU_CON18 0x324 +#define MT6331_VGPU_CON19 0x326 +#define MT6331_VGPU_CON20 0x328 +#define MT6331_VCORE1_CON0 0x32A +#define MT6331_VCORE1_CON1 0x32C +#define MT6331_VCORE1_CON2 0x32E +#define MT6331_VCORE1_CON3 0x330 +#define MT6331_VCORE1_CON4 0x332 +#define MT6331_VCORE1_CON5 0x334 +#define MT6331_VCORE1_CON6 0x336 +#define MT6331_VCORE1_CON7 0x338 +#define MT6331_VCORE1_CON8 0x33A +#define MT6331_VCORE1_CON9 0x33C +#define MT6331_VCORE1_CON10 0x33E +#define MT6331_VCORE1_CON11 0x340 +#define MT6331_VCORE1_CON12 0x342 +#define MT6331_VCORE1_CON13 0x344 +#define MT6331_VCORE1_CON14 0x346 +#define MT6331_VCORE1_CON15 0x348 +#define MT6331_VCORE1_CON16 0x34A +#define MT6331_VCORE1_CON17 0x34C +#define MT6331_VCORE1_CON18 0x34E +#define MT6331_VCORE1_CON19 0x350 +#define MT6331_VCORE1_CON20 0x352 +#define MT6331_VCORE2_CON0 0x354 +#define MT6331_VCORE2_CON1 0x356 +#define MT6331_VCORE2_CON2 0x358 +#define MT6331_VCORE2_CON3 0x35A +#define MT6331_VCORE2_CON4 0x35C +#define MT6331_VCORE2_CON5 0x35E +#define MT6331_VCORE2_CON6 0x360 +#define MT6331_VCORE2_CON7 0x362 +#define MT6331_VCORE2_CON8 0x364 +#define MT6331_VCORE2_CON9 0x366 +#define MT6331_VCORE2_CON10 0x368 +#define MT6331_VCORE2_CON11 0x36A +#define MT6331_VCORE2_CON12 0x36C +#define MT6331_VCORE2_CON13 0x36E +#define MT6331_VCORE2_CON14 0x370 +#define MT6331_VCORE2_CON15 0x372 +#define MT6331_VCORE2_CON16 0x374 +#define MT6331_VCORE2_CON17 0x376 +#define MT6331_VCORE2_CON18 0x378 +#define MT6331_VCORE2_CON19 0x37A +#define MT6331_VCORE2_CON20 0x37C +#define MT6331_VCORE2_CON21 0x37E +#define MT6331_VIO18_CON0 0x380 +#define MT6331_VIO18_CON1 0x382 +#define MT6331_VIO18_CON2 0x384 +#define MT6331_VIO18_CON3 0x386 +#define MT6331_VIO18_CON4 0x388 +#define MT6331_VIO18_CON5 0x38A +#define MT6331_VIO18_CON6 0x38C +#define MT6331_VIO18_CON7 0x38E +#define MT6331_VIO18_CON8 0x390 +#define MT6331_VIO18_CON9 0x392 +#define MT6331_VIO18_CON10 0x394 +#define MT6331_VIO18_CON11 0x396 +#define MT6331_VIO18_CON12 0x398 +#define MT6331_VIO18_CON13 0x39A +#define MT6331_VIO18_CON14 0x39C +#define MT6331_VIO18_CON15 0x39E +#define MT6331_VIO18_CON16 0x3A0 +#define MT6331_VIO18_CON17 0x3A2 +#define MT6331_VIO18_CON18 0x3A4 +#define MT6331_VIO18_CON19 0x3A6 +#define MT6331_VIO18_CON20 0x3A8 +#define MT6331_BUCK_K_CON0 0x3AA +#define MT6331_BUCK_K_CON1 0x3AC +#define MT6331_BUCK_K_CON2 0x3AE +#define MT6331_BUCK_K_CON3 0x3B0 +#define MT6331_ZCD_CON0 0x400 +#define MT6331_ZCD_CON1 0x402 +#define MT6331_ZCD_CON2 0x404 +#define MT6331_ZCD_CON3 0x406 +#define MT6331_ZCD_CON4 0x408 +#define MT6331_ZCD_CON5 0x40A +#define MT6331_ISINK0_CON0 0x40C +#define MT6331_ISINK0_CON1 0x40E +#define MT6331_ISINK0_CON2 0x410 +#define MT6331_ISINK0_CON3 0x412 +#define MT6331_ISINK0_CON4 0x414 +#define MT6331_ISINK1_CON0 0x416 +#define MT6331_ISINK1_CON1 0x418 +#define MT6331_ISINK1_CON2 0x41A +#define MT6331_ISINK1_CON3 0x41C +#define MT6331_ISINK1_CON4 0x41E +#define MT6331_ISINK2_CON0 0x420 +#define MT6331_ISINK2_CON1 0x422 +#define MT6331_ISINK2_CON2 0x424 +#define MT6331_ISINK2_CON3 0x426 +#define MT6331_ISINK2_CON4 0x428 +#define MT6331_ISINK3_CON0 0x42A +#define MT6331_ISINK3_CON1 0x42C +#define MT6331_ISINK3_CON2 0x42E +#define MT6331_ISINK3_CON3 0x430 +#define MT6331_ISINK3_CON4 0x432 +#define MT6331_ISINK_ANA0 0x434 +#define MT6331_ISINK_ANA1 0x436 +#define MT6331_ISINK_PHASE_DLY 0x438 +#define MT6331_ISINK_EN_CTRL 0x43A +#define MT6331_ANALDO_CON0 0x500 +#define MT6331_ANALDO_CON1 0x502 +#define MT6331_ANALDO_CON2 0x504 +#define MT6331_ANALDO_CON3 0x506 +#define MT6331_ANALDO_CON4 0x508 +#define MT6331_ANALDO_CON5 0x50A +#define MT6331_ANALDO_CON6 0x50C +#define MT6331_ANALDO_CON7 0x50E +#define MT6331_ANALDO_CON8 0x510 +#define MT6331_ANALDO_CON9 0x512 +#define MT6331_ANALDO_CON10 0x514 +#define MT6331_ANALDO_CON11 0x516 +#define MT6331_ANALDO_CON12 0x518 +#define MT6331_ANALDO_CON13 0x51A +#define MT6331_SYSLDO_CON0 0x51C +#define MT6331_SYSLDO_CON1 0x51E +#define MT6331_SYSLDO_CON2 0x520 +#define MT6331_SYSLDO_CON3 0x522 +#define MT6331_SYSLDO_CON4 0x524 +#define MT6331_SYSLDO_CON5 0x526 +#define MT6331_SYSLDO_CON6 0x528 +#define MT6331_SYSLDO_CON7 0x52A +#define MT6331_SYSLDO_CON8 0x52C +#define MT6331_SYSLDO_CON9 0x52E +#define MT6331_SYSLDO_CON10 0x530 +#define MT6331_SYSLDO_CON11 0x532 +#define MT6331_SYSLDO_CON12 0x534 +#define MT6331_SYSLDO_CON13 0x536 +#define MT6331_SYSLDO_CON14 0x538 +#define MT6331_SYSLDO_CON15 0x53A +#define MT6331_SYSLDO_CON16 0x53C +#define MT6331_SYSLDO_CON17 0x53E +#define MT6331_SYSLDO_CON18 0x540 +#define MT6331_SYSLDO_CON19 0x542 +#define MT6331_SYSLDO_CON20 0x544 +#define MT6331_SYSLDO_CON21 0x546 +#define MT6331_DIGLDO_CON0 0x548 +#define MT6331_DIGLDO_CON1 0x54A +#define MT6331_DIGLDO_CON2 0x54C +#define MT6331_DIGLDO_CON3 0x54E +#define MT6331_DIGLDO_CON4 0x550 +#define MT6331_DIGLDO_CON5 0x552 +#define MT6331_DIGLDO_CON6 0x554 +#define MT6331_DIGLDO_CON7 0x556 +#define MT6331_DIGLDO_CON8 0x558 +#define MT6331_DIGLDO_CON9 0x55A +#define MT6331_DIGLDO_CON10 0x55C +#define MT6331_DIGLDO_CON11 0x55E +#define MT6331_DIGLDO_CON12 0x560 +#define MT6331_DIGLDO_CON13 0x562 +#define MT6331_DIGLDO_CON14 0x564 +#define MT6331_DIGLDO_CON15 0x566 +#define MT6331_DIGLDO_CON16 0x568 +#define MT6331_DIGLDO_CON17 0x56A +#define MT6331_DIGLDO_CON18 0x56C +#define MT6331_DIGLDO_CON19 0x56E +#define MT6331_DIGLDO_CON20 0x570 +#define MT6331_DIGLDO_CON21 0x572 +#define MT6331_DIGLDO_CON22 0x574 +#define MT6331_DIGLDO_CON23 0x576 +#define MT6331_DIGLDO_CON24 0x578 +#define MT6331_DIGLDO_CON25 0x57A +#define MT6331_DIGLDO_CON26 0x57C +#define MT6331_DIGLDO_CON27 0x57E +#define MT6331_DIGLDO_CON28 0x580 +#define MT6331_OTP_CON0 0x600 +#define MT6331_OTP_CON1 0x602 +#define MT6331_OTP_CON2 0x604 +#define MT6331_OTP_CON3 0x606 +#define MT6331_OTP_CON4 0x608 +#define MT6331_OTP_CON5 0x60A +#define MT6331_OTP_CON6 0x60C +#define MT6331_OTP_CON7 0x60E +#define MT6331_OTP_CON8 0x610 +#define MT6331_OTP_CON9 0x612 +#define MT6331_OTP_CON10 0x614 +#define MT6331_OTP_CON11 0x616 +#define MT6331_OTP_CON12 0x618 +#define MT6331_OTP_CON13 0x61A +#define MT6331_OTP_CON14 0x61C +#define MT6331_OTP_DOUT_0_15 0x61E +#define MT6331_OTP_DOUT_16_31 0x620 +#define MT6331_OTP_DOUT_32_47 0x622 +#define MT6331_OTP_DOUT_48_63 0x624 +#define MT6331_OTP_DOUT_64_79 0x626 +#define MT6331_OTP_DOUT_80_95 0x628 +#define MT6331_OTP_DOUT_96_111 0x62A +#define MT6331_OTP_DOUT_112_127 0x62C +#define MT6331_OTP_DOUT_128_143 0x62E +#define MT6331_OTP_DOUT_144_159 0x630 +#define MT6331_OTP_DOUT_160_175 0x632 +#define MT6331_OTP_DOUT_176_191 0x634 +#define MT6331_OTP_DOUT_192_207 0x636 +#define MT6331_OTP_DOUT_208_223 0x638 +#define MT6331_OTP_DOUT_224_239 0x63A +#define MT6331_OTP_DOUT_240_255 0x63C +#define MT6331_OTP_VAL_0_15 0x63E +#define MT6331_OTP_VAL_16_31 0x640 +#define MT6331_OTP_VAL_32_47 0x642 +#define MT6331_OTP_VAL_48_63 0x644 +#define MT6331_OTP_VAL_64_79 0x646 +#define MT6331_OTP_VAL_80_95 0x648 +#define MT6331_OTP_VAL_96_111 0x64A +#define MT6331_OTP_VAL_112_127 0x64C +#define MT6331_OTP_VAL_128_143 0x64E +#define MT6331_OTP_VAL_144_159 0x650 +#define MT6331_OTP_VAL_160_175 0x652 +#define MT6331_OTP_VAL_176_191 0x654 +#define MT6331_OTP_VAL_192_207 0x656 +#define MT6331_OTP_VAL_208_223 0x658 +#define MT6331_OTP_VAL_224_239 0x65A +#define MT6331_OTP_VAL_240_255 0x65C +#define MT6331_RTC_MIX_CON0 0x65E +#define MT6331_RTC_MIX_CON1 0x660 +#define MT6331_AUDDAC_CFG0 0x662 +#define MT6331_AUDBUF_CFG0 0x664 +#define MT6331_AUDBUF_CFG1 0x666 +#define MT6331_AUDBUF_CFG2 0x668 +#define MT6331_AUDBUF_CFG3 0x66A +#define MT6331_AUDBUF_CFG4 0x66C +#define MT6331_AUDBUF_CFG5 0x66E +#define MT6331_AUDBUF_CFG6 0x670 +#define MT6331_AUDBUF_CFG7 0x672 +#define MT6331_AUDBUF_CFG8 0x674 +#define MT6331_IBIASDIST_CFG0 0x676 +#define MT6331_AUDCLKGEN_CFG0 0x678 +#define MT6331_AUDLDO_CFG0 0x67A +#define MT6331_AUDDCDC_CFG0 0x67C +#define MT6331_AUDDCDC_CFG1 0x67E +#define MT6331_AUDNVREGGLB_CFG0 0x680 +#define MT6331_AUD_NCP0 0x682 +#define MT6331_AUD_ZCD_CFG0 0x684 +#define MT6331_AUDPREAMP_CFG0 0x686 +#define MT6331_AUDPREAMP_CFG1 0x688 +#define MT6331_AUDPREAMP_CFG2 0x68A +#define MT6331_AUDADC_CFG0 0x68C +#define MT6331_AUDADC_CFG1 0x68E +#define MT6331_AUDADC_CFG2 0x690 +#define MT6331_AUDADC_CFG3 0x692 +#define MT6331_AUDADC_CFG4 0x694 +#define MT6331_AUDADC_CFG5 0x696 +#define MT6331_AUDDIGMI_CFG0 0x698 +#define MT6331_AUDDIGMI_CFG1 0x69A +#define MT6331_AUDMICBIAS_CFG0 0x69C +#define MT6331_AUDMICBIAS_CFG1 0x69E +#define MT6331_AUDENCSPARE_CFG0 0x6A0 +#define MT6331_AUDPREAMPGAIN_CFG0 0x6A2 +#define MT6331_AUDMADPLL_CFG0 0x6A4 +#define MT6331_AUDMADPLL_CFG1 0x6A6 +#define MT6331_AUDMADPLL_CFG2 0x6A8 +#define MT6331_AUDLDO_NVREG_CFG0 0x6AA +#define MT6331_AUDLDO_NVREG_CFG1 0x6AC +#define MT6331_AUDLDO_NVREG_CFG2 0x6AE +#define MT6331_AUXADC_ADC0 0x700 +#define MT6331_AUXADC_ADC1 0x702 +#define MT6331_AUXADC_ADC2 0x704 +#define MT6331_AUXADC_ADC3 0x706 +#define MT6331_AUXADC_ADC4 0x708 +#define MT6331_AUXADC_ADC5 0x70A +#define MT6331_AUXADC_ADC6 0x70C +#define MT6331_AUXADC_ADC7 0x70E +#define MT6331_AUXADC_ADC8 0x710 +#define MT6331_AUXADC_ADC9 0x712 +#define MT6331_AUXADC_ADC10 0x714 +#define MT6331_AUXADC_ADC11 0x716 +#define MT6331_AUXADC_ADC12 0x718 +#define MT6331_AUXADC_ADC13 0x71A +#define MT6331_AUXADC_ADC14 0x71C +#define MT6331_AUXADC_ADC15 0x71E +#define MT6331_AUXADC_ADC16 0x720 +#define MT6331_AUXADC_ADC17 0x722 +#define MT6331_AUXADC_ADC18 0x724 +#define MT6331_AUXADC_ADC19 0x726 +#define MT6331_AUXADC_STA0 0x728 +#define MT6331_AUXADC_STA1 0x72A +#define MT6331_AUXADC_RQST0 0x72C +#define MT6331_AUXADC_RQST0_SET 0x72E +#define MT6331_AUXADC_RQST0_CLR 0x730 +#define MT6331_AUXADC_RQST1 0x732 +#define MT6331_AUXADC_RQST1_SET 0x734 +#define MT6331_AUXADC_RQST1_CLR 0x736 +#define MT6331_AUXADC_CON0 0x738 +#define MT6331_AUXADC_CON1 0x73A +#define MT6331_AUXADC_CON2 0x73C +#define MT6331_AUXADC_CON3 0x73E +#define MT6331_AUXADC_CON4 0x740 +#define MT6331_AUXADC_CON5 0x742 +#define MT6331_AUXADC_CON6 0x744 +#define MT6331_AUXADC_CON7 0x746 +#define MT6331_AUXADC_CON8 0x748 +#define MT6331_AUXADC_CON9 0x74A +#define MT6331_AUXADC_CON10 0x74C +#define MT6331_AUXADC_CON11 0x74E +#define MT6331_AUXADC_CON12 0x750 +#define MT6331_AUXADC_CON13 0x752 +#define MT6331_AUXADC_CON14 0x754 +#define MT6331_AUXADC_CON15 0x756 +#define MT6331_AUXADC_CON16 0x758 +#define MT6331_AUXADC_CON17 0x75A +#define MT6331_AUXADC_CON18 0x75C +#define MT6331_AUXADC_CON19 0x75E +#define MT6331_AUXADC_CON20 0x760 +#define MT6331_AUXADC_CON21 0x762 +#define MT6331_AUXADC_CON22 0x764 +#define MT6331_AUXADC_CON23 0x766 +#define MT6331_AUXADC_CON24 0x768 +#define MT6331_AUXADC_CON25 0x76A +#define MT6331_AUXADC_CON26 0x76C +#define MT6331_AUXADC_CON27 0x76E +#define MT6331_AUXADC_CON28 0x770 +#define MT6331_AUXADC_CON29 0x772 +#define MT6331_AUXADC_CON30 0x774 +#define MT6331_AUXADC_CON31 0x776 +#define MT6331_AUXADC_CON32 0x778 +#define MT6331_ACCDET_CON0 0x77A +#define MT6331_ACCDET_CON1 0x77C +#define MT6331_ACCDET_CON2 0x77E +#define MT6331_ACCDET_CON3 0x780 +#define MT6331_ACCDET_CON4 0x782 +#define MT6331_ACCDET_CON5 0x784 +#define MT6331_ACCDET_CON6 0x786 +#define MT6331_ACCDET_CON7 0x788 +#define MT6331_ACCDET_CON8 0x78A +#define MT6331_ACCDET_CON9 0x78C +#define MT6331_ACCDET_CON10 0x78E +#define MT6331_ACCDET_CON11 0x790 +#define MT6331_ACCDET_CON12 0x792 +#define MT6331_ACCDET_CON13 0x794 +#define MT6331_ACCDET_CON14 0x796 +#define MT6331_ACCDET_CON15 0x798 +#define MT6331_ACCDET_CON16 0x79A +#define MT6331_ACCDET_CON17 0x79C +#define MT6331_ACCDET_CON18 0x79E +#define MT6331_ACCDET_CON19 0x7A0 +#define MT6331_ACCDET_CON20 0x7A2 +#define MT6331_ACCDET_CON21 0x7A4 +#define MT6331_ACCDET_CON22 0x7A6 +#define MT6331_ACCDET_CON23 0x7A8 +#define MT6331_ACCDET_CON24 0x7AA + +#endif /* __MFD_MT6331_REGISTERS_H__ */ diff --git a/include/linux/mfd/mt6332/core.h b/include/linux/mfd/mt6332/core.h new file mode 100644 index 000000000000..cd6013eb82d9 --- /dev/null +++ b/include/linux/mfd/mt6332/core.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 AngeloGioacchino Del Regno + */ + +#ifndef __MFD_MT6332_CORE_H__ +#define __MFD_MT6332_CORE_H__ + +enum mt6332_irq_status_numbers { + MT6332_IRQ_STATUS_CHR_COMPLETE = 0, + MT6332_IRQ_STATUS_THERMAL_SD, + MT6332_IRQ_STATUS_THERMAL_REG_IN, + MT6332_IRQ_STATUS_THERMAL_REG_OUT, + MT6332_IRQ_STATUS_OTG_OC, + MT6332_IRQ_STATUS_CHR_OC, + MT6332_IRQ_STATUS_OTG_THERMAL, + MT6332_IRQ_STATUS_CHRIN_SHORT, + MT6332_IRQ_STATUS_DRVCDT_SHORT, + MT6332_IRQ_STATUS_PLUG_IN_FLASH, + MT6332_IRQ_STATUS_CHRWDT_FLAG, + MT6332_IRQ_STATUS_FLASH_EN_TIMEOUT, + MT6332_IRQ_STATUS_FLASH_VLED1_SHORT, + MT6332_IRQ_STATUS_FLASH_VLED1_OPEN = 13, + MT6332_IRQ_STATUS_OV = 16, + MT6332_IRQ_STATUS_BVALID_DET, + MT6332_IRQ_STATUS_VBATON_UNDET, + MT6332_IRQ_STATUS_CHR_PLUG_IN, + MT6332_IRQ_STATUS_CHR_PLUG_OUT, + MT6332_IRQ_STATUS_BC11_TIMEOUT, + MT6332_IRQ_STATUS_FLASH_VLED2_SHORT, + MT6332_IRQ_STATUS_FLASH_VLED2_OPEN = 23, + MT6332_IRQ_STATUS_THR_H = 32, + MT6332_IRQ_STATUS_THR_L, + MT6332_IRQ_STATUS_BAT_H, + MT6332_IRQ_STATUS_BAT_L, + MT6332_IRQ_STATUS_M3_H, + MT6332_IRQ_STATUS_M3_L, + MT6332_IRQ_STATUS_FG_BAT_H, + MT6332_IRQ_STATUS_FG_BAT_L, + MT6332_IRQ_STATUS_FG_CUR_H, + MT6332_IRQ_STATUS_FG_CUR_L, + MT6332_IRQ_STATUS_SPKL_D, + MT6332_IRQ_STATUS_SPKL_AB, + MT6332_IRQ_STATUS_BIF, + MT6332_IRQ_STATUS_VWLED_OC = 45, + MT6332_IRQ_STATUS_VDRAM_OC = 48, + MT6332_IRQ_STATUS_VDVFS2_OC, + MT6332_IRQ_STATUS_VRF1_OC, + MT6332_IRQ_STATUS_VRF2_OC, + MT6332_IRQ_STATUS_VPA_OC, + MT6332_IRQ_STATUS_VSBST_OC, + MT6332_IRQ_STATUS_LDO_OC, + MT6332_IRQ_STATUS_NR, +}; + +#define MT6332_IRQ_CON0_BASE MT6332_IRQ_STATUS_CHR_COMPLETE +#define MT6332_IRQ_CON0_BITS (MT6332_IRQ_STATUS_FLASH_VLED1_OPEN + 1) +#define MT6332_IRQ_CON1_BASE MT6332_IRQ_STATUS_OV +#define MT6332_IRQ_CON1_BITS (MT6332_IRQ_STATUS_FLASH_VLED2_OPEN - MT6332_IRQ_STATUS_OV + 1) +#define MT6332_IRQ_CON2_BASE MT6332_IRQ_STATUS_THR_H +#define MT6332_IRQ_CON2_BITS (MT6332_IRQ_STATUS_VWLED_OC - MT6332_IRQ_STATUS_THR_H + 1) +#define MT6332_IRQ_CON3_BASE MT6332_IRQ_STATUS_VDRAM_OC +#define MT6332_IRQ_CON3_BITS (MT6332_IRQ_STATUS_LDO_OC - MT6332_IRQ_STATUS_VDRAM_OC + 1) + +#endif /* __MFD_MT6332_CORE_H__ */ diff --git a/include/linux/mfd/mt6332/registers.h b/include/linux/mfd/mt6332/registers.h new file mode 100644 index 000000000000..65e0b86fceac --- /dev/null +++ b/include/linux/mfd/mt6332/registers.h @@ -0,0 +1,642 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 AngeloGioacchino Del Regno + */ + +#ifndef __MFD_MT6332_REGISTERS_H__ +#define __MFD_MT6332_REGISTERS_H__ + +/* PMIC Registers */ +#define MT6332_HWCID 0x8000 +#define MT6332_SWCID 0x8002 +#define MT6332_TOP_CON 0x8004 +#define MT6332_DDR_VREF_AP_CON 0x8006 +#define MT6332_DDR_VREF_DQ_CON 0x8008 +#define MT6332_DDR_VREF_CA_CON 0x800A +#define MT6332_TEST_OUT 0x800C +#define MT6332_TEST_CON0 0x800E +#define MT6332_TEST_CON1 0x8010 +#define MT6332_TESTMODE_SW 0x8012 +#define MT6332_TESTMODE_ANA 0x8014 +#define MT6332_TDSEL_CON 0x8016 +#define MT6332_RDSEL_CON 0x8018 +#define MT6332_SMT_CON0 0x801A +#define MT6332_SMT_CON1 0x801C +#define MT6332_DRV_CON0 0x801E +#define MT6332_DRV_CON1 0x8020 +#define MT6332_DRV_CON2 0x8022 +#define MT6332_EN_STATUS0 0x8024 +#define MT6332_OCSTATUS0 0x8026 +#define MT6332_TOP_STATUS 0x8028 +#define MT6332_TOP_STATUS_SET 0x802A +#define MT6332_TOP_STATUS_CLR 0x802C +#define MT6332_FLASH_CON0 0x802E +#define MT6332_FLASH_CON1 0x8030 +#define MT6332_FLASH_CON2 0x8032 +#define MT6332_CORE_CON0 0x8034 +#define MT6332_CORE_CON1 0x8036 +#define MT6332_CORE_CON2 0x8038 +#define MT6332_CORE_CON3 0x803A +#define MT6332_CORE_CON4 0x803C +#define MT6332_CORE_CON5 0x803E +#define MT6332_CORE_CON6 0x8040 +#define MT6332_CORE_CON7 0x8042 +#define MT6332_CORE_CON8 0x8044 +#define MT6332_CORE_CON9 0x8046 +#define MT6332_CORE_CON10 0x8048 +#define MT6332_CORE_CON11 0x804A +#define MT6332_CORE_CON12 0x804C +#define MT6332_CORE_CON13 0x804E +#define MT6332_CORE_CON14 0x8050 +#define MT6332_CORE_CON15 0x8052 +#define MT6332_STA_CON0 0x8054 +#define MT6332_STA_CON1 0x8056 +#define MT6332_STA_CON2 0x8058 +#define MT6332_STA_CON3 0x805A +#define MT6332_STA_CON4 0x805C +#define MT6332_STA_CON5 0x805E +#define MT6332_STA_CON6 0x8060 +#define MT6332_STA_CON7 0x8062 +#define MT6332_CHR_CON0 0x8064 +#define MT6332_CHR_CON1 0x8066 +#define MT6332_CHR_CON2 0x8068 +#define MT6332_CHR_CON3 0x806A +#define MT6332_CHR_CON4 0x806C +#define MT6332_CHR_CON5 0x806E +#define MT6332_CHR_CON6 0x8070 +#define MT6332_CHR_CON7 0x8072 +#define MT6332_CHR_CON8 0x8074 +#define MT6332_CHR_CON9 0x8076 +#define MT6332_CHR_CON10 0x8078 +#define MT6332_CHR_CON11 0x807A +#define MT6332_CHR_CON12 0x807C +#define MT6332_CHR_CON13 0x807E +#define MT6332_CHR_CON14 0x8080 +#define MT6332_CHR_CON15 0x8082 +#define MT6332_BOOST_CON0 0x8084 +#define MT6332_BOOST_CON1 0x8086 +#define MT6332_BOOST_CON2 0x8088 +#define MT6332_BOOST_CON3 0x808A +#define MT6332_BOOST_CON4 0x808C +#define MT6332_BOOST_CON5 0x808E +#define MT6332_BOOST_CON6 0x8090 +#define MT6332_BOOST_CON7 0x8092 +#define MT6332_TOP_CKPDN_CON0 0x8094 +#define MT6332_TOP_CKPDN_CON0_SET 0x8096 +#define MT6332_TOP_CKPDN_CON0_CLR 0x8098 +#define MT6332_TOP_CKPDN_CON1 0x809A +#define MT6332_TOP_CKPDN_CON1_SET 0x809C +#define MT6332_TOP_CKPDN_CON1_CLR 0x809E +#define MT6332_TOP_CKPDN_CON2 0x80A0 +#define MT6332_TOP_CKPDN_CON2_SET 0x80A2 +#define MT6332_TOP_CKPDN_CON2_CLR 0x80A4 +#define MT6332_TOP_CKSEL_CON0 0x80A6 +#define MT6332_TOP_CKSEL_CON0_SET 0x80A8 +#define MT6332_TOP_CKSEL_CON0_CLR 0x80AA +#define MT6332_TOP_CKSEL_CON1 0x80AC +#define MT6332_TOP_CKSEL_CON1_SET 0x80AE +#define MT6332_TOP_CKSEL_CON1_CLR 0x80B0 +#define MT6332_TOP_CKHWEN_CON 0x80B2 +#define MT6332_TOP_CKHWEN_CON_SET 0x80B4 +#define MT6332_TOP_CKHWEN_CON_CLR 0x80B6 +#define MT6332_TOP_CKTST_CON0 0x80B8 +#define MT6332_TOP_CKTST_CON1 0x80BA +#define MT6332_TOP_RST_CON 0x80BC +#define MT6332_TOP_RST_CON_SET 0x80BE +#define MT6332_TOP_RST_CON_CLR 0x80C0 +#define MT6332_TOP_RST_MISC 0x80C2 +#define MT6332_TOP_RST_MISC_SET 0x80C4 +#define MT6332_TOP_RST_MISC_CLR 0x80C6 +#define MT6332_INT_CON0 0x80C8 +#define MT6332_INT_CON0_SET 0x80CA +#define MT6332_INT_CON0_CLR 0x80CC +#define MT6332_INT_CON1 0x80CE +#define MT6332_INT_CON1_SET 0x80D0 +#define MT6332_INT_CON1_CLR 0x80D2 +#define MT6332_INT_CON2 0x80D4 +#define MT6332_INT_CON2_SET 0x80D6 +#define MT6332_INT_CON2_CLR 0x80D8 +#define MT6332_INT_CON3 0x80DA +#define MT6332_INT_CON3_SET 0x80DC +#define MT6332_INT_CON3_CLR 0x80DE +#define MT6332_CHRWDT_CON0 0x80E0 +#define MT6332_CHRWDT_STATUS0 0x80E2 +#define MT6332_INT_STATUS0 0x80E4 +#define MT6332_INT_STATUS1 0x80E6 +#define MT6332_INT_STATUS2 0x80E8 +#define MT6332_INT_STATUS3 0x80EA +#define MT6332_OC_GEAR_0 0x80EC +#define MT6332_OC_GEAR_1 0x80EE +#define MT6332_OC_GEAR_2 0x80F0 +#define MT6332_INT_MISC_CON 0x80F2 +#define MT6332_RG_SPI_CON 0x80F4 +#define MT6332_DEW_DIO_EN 0x80F6 +#define MT6332_DEW_READ_TEST 0x80F8 +#define MT6332_DEW_WRITE_TEST 0x80FA +#define MT6332_DEW_CRC_SWRST 0x80FC +#define MT6332_DEW_CRC_EN 0x80FE +#define MT6332_DEW_CRC_VAL 0x8100 +#define MT6332_DEW_DBG_MON_SEL 0x8102 +#define MT6332_DEW_CIPHER_KEY_SEL 0x8104 +#define MT6332_DEW_CIPHER_IV_SEL 0x8106 +#define MT6332_DEW_CIPHER_EN 0x8108 +#define MT6332_DEW_CIPHER_RDY 0x810A +#define MT6332_DEW_CIPHER_MODE 0x810C +#define MT6332_DEW_CIPHER_SWRST 0x810E +#define MT6332_DEW_RDDMY_NO 0x8110 +#define MT6332_INT_STA 0x8112 +#define MT6332_BIF_CON0 0x8114 +#define MT6332_BIF_CON1 0x8116 +#define MT6332_BIF_CON2 0x8118 +#define MT6332_BIF_CON3 0x811A +#define MT6332_BIF_CON4 0x811C +#define MT6332_BIF_CON5 0x811E +#define MT6332_BIF_CON6 0x8120 +#define MT6332_BIF_CON7 0x8122 +#define MT6332_BIF_CON8 0x8124 +#define MT6332_BIF_CON9 0x8126 +#define MT6332_BIF_CON10 0x8128 +#define MT6332_BIF_CON11 0x812A +#define MT6332_BIF_CON12 0x812C +#define MT6332_BIF_CON13 0x812E +#define MT6332_BIF_CON14 0x8130 +#define MT6332_BIF_CON15 0x8132 +#define MT6332_BIF_CON16 0x8134 +#define MT6332_BIF_CON17 0x8136 +#define MT6332_BIF_CON18 0x8138 +#define MT6332_BIF_CON19 0x813A +#define MT6332_BIF_CON20 0x813C +#define MT6332_BIF_CON21 0x813E +#define MT6332_BIF_CON22 0x8140 +#define MT6332_BIF_CON23 0x8142 +#define MT6332_BIF_CON24 0x8144 +#define MT6332_BIF_CON25 0x8146 +#define MT6332_BIF_CON26 0x8148 +#define MT6332_BIF_CON27 0x814A +#define MT6332_BIF_CON28 0x814C +#define MT6332_BIF_CON29 0x814E +#define MT6332_BIF_CON30 0x8150 +#define MT6332_BIF_CON31 0x8152 +#define MT6332_BIF_CON32 0x8154 +#define MT6332_BIF_CON33 0x8156 +#define MT6332_BIF_CON34 0x8158 +#define MT6332_BIF_CON35 0x815A +#define MT6332_BIF_CON36 0x815C +#define MT6332_BATON_CON0 0x815E +#define MT6332_BIF_CON37 0x8160 +#define MT6332_BIF_CON38 0x8162 +#define MT6332_CHR_CON16 0x8164 +#define MT6332_CHR_CON17 0x8166 +#define MT6332_CHR_CON18 0x8168 +#define MT6332_CHR_CON19 0x816A +#define MT6332_CHR_CON20 0x816C +#define MT6332_CHR_CON21 0x816E +#define MT6332_CHR_CON22 0x8170 +#define MT6332_CHR_CON23 0x8172 +#define MT6332_CHR_CON24 0x8174 +#define MT6332_CHR_CON25 0x8176 +#define MT6332_STA_CON8 0x8178 +#define MT6332_BUCK_ALL_CON0 0x8400 +#define MT6332_BUCK_ALL_CON1 0x8402 +#define MT6332_BUCK_ALL_CON2 0x8404 +#define MT6332_BUCK_ALL_CON3 0x8406 +#define MT6332_BUCK_ALL_CON4 0x8408 +#define MT6332_BUCK_ALL_CON5 0x840A +#define MT6332_BUCK_ALL_CON6 0x840C +#define MT6332_BUCK_ALL_CON7 0x840E +#define MT6332_BUCK_ALL_CON8 0x8410 +#define MT6332_BUCK_ALL_CON9 0x8412 +#define MT6332_BUCK_ALL_CON10 0x8414 +#define MT6332_BUCK_ALL_CON11 0x8416 +#define MT6332_BUCK_ALL_CON12 0x8418 +#define MT6332_BUCK_ALL_CON13 0x841A +#define MT6332_BUCK_ALL_CON14 0x841C +#define MT6332_BUCK_ALL_CON15 0x841E +#define MT6332_BUCK_ALL_CON16 0x8420 +#define MT6332_BUCK_ALL_CON17 0x8422 +#define MT6332_BUCK_ALL_CON18 0x8424 +#define MT6332_BUCK_ALL_CON19 0x8426 +#define MT6332_BUCK_ALL_CON20 0x8428 +#define MT6332_BUCK_ALL_CON21 0x842A +#define MT6332_BUCK_ALL_CON22 0x842C +#define MT6332_BUCK_ALL_CON23 0x842E +#define MT6332_BUCK_ALL_CON24 0x8430 +#define MT6332_BUCK_ALL_CON25 0x8432 +#define MT6332_BUCK_ALL_CON26 0x8434 +#define MT6332_BUCK_ALL_CON27 0x8436 +#define MT6332_VDRAM_CON0 0x8438 +#define MT6332_VDRAM_CON1 0x843A +#define MT6332_VDRAM_CON2 0x843C +#define MT6332_VDRAM_CON3 0x843E +#define MT6332_VDRAM_CON4 0x8440 +#define MT6332_VDRAM_CON5 0x8442 +#define MT6332_VDRAM_CON6 0x8444 +#define MT6332_VDRAM_CON7 0x8446 +#define MT6332_VDRAM_CON8 0x8448 +#define MT6332_VDRAM_CON9 0x844A +#define MT6332_VDRAM_CON10 0x844C +#define MT6332_VDRAM_CON11 0x844E +#define MT6332_VDRAM_CON12 0x8450 +#define MT6332_VDRAM_CON13 0x8452 +#define MT6332_VDRAM_CON14 0x8454 +#define MT6332_VDRAM_CON15 0x8456 +#define MT6332_VDRAM_CON16 0x8458 +#define MT6332_VDRAM_CON17 0x845A +#define MT6332_VDRAM_CON18 0x845C +#define MT6332_VDRAM_CON19 0x845E +#define MT6332_VDRAM_CON20 0x8460 +#define MT6332_VDRAM_CON21 0x8462 +#define MT6332_VDVFS2_CON0 0x8464 +#define MT6332_VDVFS2_CON1 0x8466 +#define MT6332_VDVFS2_CON2 0x8468 +#define MT6332_VDVFS2_CON3 0x846A +#define MT6332_VDVFS2_CON4 0x846C +#define MT6332_VDVFS2_CON5 0x846E +#define MT6332_VDVFS2_CON6 0x8470 +#define MT6332_VDVFS2_CON7 0x8472 +#define MT6332_VDVFS2_CON8 0x8474 +#define MT6332_VDVFS2_CON9 0x8476 +#define MT6332_VDVFS2_CON10 0x8478 +#define MT6332_VDVFS2_CON11 0x847A +#define MT6332_VDVFS2_CON12 0x847C +#define MT6332_VDVFS2_CON13 0x847E +#define MT6332_VDVFS2_CON14 0x8480 +#define MT6332_VDVFS2_CON15 0x8482 +#define MT6332_VDVFS2_CON16 0x8484 +#define MT6332_VDVFS2_CON17 0x8486 +#define MT6332_VDVFS2_CON18 0x8488 +#define MT6332_VDVFS2_CON19 0x848A +#define MT6332_VDVFS2_CON20 0x848C +#define MT6332_VDVFS2_CON21 0x848E +#define MT6332_VDVFS2_CON22 0x8490 +#define MT6332_VDVFS2_CON23 0x8492 +#define MT6332_VDVFS2_CON24 0x8494 +#define MT6332_VDVFS2_CON25 0x8496 +#define MT6332_VDVFS2_CON26 0x8498 +#define MT6332_VDVFS2_CON27 0x849A +#define MT6332_VRF1_CON0 0x849C +#define MT6332_VRF1_CON1 0x849E +#define MT6332_VRF1_CON2 0x84A0 +#define MT6332_VRF1_CON3 0x84A2 +#define MT6332_VRF1_CON4 0x84A4 +#define MT6332_VRF1_CON5 0x84A6 +#define MT6332_VRF1_CON6 0x84A8 +#define MT6332_VRF1_CON7 0x84AA +#define MT6332_VRF1_CON8 0x84AC +#define MT6332_VRF1_CON9 0x84AE +#define MT6332_VRF1_CON10 0x84B0 +#define MT6332_VRF1_CON11 0x84B2 +#define MT6332_VRF1_CON12 0x84B4 +#define MT6332_VRF1_CON13 0x84B6 +#define MT6332_VRF1_CON14 0x84B8 +#define MT6332_VRF1_CON15 0x84BA +#define MT6332_VRF1_CON16 0x84BC +#define MT6332_VRF1_CON17 0x84BE +#define MT6332_VRF1_CON18 0x84C0 +#define MT6332_VRF1_CON19 0x84C2 +#define MT6332_VRF1_CON20 0x84C4 +#define MT6332_VRF1_CON21 0x84C6 +#define MT6332_VRF2_CON0 0x84C8 +#define MT6332_VRF2_CON1 0x84CA +#define MT6332_VRF2_CON2 0x84CC +#define MT6332_VRF2_CON3 0x84CE +#define MT6332_VRF2_CON4 0x84D0 +#define MT6332_VRF2_CON5 0x84D2 +#define MT6332_VRF2_CON6 0x84D4 +#define MT6332_VRF2_CON7 0x84D6 +#define MT6332_VRF2_CON8 0x84D8 +#define MT6332_VRF2_CON9 0x84DA +#define MT6332_VRF2_CON10 0x84DC +#define MT6332_VRF2_CON11 0x84DE +#define MT6332_VRF2_CON12 0x84E0 +#define MT6332_VRF2_CON13 0x84E2 +#define MT6332_VRF2_CON14 0x84E4 +#define MT6332_VRF2_CON15 0x84E6 +#define MT6332_VRF2_CON16 0x84E8 +#define MT6332_VRF2_CON17 0x84EA +#define MT6332_VRF2_CON18 0x84EC +#define MT6332_VRF2_CON19 0x84EE +#define MT6332_VRF2_CON20 0x84F0 +#define MT6332_VRF2_CON21 0x84F2 +#define MT6332_VPA_CON0 0x84F4 +#define MT6332_VPA_CON1 0x84F6 +#define MT6332_VPA_CON2 0x84F8 +#define MT6332_VPA_CON3 0x84FC +#define MT6332_VPA_CON4 0x84FE +#define MT6332_VPA_CON5 0x8500 +#define MT6332_VPA_CON6 0x8502 +#define MT6332_VPA_CON7 0x8504 +#define MT6332_VPA_CON8 0x8506 +#define MT6332_VPA_CON9 0x8508 +#define MT6332_VPA_CON10 0x850A +#define MT6332_VPA_CON11 0x850C +#define MT6332_VPA_CON12 0x850E +#define MT6332_VPA_CON13 0x8510 +#define MT6332_VPA_CON14 0x8512 +#define MT6332_VPA_CON15 0x8514 +#define MT6332_VPA_CON16 0x8516 +#define MT6332_VPA_CON17 0x8518 +#define MT6332_VPA_CON18 0x851A +#define MT6332_VPA_CON19 0x851C +#define MT6332_VPA_CON20 0x851E +#define MT6332_VPA_CON21 0x8520 +#define MT6332_VPA_CON22 0x8522 +#define MT6332_VPA_CON23 0x8524 +#define MT6332_VPA_CON24 0x8526 +#define MT6332_VPA_CON25 0x8528 +#define MT6332_VSBST_CON0 0x852A +#define MT6332_VSBST_CON1 0x852C +#define MT6332_VSBST_CON2 0x852E +#define MT6332_VSBST_CON3 0x8530 +#define MT6332_VSBST_CON4 0x8532 +#define MT6332_VSBST_CON5 0x8534 +#define MT6332_VSBST_CON6 0x8536 +#define MT6332_VSBST_CON7 0x8538 +#define MT6332_VSBST_CON8 0x853A +#define MT6332_VSBST_CON9 0x853C +#define MT6332_VSBST_CON10 0x853E +#define MT6332_VSBST_CON11 0x8540 +#define MT6332_VSBST_CON12 0x8542 +#define MT6332_VSBST_CON13 0x8544 +#define MT6332_VSBST_CON14 0x8546 +#define MT6332_VSBST_CON15 0x8548 +#define MT6332_VSBST_CON16 0x854A +#define MT6332_VSBST_CON17 0x854C +#define MT6332_VSBST_CON18 0x854E +#define MT6332_VSBST_CON19 0x8550 +#define MT6332_VSBST_CON20 0x8552 +#define MT6332_VSBST_CON21 0x8554 +#define MT6332_BUCK_K_CON0 0x8556 +#define MT6332_BUCK_K_CON1 0x8558 +#define MT6332_BUCK_K_CON2 0x855A +#define MT6332_BUCK_K_CON3 0x855C +#define MT6332_BUCK_K_CON4 0x855E +#define MT6332_BUCK_K_CON5 0x8560 +#define MT6332_AUXADC_ADC0 0x8800 +#define MT6332_AUXADC_ADC1 0x8802 +#define MT6332_AUXADC_ADC2 0x8804 +#define MT6332_AUXADC_ADC3 0x8806 +#define MT6332_AUXADC_ADC4 0x8808 +#define MT6332_AUXADC_ADC5 0x880A +#define MT6332_AUXADC_ADC6 0x880C +#define MT6332_AUXADC_ADC7 0x880E +#define MT6332_AUXADC_ADC8 0x8810 +#define MT6332_AUXADC_ADC9 0x8812 +#define MT6332_AUXADC_ADC10 0x8814 +#define MT6332_AUXADC_ADC11 0x8816 +#define MT6332_AUXADC_ADC12 0x8818 +#define MT6332_AUXADC_ADC13 0x881A +#define MT6332_AUXADC_ADC14 0x881C +#define MT6332_AUXADC_ADC15 0x881E +#define MT6332_AUXADC_ADC16 0x8820 +#define MT6332_AUXADC_ADC17 0x8822 +#define MT6332_AUXADC_ADC18 0x8824 +#define MT6332_AUXADC_ADC19 0x8826 +#define MT6332_AUXADC_ADC20 0x8828 +#define MT6332_AUXADC_ADC21 0x882A +#define MT6332_AUXADC_ADC22 0x882C +#define MT6332_AUXADC_ADC23 0x882E +#define MT6332_AUXADC_ADC24 0x8830 +#define MT6332_AUXADC_ADC25 0x8832 +#define MT6332_AUXADC_ADC26 0x8834 +#define MT6332_AUXADC_ADC27 0x8836 +#define MT6332_AUXADC_ADC28 0x8838 +#define MT6332_AUXADC_ADC29 0x883A +#define MT6332_AUXADC_ADC30 0x883C +#define MT6332_AUXADC_ADC31 0x883E +#define MT6332_AUXADC_ADC32 0x8840 +#define MT6332_AUXADC_ADC33 0x8842 +#define MT6332_AUXADC_ADC34 0x8844 +#define MT6332_AUXADC_ADC35 0x8846 +#define MT6332_AUXADC_ADC36 0x8848 +#define MT6332_AUXADC_ADC37 0x884A +#define MT6332_AUXADC_ADC38 0x884C +#define MT6332_AUXADC_ADC39 0x884E +#define MT6332_AUXADC_ADC40 0x8850 +#define MT6332_AUXADC_ADC41 0x8852 +#define MT6332_AUXADC_ADC42 0x8854 +#define MT6332_AUXADC_ADC43 0x8856 +#define MT6332_AUXADC_STA0 0x8858 +#define MT6332_AUXADC_STA1 0x885A +#define MT6332_AUXADC_RQST0 0x885C +#define MT6332_AUXADC_RQST0_SET 0x885E +#define MT6332_AUXADC_RQST0_CLR 0x8860 +#define MT6332_AUXADC_RQST1 0x8862 +#define MT6332_AUXADC_RQST1_SET 0x8864 +#define MT6332_AUXADC_RQST1_CLR 0x8866 +#define MT6332_AUXADC_CON0 0x8868 +#define MT6332_AUXADC_CON1 0x886A +#define MT6332_AUXADC_CON2 0x886C +#define MT6332_AUXADC_CON3 0x886E +#define MT6332_AUXADC_CON4 0x8870 +#define MT6332_AUXADC_CON5 0x8872 +#define MT6332_AUXADC_CON6 0x8874 +#define MT6332_AUXADC_CON7 0x8876 +#define MT6332_AUXADC_CON8 0x8878 +#define MT6332_AUXADC_CON9 0x887A +#define MT6332_AUXADC_CON10 0x887C +#define MT6332_AUXADC_CON11 0x887E +#define MT6332_AUXADC_CON12 0x8880 +#define MT6332_AUXADC_CON13 0x8882 +#define MT6332_AUXADC_CON14 0x8884 +#define MT6332_AUXADC_CON15 0x8886 +#define MT6332_AUXADC_CON16 0x8888 +#define MT6332_AUXADC_CON17 0x888A +#define MT6332_AUXADC_CON18 0x888C +#define MT6332_AUXADC_CON19 0x888E +#define MT6332_AUXADC_CON20 0x8890 +#define MT6332_AUXADC_CON21 0x8892 +#define MT6332_AUXADC_CON22 0x8894 +#define MT6332_AUXADC_CON23 0x8896 +#define MT6332_AUXADC_CON24 0x8898 +#define MT6332_AUXADC_CON25 0x889A +#define MT6332_AUXADC_CON26 0x889C +#define MT6332_AUXADC_CON27 0x889E +#define MT6332_AUXADC_CON28 0x88A0 +#define MT6332_AUXADC_CON29 0x88A2 +#define MT6332_AUXADC_CON30 0x88A4 +#define MT6332_AUXADC_CON31 0x88A6 +#define MT6332_AUXADC_CON32 0x88A8 +#define MT6332_AUXADC_CON33 0x88AA +#define MT6332_AUXADC_CON34 0x88AC +#define MT6332_AUXADC_CON35 0x88AE +#define MT6332_AUXADC_CON36 0x88B0 +#define MT6332_AUXADC_CON37 0x88B2 +#define MT6332_AUXADC_CON38 0x88B4 +#define MT6332_AUXADC_CON39 0x88B6 +#define MT6332_AUXADC_CON40 0x88B8 +#define MT6332_AUXADC_CON41 0x88BA +#define MT6332_AUXADC_CON42 0x88BC +#define MT6332_AUXADC_CON43 0x88BE +#define MT6332_AUXADC_CON44 0x88C0 +#define MT6332_AUXADC_CON45 0x88C2 +#define MT6332_AUXADC_CON46 0x88C4 +#define MT6332_AUXADC_CON47 0x88C6 +#define MT6332_STRUP_CONA0 0x8C00 +#define MT6332_STRUP_CONA1 0x8C02 +#define MT6332_STRUP_CONA2 0x8C04 +#define MT6332_STRUP_CON0 0x8C06 +#define MT6332_STRUP_CON2 0x8C08 +#define MT6332_STRUP_CON3 0x8C0A +#define MT6332_STRUP_CON4 0x8C0C +#define MT6332_STRUP_CON5 0x8C0E +#define MT6332_STRUP_CON6 0x8C10 +#define MT6332_STRUP_CON7 0x8C12 +#define MT6332_STRUP_CON8 0x8C14 +#define MT6332_STRUP_CON9 0x8C16 +#define MT6332_STRUP_CON10 0x8C18 +#define MT6332_STRUP_CON11 0x8C1A +#define MT6332_STRUP_CON12 0x8C1C +#define MT6332_STRUP_CON13 0x8C1E +#define MT6332_STRUP_CON14 0x8C20 +#define MT6332_STRUP_CON15 0x8C22 +#define MT6332_STRUP_CON16 0x8C24 +#define MT6332_STRUP_CON17 0x8C26 +#define MT6332_FGADC_CON0 0x8C28 +#define MT6332_FGADC_CON1 0x8C2A +#define MT6332_FGADC_CON2 0x8C2C +#define MT6332_FGADC_CON3 0x8C2E +#define MT6332_FGADC_CON4 0x8C30 +#define MT6332_FGADC_CON5 0x8C32 +#define MT6332_FGADC_CON6 0x8C34 +#define MT6332_FGADC_CON7 0x8C36 +#define MT6332_FGADC_CON8 0x8C38 +#define MT6332_FGADC_CON9 0x8C3A +#define MT6332_FGADC_CON10 0x8C3C +#define MT6332_FGADC_CON11 0x8C3E +#define MT6332_FGADC_CON12 0x8C40 +#define MT6332_FGADC_CON13 0x8C42 +#define MT6332_FGADC_CON14 0x8C44 +#define MT6332_FGADC_CON15 0x8C46 +#define MT6332_FGADC_CON16 0x8C48 +#define MT6332_FGADC_CON17 0x8C4A +#define MT6332_FGADC_CON18 0x8C4C +#define MT6332_FGADC_CON19 0x8C4E +#define MT6332_FGADC_CON20 0x8C50 +#define MT6332_FGADC_CON21 0x8C52 +#define MT6332_FGADC_CON22 0x8C54 +#define MT6332_OTP_CON0 0x8C56 +#define MT6332_OTP_CON1 0x8C58 +#define MT6332_OTP_CON2 0x8C5A +#define MT6332_OTP_CON3 0x8C5C +#define MT6332_OTP_CON4 0x8C5E +#define MT6332_OTP_CON5 0x8C60 +#define MT6332_OTP_CON6 0x8C62 +#define MT6332_OTP_CON7 0x8C64 +#define MT6332_OTP_CON8 0x8C66 +#define MT6332_OTP_CON9 0x8C68 +#define MT6332_OTP_CON10 0x8C6A +#define MT6332_OTP_CON11 0x8C6C +#define MT6332_OTP_CON12 0x8C6E +#define MT6332_OTP_CON13 0x8C70 +#define MT6332_OTP_CON14 0x8C72 +#define MT6332_OTP_DOUT_0_15 0x8C74 +#define MT6332_OTP_DOUT_16_31 0x8C76 +#define MT6332_OTP_DOUT_32_47 0x8C78 +#define MT6332_OTP_DOUT_48_63 0x8C7A +#define MT6332_OTP_DOUT_64_79 0x8C7C +#define MT6332_OTP_DOUT_80_95 0x8C7E +#define MT6332_OTP_DOUT_96_111 0x8C80 +#define MT6332_OTP_DOUT_112_127 0x8C82 +#define MT6332_OTP_DOUT_128_143 0x8C84 +#define MT6332_OTP_DOUT_144_159 0x8C86 +#define MT6332_OTP_DOUT_160_175 0x8C88 +#define MT6332_OTP_DOUT_176_191 0x8C8A +#define MT6332_OTP_DOUT_192_207 0x8C8C +#define MT6332_OTP_DOUT_208_223 0x8C8E +#define MT6332_OTP_DOUT_224_239 0x8C90 +#define MT6332_OTP_DOUT_240_255 0x8C92 +#define MT6332_OTP_VAL_0_15 0x8C94 +#define MT6332_OTP_VAL_16_31 0x8C96 +#define MT6332_OTP_VAL_32_47 0x8C98 +#define MT6332_OTP_VAL_48_63 0x8C9A +#define MT6332_OTP_VAL_64_79 0x8C9C +#define MT6332_OTP_VAL_80_95 0x8C9E +#define MT6332_OTP_VAL_96_111 0x8CA0 +#define MT6332_OTP_VAL_112_127 0x8CA2 +#define MT6332_OTP_VAL_128_143 0x8CA4 +#define MT6332_OTP_VAL_144_159 0x8CA6 +#define MT6332_OTP_VAL_160_175 0x8CA8 +#define MT6332_OTP_VAL_176_191 0x8CAA +#define MT6332_OTP_VAL_192_207 0x8CAC +#define MT6332_OTP_VAL_208_223 0x8CAE +#define MT6332_OTP_VAL_224_239 0x8CB0 +#define MT6332_OTP_VAL_240_255 0x8CB2 +#define MT6332_LDO_CON0 0x8CB4 +#define MT6332_LDO_CON1 0x8CB6 +#define MT6332_LDO_CON2 0x8CB8 +#define MT6332_LDO_CON3 0x8CBA +#define MT6332_LDO_CON5 0x8CBC +#define MT6332_LDO_CON6 0x8CBE +#define MT6332_LDO_CON7 0x8CC0 +#define MT6332_LDO_CON8 0x8CC2 +#define MT6332_LDO_CON9 0x8CC4 +#define MT6332_LDO_CON10 0x8CC6 +#define MT6332_LDO_CON11 0x8CC8 +#define MT6332_LDO_CON12 0x8CCA +#define MT6332_LDO_CON13 0x8CCC +#define MT6332_FQMTR_CON0 0x8CCE +#define MT6332_FQMTR_CON1 0x8CD0 +#define MT6332_FQMTR_CON2 0x8CD2 +#define MT6332_IWLED_CON0 0x8CD4 +#define MT6332_IWLED_DEG 0x8CD6 +#define MT6332_IWLED_STATUS 0x8CD8 +#define MT6332_IWLED_EN_CTRL 0x8CDA +#define MT6332_IWLED_CON1 0x8CDC +#define MT6332_IWLED_CON2 0x8CDE +#define MT6332_IWLED_TRIM0 0x8CE0 +#define MT6332_IWLED_TRIM1 0x8CE2 +#define MT6332_IWLED_CON3 0x8CE4 +#define MT6332_IWLED_CON4 0x8CE6 +#define MT6332_IWLED_CON5 0x8CE8 +#define MT6332_IWLED_CON6 0x8CEA +#define MT6332_IWLED_CON7 0x8CEC +#define MT6332_IWLED_CON8 0x8CEE +#define MT6332_IWLED_CON9 0x8CF0 +#define MT6332_SPK_CON0 0x8CF2 +#define MT6332_SPK_CON1 0x8CF4 +#define MT6332_SPK_CON2 0x8CF6 +#define MT6332_SPK_CON3 0x8CF8 +#define MT6332_SPK_CON4 0x8CFA +#define MT6332_SPK_CON5 0x8CFC +#define MT6332_SPK_CON6 0x8CFE +#define MT6332_SPK_CON7 0x8D00 +#define MT6332_SPK_CON8 0x8D02 +#define MT6332_SPK_CON9 0x8D04 +#define MT6332_SPK_CON10 0x8D06 +#define MT6332_SPK_CON11 0x8D08 +#define MT6332_SPK_CON12 0x8D0A +#define MT6332_SPK_CON13 0x8D0C +#define MT6332_SPK_CON14 0x8D0E +#define MT6332_SPK_CON15 0x8D10 +#define MT6332_SPK_CON16 0x8D12 +#define MT6332_TESTI_CON0 0x8D14 +#define MT6332_TESTI_CON1 0x8D16 +#define MT6332_TESTI_CON2 0x8D18 +#define MT6332_TESTI_CON3 0x8D1A +#define MT6332_TESTI_CON4 0x8D1C +#define MT6332_TESTI_CON5 0x8D1E +#define MT6332_TESTI_CON6 0x8D20 +#define MT6332_TESTI_MUX_CON0 0x8D22 +#define MT6332_TESTI_MUX_CON1 0x8D24 +#define MT6332_TESTI_MUX_CON2 0x8D26 +#define MT6332_TESTI_MUX_CON3 0x8D28 +#define MT6332_TESTI_MUX_CON4 0x8D2A +#define MT6332_TESTI_MUX_CON5 0x8D2C +#define MT6332_TESTI_MUX_CON6 0x8D2E +#define MT6332_TESTO_CON0 0x8D30 +#define MT6332_TESTO_CON1 0x8D32 +#define MT6332_TEST_OMUX_CON0 0x8D34 +#define MT6332_TEST_OMUX_CON1 0x8D36 +#define MT6332_DEBUG_CON0 0x8D38 +#define MT6332_DEBUG_CON1 0x8D3A +#define MT6332_DEBUG_CON2 0x8D3C +#define MT6332_FGADC_CON23 0x8D3E +#define MT6332_FGADC_CON24 0x8D40 +#define MT6332_FGADC_CON25 0x8D42 +#define MT6332_TOP_RST_STATUS 0x8D44 +#define MT6332_TOP_RST_STATUS_SET 0x8D46 +#define MT6332_TOP_RST_STATUS_CLR 0x8D48 +#define MT6332_VDVFS2_CON28 0x8D4A + +#endif /* __MFD_MT6332_REGISTERS_H__ */ diff --git a/include/linux/mfd/mt6397/core.h b/include/linux/mfd/mt6397/core.h index 3fecaffe5019..627487e26287 100644 --- a/include/linux/mfd/mt6397/core.h +++ b/include/linux/mfd/mt6397/core.h @@ -12,6 +12,8 @@ enum chip_id { MT6323_CHIP_ID = 0x23, + MT6331_CHIP_ID = 0x20, + MT6332_CHIP_ID = 0x20, MT6357_CHIP_ID = 0x57, MT6358_CHIP_ID = 0x58, MT6359_CHIP_ID = 0x59, -- cgit v1.2.3 From 32f02a211b0a192553075803b0b819027f96bb43 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Jul 2022 13:57:48 +0200 Subject: Revert "platform/chrome: Add Type-C mux set command definitions" This reverts commit 28a6ed8e39f77f6ac613ec9b7461aa75e85fa79a. The chrome platform driver changes need to come in through the platform tree due to some api changes that showed up there that cause build errors in linux-next Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20220719160821.5e68e30b@oak.ozlabs.ibm.com Cc: Prashant Malani Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/cros_ec_commands.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index a3945c5e7f50..8cfa8cfca77e 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -5722,21 +5722,8 @@ enum typec_control_command { TYPEC_CONTROL_COMMAND_EXIT_MODES, TYPEC_CONTROL_COMMAND_CLEAR_EVENTS, TYPEC_CONTROL_COMMAND_ENTER_MODE, - TYPEC_CONTROL_COMMAND_TBT_UFP_REPLY, - TYPEC_CONTROL_COMMAND_USB_MUX_SET, }; -/* Replies the AP may specify to the TBT EnterMode command as a UFP */ -enum typec_tbt_ufp_reply { - TYPEC_TBT_UFP_REPLY_NAK, - TYPEC_TBT_UFP_REPLY_ACK, -}; - -struct typec_usb_mux_set { - uint8_t mux_index; /* Index of the mux to set in the chain */ - uint8_t mux_flags; /* USB_PD_MUX_*-encoded USB mux state to set */ -} __ec_align1; - struct ec_params_typec_control { uint8_t port; uint8_t command; /* enum typec_control_command */ @@ -5750,8 +5737,6 @@ struct ec_params_typec_control { union { uint32_t clear_events_mask; uint8_t mode_to_enter; /* enum typec_mode */ - uint8_t tbt_ufp_reply; /* enum typec_tbt_ufp_reply */ - struct typec_usb_mux_set mux_params; uint8_t placeholder[128]; }; } __ec_align1; @@ -5830,9 +5815,6 @@ enum tcpc_cc_polarity { #define PD_STATUS_EVENT_SOP_DISC_DONE BIT(0) #define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE BIT(1) #define PD_STATUS_EVENT_HARD_RESET BIT(2) -#define PD_STATUS_EVENT_DISCONNECTED BIT(3) -#define PD_STATUS_EVENT_MUX_0_SET_DONE BIT(4) -#define PD_STATUS_EVENT_MUX_1_SET_DONE BIT(5) struct ec_params_typec_status { uint8_t port; -- cgit v1.2.3 From 2b3416ceff5e6bd4922f6d1c61fb68113dd82302 Mon Sep 17 00:00:00 2001 From: Yang Xu Date: Thu, 14 Jul 2022 14:11:25 +0800 Subject: fs: add mode_strip_sgid() helper Add a dedicated helper to handle the setgid bit when creating a new file in a setgid directory. This is a preparatory patch for moving setgid stripping into the vfs. The patch contains no functional changes. Currently the setgid stripping logic is open-coded directly in inode_init_owner() and the individual filesystems are responsible for handling setgid inheritance. Since this has proven to be brittle as evidenced by old issues we uncovered over the last months (see [1] to [3] below) we will try to move this logic into the vfs. Link: e014f37db1a2 ("xfs: use setattr_copy to set vfs inode attributes") [1] Link: 01ea173e103e ("xfs: fix up non-directory creation in SGID directories") [2] Link: fd84bfdddd16 ("ceph: fix up non-directory creation in SGID directories") [3] Link: https://lore.kernel.org/r/1657779088-2242-1-git-send-email-xuyang2018.jy@fujitsu.com Reviewed-by: Darrick J. Wong Reviewed-by: Christian Brauner (Microsoft) Reviewed-and-Tested-by: Jeff Layton Signed-off-by: Yang Xu Signed-off-by: Christian Brauner (Microsoft) --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae..50642668c60f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1903,6 +1903,8 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode, const struct inode *dir, umode_t mode); extern bool may_open_dev(const struct path *path); +umode_t mode_strip_sgid(struct user_namespace *mnt_userns, + const struct inode *dir, umode_t mode); /* * This is the "filldir" function type, used by readdir() to let -- cgit v1.2.3 From fd1894224407c484f652ad456e1ce423e89bb3eb Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 15 Jul 2022 19:55:59 +0800 Subject: bpf: Don't redirect packets with invalid pkt_len Syzbot found an issue [1]: fq_codel_drop() try to drop a flow whitout any skbs, that is, the flow->head is null. The root cause, as the [2] says, is because that bpf_prog_test_run_skb() run a bpf prog which redirects empty skbs. So we should determine whether the length of the packet modified by bpf prog or others like bpf_prog_test is valid before forwarding it directly. LINK: [1] https://syzkaller.appspot.com/bug?id=0b84da80c2917757915afa89f7738a9d16ec96c5 LINK: [2] https://www.spinics.net/lists/netdev/msg777503.html Reported-by: syzbot+7a12909485b94426aceb@syzkaller.appspotmail.com Signed-off-by: Zhengchao Shao Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220715115559.139691-1-shaozhengchao@huawei.com Signed-off-by: Alexei Starovoitov --- include/linux/skbuff.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f6a27ab19202..82e8368ba6e6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2459,6 +2459,14 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline void skb_assert_len(struct sk_buff *skb) +{ +#ifdef CONFIG_DEBUG_NET + if (WARN_ONCE(!skb->len, "%s\n", __func__)) + DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); +#endif /* CONFIG_DEBUG_NET */ +} + /* * Add data to an sk_buff */ -- cgit v1.2.3 From 800d6acf40e5ce676b53a1259fb4e93e56279367 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 May 2022 17:07:45 +0200 Subject: rcu: tiny: Record kvfree_call_rcu() call stack for KASAN When running KASAN with Tiny RCU (e.g. under ARCH=um, where a working KASAN patch is now available), we don't get any information on the original kfree_rcu() (or similar) caller when a problem is reported, as Tiny RCU doesn't record this. Add the recording, which required pulling kvfree_call_rcu() out of line for the KASAN case since the recording function (kasan_record_aux_stack_noalloc) is neither exported, nor can we include kasan.h into rcutiny.h. without KASAN, the patch has no size impact (ARCH=um kernel): text data bss dec hex filename 6151515 4423154 33148520 43723189 29b29b5 linux 6151515 4423154 33148520 43723189 29b29b5 linux + patch with KASAN, the impact on my build was minimal: text data bss dec hex filename 13915539 7388050 33282304 54585893 340ea25 linux 13911266 7392114 33282304 54585684 340e954 linux + patch -4273 +4064 +-0 -209 Acked-by: Dmitry Vyukov Signed-off-by: Johannes Berg Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 5fed476f977f..d84e13f2c384 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -38,7 +38,7 @@ static inline void synchronize_rcu_expedited(void) */ extern void kvfree(const void *addr); -static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) +static inline void __kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) { if (head) { call_rcu(head, func); @@ -51,6 +51,15 @@ static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) kvfree((void *) func); } +#ifdef CONFIG_KASAN_GENERIC +void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func); +#else +static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) +{ + __kvfree_call_rcu(head, func); +} +#endif + void rcu_qs(void); static inline void rcu_softirq_qs(void) -- cgit v1.2.3 From 2e5e4185ff89ea0fc44c9dead8dccf306a3b3bbb Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 4 Jul 2022 19:34:08 +0300 Subject: net/mlx5: Expose ts_cqe_metadata_size2wqe_counter Add capability field which indicates the mask for wqe_counter which connects between loopback CQE and the original WQE. With this connection the driver can identify lost of the loopback CQE and reply PTP synchronization with timestamp given in the original CQE. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 254cc22f5eec..51b4e71017ee 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1833,7 +1833,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 sw_vhca_id[0xe]; u8 reserved_at_230[0x10]; - u8 reserved_at_240[0x5c0]; + u8 reserved_at_240[0xb]; + u8 ts_cqe_metadata_size2wqe_counter[0x5]; + u8 reserved_at_250[0x10]; + + u8 reserved_at_260[0x5a0]; }; enum mlx5_ifc_flow_destination_type { -- cgit v1.2.3 From 7c701d92b2b5e5175dbfec875816474b802b0c45 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:29 +0100 Subject: skbuff: carry external ubuf_info in msghdr Make possible for network in-kernel callers like io_uring to pass in a custom ubuf_info by setting it in a new field of struct msghdr. Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 17311ad9f9af..7bac9fc1cee0 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -69,6 +69,7 @@ struct msghdr { unsigned int msg_flags; /* flags on received message */ __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ + struct ubuf_info *msg_ubuf; }; struct user_msghdr { -- cgit v1.2.3 From ebe73a284f4de8c5d401adeccd9b8fe3183b6e95 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 12 Jul 2022 21:52:30 +0100 Subject: net: Allow custom iter handler in msghdr Add support for custom iov_iter handling to msghdr. The idea is that in-kernel subsystems want control over how an SG is split. Signed-off-by: David Ahern [pavel: move callback into msghdr] Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 7 ++++--- include/linux/socket.h | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8e12b3b9ad6c..a8a2dd4cfdfd 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1776,13 +1776,14 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, bool success); -int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, - struct iov_iter *from, size_t length); +int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb, struct iov_iter *from, + size_t length); static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { - return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len); + return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len); } int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, diff --git a/include/linux/socket.h b/include/linux/socket.h index 7bac9fc1cee0..3c11ef18a9cf 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -14,6 +14,8 @@ struct file; struct pid; struct cred; struct socket; +struct sock; +struct sk_buff; #define __sockaddr_check_size(size) \ BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage))) @@ -70,6 +72,8 @@ struct msghdr { __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct ubuf_info *msg_ubuf; + int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, + struct iov_iter *from, size_t length); }; struct user_msghdr { -- cgit v1.2.3 From 753f1ca4e1e50248a1b760c9774d6d6b354562cc Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:31 +0100 Subject: net: introduce managed frags infrastructure Some users like io_uring can do page pinning more efficiently, so we want a way to delegate referencing to other subsystems. For that add a new flag called SKBFL_MANAGED_FRAG_REFS. When set, skb doesn't hold page references and upper layers are responsivle to managing page lifetime. It's allowed to convert skbs from managed to normal by calling skb_zcopy_downgrade_managed(). The function will take all needed page references and clear the flag. It's needed, for instance, to avoid mixing managed modes. Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a8a2dd4cfdfd..07004593d7ca 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -688,11 +688,16 @@ enum { SKBFL_PURE_ZEROCOPY = BIT(2), SKBFL_DONT_ORPHAN = BIT(3), + + /* page references are managed by the ubuf_info, so it's safe to + * use frags only up until ubuf_info is released + */ + SKBFL_MANAGED_FRAG_REFS = BIT(4), }; #define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG) #define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \ - SKBFL_DONT_ORPHAN) + SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS) /* * The callback notifies userspace to release buffers when skb DMA is done in @@ -1810,6 +1815,11 @@ static inline bool skb_zcopy_pure(const struct sk_buff *skb) return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY; } +static inline bool skb_zcopy_managed(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->flags & SKBFL_MANAGED_FRAG_REFS; +} + static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1, const struct sk_buff *skb2) { @@ -1884,6 +1894,14 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) } } +void __skb_zcopy_downgrade_managed(struct sk_buff *skb); + +static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb) +{ + if (unlikely(skb_zcopy_managed(skb))) + __skb_zcopy_downgrade_managed(skb); +} + static inline void skb_mark_not_on_list(struct sk_buff *skb) { skb->next = NULL; @@ -3499,7 +3517,10 @@ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) */ static inline void skb_frag_unref(struct sk_buff *skb, int f) { - __skb_frag_unref(&skb_shinfo(skb)->frags[f], skb->pp_recycle); + struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (!skb_zcopy_managed(skb)) + __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle); } /** -- cgit v1.2.3 From 84ce071e38a6e25ea3ea91188e5482ac1f17b3af Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:32 +0100 Subject: net: introduce __skb_fill_page_desc_noacc Managed pages contain pinned userspace pages and controlled by upper layers, there is no need in tracking skb->pfmemalloc for them. Introduce a helper for filling frags but ignoring page tracking, it'll be needed later. Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 07004593d7ca..1111adefd906 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2550,6 +2550,22 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) return skb_headlen(skb) + __skb_pagelen(skb); } +static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, + int i, struct page *page, + int off, int size) +{ + skb_frag_t *frag = &shinfo->frags[i]; + + /* + * Propagate page pfmemalloc to the skb if we can. The problem is + * that not all callers have unique ownership of the page but rely + * on page_is_pfmemalloc doing the right thing(tm). + */ + frag->bv_page = page; + frag->bv_offset = off; + skb_frag_size_set(frag, size); +} + /** * __skb_fill_page_desc - initialise a paged fragment in an skb * @skb: buffer containing fragment to be initialised @@ -2566,17 +2582,7 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - /* - * Propagate page pfmemalloc to the skb if we can. The problem is - * that not all callers have unique ownership of the page but rely - * on page_is_pfmemalloc doing the right thing(tm). - */ - frag->bv_page = page; - frag->bv_offset = off; - skb_frag_size_set(frag, size); - + __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); page = compound_head(page); if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; -- cgit v1.2.3 From e6b8a0a5e7f688e092d1c639d438ccd0b323213c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 19 Jul 2022 13:52:44 -0700 Subject: PCI: Add vendor ID for the PCI SIG This ID is used in DOE headers to identify protocols that are defined within the PCI Express Base Specification, PCIe r6.0, sec 6.30.1.1 table 6-32. Acked-by: Bjorn Helgaas Reviewed-by: Davidlohr Bueso Reviewed-by: Dan Williams Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20220719205249.566684-2-ira.weiny@intel.com Signed-off-by: Dan Williams --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0178823ce8c2..8af3b86206b1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -151,6 +151,7 @@ #define PCI_CLASS_OTHERS 0xff /* Vendors and devices. Sort key: vendor first, device next. */ +#define PCI_VENDOR_ID_PCI_SIG 0x0001 #define PCI_VENDOR_ID_LOONGSON 0x0014 -- cgit v1.2.3 From 9d24322e887b6a3d3f9f9c3e76937a646102c8c1 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 19 Jul 2022 13:52:46 -0700 Subject: PCI/DOE: Add DOE mailbox support functions Introduced in a PCIe r6.0, sec 6.30, DOE provides a config space based mailbox with standard protocol discovery. Each mailbox is accessed through a DOE Extended Capability. Each DOE mailbox must support the DOE discovery protocol in addition to any number of additional protocols. Define core PCIe functionality to manage a single PCIe DOE mailbox at a defined config space offset. Functionality includes iterating, creating, query of supported protocol, and task submission. Destruction of the mailboxes is device managed. Cc: "Li, Ming" Cc: Bjorn Helgaas Cc: Matthew Wilcox Acked-by: Bjorn Helgaas Signed-off-by: Jonathan Cameron Co-developed-by: Ira Weiny Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20220719205249.566684-4-ira.weiny@intel.com Signed-off-by: Dan Williams --- include/linux/pci-doe.h | 77 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 include/linux/pci-doe.h (limited to 'include/linux') diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h new file mode 100644 index 000000000000..ed9b4df792b8 --- /dev/null +++ b/include/linux/pci-doe.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Data Object Exchange + * PCIe r6.0, sec 6.30 DOE + * + * Copyright (C) 2021 Huawei + * Jonathan Cameron + * + * Copyright (C) 2022 Intel Corporation + * Ira Weiny + */ + +#ifndef LINUX_PCI_DOE_H +#define LINUX_PCI_DOE_H + +struct pci_doe_protocol { + u16 vid; + u8 type; +}; + +struct pci_doe_mb; + +/** + * struct pci_doe_task - represents a single query/response + * + * @prot: DOE Protocol + * @request_pl: The request payload + * @request_pl_sz: Size of the request payload (bytes) + * @response_pl: The response payload + * @response_pl_sz: Size of the response payload (bytes) + * @rv: Return value. Length of received response or error (bytes) + * @complete: Called when task is complete + * @private: Private data for the consumer + * @work: Used internally by the mailbox + * @doe_mb: Used internally by the mailbox + * + * The payload sizes and rv are specified in bytes with the following + * restrictions concerning the protocol. + * + * 1) The request_pl_sz must be a multiple of double words (4 bytes) + * 2) The response_pl_sz must be >= a single double word (4 bytes) + * 3) rv is returned as bytes but it will be a multiple of double words + * + * NOTE there is no need for the caller to initialize work or doe_mb. + */ +struct pci_doe_task { + struct pci_doe_protocol prot; + u32 *request_pl; + size_t request_pl_sz; + u32 *response_pl; + size_t response_pl_sz; + int rv; + void (*complete)(struct pci_doe_task *task); + void *private; + + /* No need for the user to initialize these fields */ + struct work_struct work; + struct pci_doe_mb *doe_mb; +}; + +/** + * pci_doe_for_each_off - Iterate each DOE capability + * @pdev: struct pci_dev to iterate + * @off: u16 of config space offset of each mailbox capability found + */ +#define pci_doe_for_each_off(pdev, off) \ + for (off = pci_find_next_ext_capability(pdev, off, \ + PCI_EXT_CAP_ID_DOE); \ + off > 0; \ + off = pci_find_next_ext_capability(pdev, off, \ + PCI_EXT_CAP_ID_DOE)) + +struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset); +bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type); +int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task); + +#endif -- cgit v1.2.3 From 9d6794feeb90903b10c34bddd9c74c992447ce83 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 19 Jul 2022 13:52:48 -0700 Subject: driver-core: Introduce BIN_ATTR_ADMIN_{RO,RW} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many binary attributes need to limit access to CAP_SYS_ADMIN only; ie many binary attributes specify is_visible with 0400 or 0600. Make setting the permissions of such attributes more explicit by defining BIN_ATTR_ADMIN_{RO,RW}. Cc: Bjorn Helgaas Suggested-by: Dan Williams Suggested-by: Krzysztof Wilczyński Reviewed-by: Jonathan Cameron Reviewed-by: Greg Kroah-Hartman Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20220719205249.566684-6-ira.weiny@intel.com Signed-off-by: Dan Williams --- include/linux/sysfs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index e3f1e8ac1f85..fd3fe5c8c17f 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -235,6 +235,22 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_WO(_name, _size) #define BIN_ATTR_RW(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size) + +#define __BIN_ATTR_ADMIN_RO(_name, _size) { \ + .attr = { .name = __stringify(_name), .mode = 0400 }, \ + .read = _name##_read, \ + .size = _size, \ +} + +#define __BIN_ATTR_ADMIN_RW(_name, _size) \ + __BIN_ATTR(_name, 0600, _name##_read, _name##_write, _size) + +#define BIN_ATTR_ADMIN_RO(_name, _size) \ +struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RO(_name, _size) + +#define BIN_ATTR_ADMIN_RW(_name, _size) \ +struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RW(_name, _size) + struct sysfs_ops { ssize_t (*show)(struct kobject *, struct attribute *, char *); ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t); -- cgit v1.2.3 From d0637c505f8a1d8c4088642f1f3e9e3b22da14f6 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 20 Jul 2022 21:37:37 +1200 Subject: arm64: enable THP_SWAP for arm64 THP_SWAP has been proven to improve the swap throughput significantly on x86_64 according to commit bd4c82c22c367e ("mm, THP, swap: delay splitting THP after swapped out"). As long as arm64 uses 4K page size, it is quite similar with x86_64 by having 2MB PMD THP. THP_SWAP is architecture-independent, thus, enabling it on arm64 will benefit arm64 as well. A corner case is that MTE has an assumption that only base pages can be swapped. We won't enable THP_SWAP for ARM64 hardware with MTE support until MTE is reworked to coexist with THP_SWAP. A micro-benchmark is written to measure thp swapout throughput as below, unsigned long long tv_to_ms(struct timeval tv) { return tv.tv_sec * 1000 + tv.tv_usec / 1000; } main() { struct timeval tv_b, tv_e;; #define SIZE 400*1024*1024 volatile void *p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (!p) { perror("fail to get memory"); exit(-1); } madvise(p, SIZE, MADV_HUGEPAGE); memset(p, 0x11, SIZE); /* write to get mem */ gettimeofday(&tv_b, NULL); madvise(p, SIZE, MADV_PAGEOUT); gettimeofday(&tv_e, NULL); printf("swp out bandwidth: %ld bytes/ms\n", SIZE/(tv_to_ms(tv_e) - tv_to_ms(tv_b))); } Testing is done on rk3568 64bit Quad Core Cortex-A55 platform - ROCK 3A. thp swp throughput w/o patch: 2734bytes/ms (mean of 10 tests) thp swp throughput w/ patch: 3331bytes/ms (mean of 10 tests) Cc: "Huang, Ying" Cc: Minchan Kim Cc: Johannes Weiner Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: Steven Price Cc: Yang Shi Reviewed-by: Anshuman Khandual Signed-off-by: Barry Song Link: https://lore.kernel.org/r/20220720093737.133375-1-21cnbao@gmail.com Signed-off-by: Will Deacon --- include/linux/huge_mm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index de29821231c9..4ddaf6ad73ef 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -461,4 +461,16 @@ static inline int split_folio_to_list(struct folio *folio, return split_huge_page_to_list(&folio->page, list); } +/* + * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to + * limitations in the implementation like arm64 MTE can override this to + * false + */ +#ifndef arch_thp_swp_supported +static inline bool arch_thp_swp_supported(void) +{ + return true; +} +#endif + #endif /* _LINUX_HUGE_MM_H */ -- cgit v1.2.3 From 7327b16f5f56741960e11ae4d7ef0ffdff5fd252 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 20 Jul 2022 18:51:21 +0800 Subject: APCI: irq: Add support for multiple GSI domains In an unfortunate departure from the ACPI spec, the LoongArch architecture split its GSI space across multiple interrupt controllers. In order to be able to reuse the core code and prevent architectures from reinventing an already square wheel, offer the arch code the ability to register a dispatcher function that will return the domain fwnode for a given GSI. The ARM GIC drivers are updated to support this (with a single domain, as intended). Signed-off-by: Marc Zyngier Cc: Hanjun Guo Cc: Lorenzo Pieralisi Signed-off-by: Jianmin Lv Tested-by: Hanjun Guo Reviewed-by: Hanjun Guo Link: https://lore.kernel.org/r/1658314292-35346-3-git-send-email-lvjianmin@loongson.cn --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4f82a5bc6d98..957e23f727ea 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -356,7 +356,7 @@ int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); void acpi_set_irq_model(enum acpi_irq_model_id model, - struct fwnode_handle *fwnode); + struct fwnode_handle *(*)(u32)); struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags, unsigned int size, -- cgit v1.2.3 From 744b9a0c3c8334d705dea6af3645ea30d597c360 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 20 Jul 2022 18:51:22 +0800 Subject: ACPI: irq: Allow acpi_gsi_to_irq() to have an arch-specific fallback It appears that the generic version of acpi_gsi_to_irq() doesn't fallback to establishing a mapping if there is no pre-existing one while the x86 version does. While arm64 seems unaffected by it, LoongArch is relying on the x86 behaviour. In an effort to prevent new architectures from reinventing the proverbial wheel, provide an optional callback that the arch code can set to restore the x86 behaviour. Hopefully we can eventually get rid of this in the future once the expected behaviour has been clarified. Reported-by: Jianmin Lv Signed-off-by: Marc Zyngier Signed-off-by: Jianmin Lv Tested-by: Hanjun Guo Reviewed-by: Hanjun Guo Link: https://lore.kernel.org/r/1658314292-35346-4-git-send-email-lvjianmin@loongson.cn --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 957e23f727ea..e2b60d53ca60 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -357,6 +357,7 @@ int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); void acpi_set_irq_model(enum acpi_irq_model_id model, struct fwnode_handle *(*)(u32)); +void acpi_set_gsi_to_irq_fallback(u32 (*)(u32)); struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags, unsigned int size, -- cgit v1.2.3 From d319a299f4066685a787cfb89ad36fd78bb830ed Mon Sep 17 00:00:00 2001 From: Jianmin Lv Date: Wed, 20 Jul 2022 18:51:23 +0800 Subject: genirq/generic_chip: Export irq_unmap_generic_chip Some irq controllers have to re-implement a private version for irq_generic_chip_ops, because they have a different xlate to translate hwirq. Export irq_unmap_generic_chip to allow reusing in drivers. Signed-off-by: Jianmin Lv Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1658314292-35346-5-git-send-email-lvjianmin@loongson.cn --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 505308253d23..83a4574308fa 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1121,6 +1121,7 @@ int irq_gc_set_wake(struct irq_data *d, unsigned int on); /* Setup functions for irq_chip_generic */ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw_irq); +void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq); struct irq_chip_generic * irq_alloc_generic_chip(const char *name, int nr_ct, unsigned int irq_base, void __iomem *reg_base, irq_flow_handler_t handler); -- cgit v1.2.3 From dd281e1a1a937ee2f13bd0db5be78e5f5b811ca7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 20 Jul 2022 18:51:30 +0800 Subject: irqchip: Add Loongson Extended I/O interrupt controller support EIOINTC stands for "Extended I/O Interrupts" that described in Section 11.2 of "Loongson 3A5000 Processor Reference Manual". For more information please refer Documentation/loongarch/irq-chip-model.rst. Loongson-3A5000 has 4 cores per NUMA node, and each NUMA node has an EIOINTC; while Loongson-3C5000 has 16 cores per NUMA node, and each NUMA node has 4 EIOINTCs. In other words, 16 cores of one NUMA node in Loongson-3C5000 are organized in 4 groups, each group connects to an EIOINTC. We call the "group" here as an EIOINTC node, so each EIOINTC node always includes 4 cores (both in Loongson-3A5000 and Loongson- 3C5000). Co-developed-by: Jianmin Lv Signed-off-by: Jianmin Lv Signed-off-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1658314292-35346-12-git-send-email-lvjianmin@loongson.cn --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 19f0dbfdd7fe..de662f3a6cee 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -151,6 +151,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_IRQ_MIPS_GIC_STARTING, CPUHP_AP_IRQ_RISCV_STARTING, + CPUHP_AP_IRQ_LOONGARCH_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_MICROCODE_LOADER, -- cgit v1.2.3 From e8bba72b396cef7c919c73710f3c5884521adb4e Mon Sep 17 00:00:00 2001 From: Jianmin Lv Date: Wed, 20 Jul 2022 18:51:32 +0800 Subject: irqchip / ACPI: Introduce ACPI_IRQ_MODEL_LPIC for LoongArch For LoongArch, ACPI_IRQ_MODEL_LPIC is introduced, and then the callback acpi_get_gsi_domain_id and acpi_gsi_to_irq_fallback are implemented. The acpi_get_gsi_domain_id callback returns related fwnode handle of irqdomain for different GSI range. The acpi_gsi_to_irq_fallback will create new mapping for gsi when the mapping of it is not found. Signed-off-by: Jianmin Lv Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1658314292-35346-14-git-send-email-lvjianmin@loongson.cn --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e2b60d53ca60..76520f379313 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -105,6 +105,7 @@ enum acpi_irq_model_id { ACPI_IRQ_MODEL_IOSAPIC, ACPI_IRQ_MODEL_PLATFORM, ACPI_IRQ_MODEL_GIC, + ACPI_IRQ_MODEL_LPIC, ACPI_IRQ_MODEL_COUNT }; -- cgit v1.2.3 From f8faf3496633b302a6591fda599540a0b53a35bb Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 19 Jul 2022 14:52:51 -0500 Subject: x86/amd_nb: Add AMD PCI IDs for SMN communication Add support for SMN communication on family 17h model A0h and family 19h models 60h-70h. [ bp: Merge into a single patch. ] Signed-off-by: Mario Limonciello Signed-off-by: Borislav Petkov Reviewed-by: Yazen Ghannam Acked-by: Bjorn Helgaas # pci_ids.h Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20220719195256.1516-1-mario.limonciello@amd.com --- include/linux/pci_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0178823ce8c2..7fa460ccf7fa 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -556,10 +556,13 @@ #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493 #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443 +#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3 0x1727 #define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653 #define PCI_DEVICE_ID_AMD_19H_M10H_DF_F3 0x14b0 #define PCI_DEVICE_ID_AMD_19H_M40H_DF_F3 0x167c #define PCI_DEVICE_ID_AMD_19H_M50H_DF_F3 0x166d +#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F3 0x14e3 +#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F3 0x14f3 #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 -- cgit v1.2.3 From ce4b4657ff18925c315855aa290e93c5fa652d96 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 19 Jul 2022 21:02:48 -0300 Subject: vfio: Replace the DMA unmapping notifier with a callback Instead of having drivers register the notifier with explicit code just have them provide a dma_unmap callback op in their driver ops and rely on the core code to wire it up. Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Kevin Tian Reviewed-by: Tony Krowiak Reviewed-by: Eric Farman Reviewed-by: Zhenyu Wang Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v4-681e038e30fd+78-vfio_unmap_notif_jgg@nvidia.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 4d26e149db81..1f9fc7a9be9e 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -49,6 +49,7 @@ struct vfio_device { unsigned int open_count; struct completion comp; struct list_head group_next; + struct notifier_block iommu_nb; }; /** @@ -65,6 +66,8 @@ struct vfio_device { * @match: Optional device name match callback (return: 0 for no-match, >0 for * match, -errno for abort (ex. match with insufficient or incorrect * additional args) + * @dma_unmap: Called when userspace unmaps IOVA from the container + * this device is attached to. * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl */ struct vfio_device_ops { @@ -80,6 +83,7 @@ struct vfio_device_ops { int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); + void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); }; @@ -164,23 +168,6 @@ int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, size_t len, bool write); -/* each type has independent events */ -enum vfio_notify_type { - VFIO_IOMMU_NOTIFY = 0, -}; - -/* events for VFIO_IOMMU_NOTIFY */ -#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) - -int vfio_register_notifier(struct vfio_device *device, - enum vfio_notify_type type, - unsigned long *required_events, - struct notifier_block *nb); -int vfio_unregister_notifier(struct vfio_device *device, - enum vfio_notify_type type, - struct notifier_block *nb); - - /* * Sub-module helpers */ -- cgit v1.2.3 From 8cfc5b60751bcf9b4c6bbab3f6a72d59e0156a89 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 19 Jul 2022 21:02:49 -0300 Subject: vfio: Replace the iommu notifier with a device list Instead of bouncing the function call to the driver op through a blocking notifier just have the iommu layer call it directly. Register each device that is being attached to the iommu with the lower driver which then threads them on a linked list and calls the appropriate driver op at the right time. Currently the only use is if dma_unmap() is defined. Also, fully lock all the debugging tests on the pinning path that a dma_unmap is registered. Reviewed-by: Christoph Hellwig Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v4-681e038e30fd+78-vfio_unmap_notif_jgg@nvidia.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 1f9fc7a9be9e..19cefbaa3d06 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -49,7 +49,7 @@ struct vfio_device { unsigned int open_count; struct completion comp; struct list_head group_next; - struct notifier_block iommu_nb; + struct list_head iommu_entry; }; /** -- cgit v1.2.3 From 9cb61fda8c71baaff423fe5be2e609100860fa78 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 20 Jul 2022 08:52:20 -0700 Subject: bpf: Fix bpf_trampoline_{,un}link_cgroup_shim ifdef guards They were updated in kernel/bpf/trampoline.c to fix another build issue. We should to do the same for include/linux/bpf.h header. Fixes: 3908fcddc65d ("bpf: fix lsm_cgroup build errors on esoteric configs") Reported-by: Stephen Rothwell Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220720155220.4087433-1-sdf@google.com --- include/linux/bpf.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a5bf00649995..11950029284f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1254,9 +1254,6 @@ struct bpf_dummy_ops { int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif -int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, - int cgroup_atype); -void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog); #else static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) { @@ -1280,6 +1277,13 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, { return -EINVAL; } +#endif + +#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, + int cgroup_atype); +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog); +#else static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, int cgroup_atype) { -- cgit v1.2.3 From a1a5482a2c6e38a3ebed32e571625c56a8cc41a6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Jun 2022 16:52:06 +0200 Subject: x86/extable: Fix ex_handler_msr() print condition On Fri, Jun 17, 2022 at 02:08:52PM +0300, Stephane Eranian wrote: > Some changes to the way invalid MSR accesses are reported by the > kernel is causing some problems with messages printed on the > console. > > We have seen several cases of ex_handler_msr() printing invalid MSR > accesses once but the callstack multiple times causing confusion on > the console. > The problem here is that another earlier commit (5.13): > > a358f40600b3 ("once: implement DO_ONCE_LITE for non-fast-path "do once" functionality") > > Modifies all the pr_*_once() calls to always return true claiming > that no caller is ever checking the return value of the functions. > > This is why we are seeing the callstack printed without the > associated printk() msg. Extract the ONCE_IF(cond) part into __ONCE_LTE_IF() and use that to implement DO_ONCE_LITE_IF() and fix the extable code. Fixes: a358f40600b3 ("once: implement DO_ONCE_LITE for non-fast-path "do once" functionality") Reported-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Tested-by: Stephane Eranian Link: https://lkml.kernel.org/r/YqyVFsbviKjVGGZ9@worktop.programming.kicks-ass.net --- include/linux/once_lite.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h index 861e606b820f..b7bce4983638 100644 --- a/include/linux/once_lite.h +++ b/include/linux/once_lite.h @@ -9,15 +9,27 @@ */ #define DO_ONCE_LITE(func, ...) \ DO_ONCE_LITE_IF(true, func, ##__VA_ARGS__) -#define DO_ONCE_LITE_IF(condition, func, ...) \ + +#define __ONCE_LITE_IF(condition) \ ({ \ static bool __section(".data.once") __already_done; \ - bool __ret_do_once = !!(condition); \ + bool __ret_cond = !!(condition); \ + bool __ret_once = false; \ \ - if (unlikely(__ret_do_once && !__already_done)) { \ + if (unlikely(__ret_cond && !__already_done)) { \ __already_done = true; \ - func(__VA_ARGS__); \ + __ret_once = true; \ } \ + unlikely(__ret_once); \ + }) + +#define DO_ONCE_LITE_IF(condition, func, ...) \ + ({ \ + bool __ret_do_once = !!(condition); \ + \ + if (__ONCE_LITE_IF(__ret_do_once)) \ + func(__VA_ARGS__); \ + \ unlikely(__ret_do_once); \ }) -- cgit v1.2.3 From 5588d628027092e66195097bdf6835ddf64418b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Spintzyk?= Date: Wed, 20 Jul 2022 08:05:18 +0200 Subject: net/cdc_ncm: Increase NTB max RX/TX values to 64kb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DisplayLink ethernet devices require NTB buffers larger then 32kb in order to run with highest performance. This patch is changing upper limit of the rx and tx buffers. Those buffers are initialized with CDC_NCM_NTB_DEF_SIZE_RX and CDC_NCM_NTB_DEF_SIZE_TX which is 16kb so by default no device is affected by increased limit. Rx and tx buffer is increased under two conditions: - Device need to advertise that it supports higher buffer size in dwNtbMaxInMaxSize and dwNtbMaxOutMaxSize. - cdc_ncm/rx_max and cdc_ncm/tx_max driver parameters must be adjusted with udev rule or ethtool. Summary of testing and performance results: Tests were performed on following devices: - DisplayLink DL-3xxx family device - DisplayLink DL-6xxx family device - ASUS USB-C2500 2.5G USB3 ethernet adapter - Plugable USB3 1G USB3 ethernet adapter - EDIMAX EU-4307 USB-C ethernet adapter - Dell DBQBCBC064 USB-C ethernet adapter Performance measurements were done with: - iperf3 between two linux boxes - http://openspeedtest.com/ instance running on local test machine Insights from tests results: - All except one from third party usb adapters were not affected by increased buffer size to their advertised dwNtbOutMaxSize and dwNtbInMaxSize. Devices were generally reaching 912-940Mbps both download and upload. Only EDIMAX adapter experienced decreased download size from 929Mbps to 827Mbps with iper3, with openspeedtest decrease was from 968Mbps to 886Mbps. - DisplayLink DL-3xxx family devices experienced performance increase with iperf3 download from 300Mbps to 870Mbps and upload from 782Mbps to 844Mbps. With openspeedtest download increased from 556Mbps to 873Mbps and upload from 727Mbps to 973Mbps - DiplayLink DL-6xxx family devices are not affected by increased buffer size. Signed-off-by: Łukasz Spintzyk Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20220720060518.541-2-lukasz.spintzyk@synaptics.com Signed-off-by: Paolo Abeni --- include/linux/usb/cdc_ncm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index f7cb3ddce7fb..2d207cb4837d 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -53,8 +53,8 @@ #define USB_CDC_NCM_NDP32_LENGTH_MIN 0x20 /* Maximum NTB length */ -#define CDC_NCM_NTB_MAX_SIZE_TX 32768 /* bytes */ -#define CDC_NCM_NTB_MAX_SIZE_RX 32768 /* bytes */ +#define CDC_NCM_NTB_MAX_SIZE_TX 65536 /* bytes */ +#define CDC_NCM_NTB_MAX_SIZE_RX 65536 /* bytes */ /* Initial NTB length */ #define CDC_NCM_NTB_DEF_SIZE_TX 16384 /* bytes */ -- cgit v1.2.3 From e0c7ea83f006ce8c3264ef8b6508a891d886ad4f Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 7 Jul 2022 11:00:29 +0800 Subject: dmaengine: imx-sdma: Add FIFO stride support for multi FIFO script The peripheral may have several FIFOs, but some case just select some FIFOs from them for data transfer, which means FIFO0 and FIFO2 may be selected. So add FIFO address stride support, 0 means all FIFOs are continuous, 1 means 1 word stride between FIFOs. All stride between FIFOs should be same. Another option words_per_fifo means how many audio channel data copied to one FIFO one time, 1 means one channel per FIFO, 2 means 2 channels per FIFO. If 'n_fifos_src = 4' and 'words_per_fifo = 2', it means the first two words(channels) fetch from FIFO0 and then jump to FIFO1 for next two words, and so on after the last FIFO3 fetched, roll back to FIFO0. Signed-off-by: Joy Zou Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1657162829-9273-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Vinod Koul --- include/linux/dma/imx-dma.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/imx-dma.h b/include/linux/dma/imx-dma.h index 8887762360d4..f487a4fa103a 100644 --- a/include/linux/dma/imx-dma.h +++ b/include/linux/dma/imx-dma.h @@ -70,6 +70,16 @@ static inline int imx_dma_is_general_purpose(struct dma_chan *chan) * struct sdma_peripheral_config - SDMA config for audio * @n_fifos_src: Number of FIFOs for recording * @n_fifos_dst: Number of FIFOs for playback + * @stride_fifos_src: FIFO address stride for recording, 0 means all FIFOs are + * continuous, 1 means 1 word stride between FIFOs. All stride + * between FIFOs should be same. + * @stride_fifos_dst: FIFO address stride for playback + * @words_per_fifo: numbers of words per FIFO fetch/fill, 1 means + * one channel per FIFO, 2 means 2 channels per FIFO.. + * If 'n_fifos_src = 4' and 'words_per_fifo = 2', it + * means the first two words(channels) fetch from FIFO0 + * and then jump to FIFO1 for next two words, and so on + * after the last FIFO3 fetched, roll back to FIFO0. * @sw_done: Use software done. Needed for PDM (micfil) * * Some i.MX Audio devices (SAI, micfil) have multiple successive FIFO @@ -82,6 +92,9 @@ static inline int imx_dma_is_general_purpose(struct dma_chan *chan) struct sdma_peripheral_config { int n_fifos_src; int n_fifos_dst; + int stride_fifos_src; + int stride_fifos_dst; + int words_per_fifo; bool sw_done; }; -- cgit v1.2.3 From 974854ab0728532600c72e41a44d6ce1cf8f20a4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 12 Jul 2022 18:37:54 -0700 Subject: cxl/acpi: Track CXL resources in iomem_resource Recall that CXL capable address ranges, on ACPI platforms, are published in the CEDT.CFMWS (CXL Early Discovery Table: CXL Fixed Memory Window Structures). These windows represent both the actively mapped capacity and the potential address space that can be dynamically assigned to a new CXL decode configuration (region / interleave-set). CXL endpoints like DDR DIMMs can be mapped at any physical address including 0 and legacy ranges. There is an expectation and requirement that the /proc/iomem interface and the iomem_resource tree in the kernel reflect the full set of platform address ranges. I.e. that every address range that platform firmware and bus drivers enumerate be reflected as an iomem_resource entry. The hard requirement to do this for CXL arises from the fact that facilities like CONFIG_DEVICE_PRIVATE expect to be able to treat empty iomem_resource ranges as free for software to use as proxy address space. Without CXL publishing its potential address ranges in iomem_resource, the CONFIG_DEVICE_PRIVATE mechanism may inadvertently steal capacity reserved for runtime provisioning of new CXL regions. So, iomem_resource needs to know about both active and potential CXL resource ranges. The active CXL resources might already be reflected in iomem_resource as "System RAM". insert_resource_expand_to_fit() handles re-parenting "System RAM" underneath a CXL window. The "_expand_to_fit()" behavior handles cases where a CXL window is not a strict superset of an existing entry in the iomem_resource tree. The "_expand_to_fit()" behavior is acceptable from the perspective of resource allocation. The expansion happens because a conflicting resource range is already populated, which means the resource boundary expansion does not result in any additional free CXL address space being made available. CXL address space allocation is always bounded by the orginal unexpanded address range. However, the potential for expansion does mean that something like walk_iomem_res_desc(IORES_DESC_CXL...) can only return fuzzy answers on corner case platforms that cause the resource tree to expand a CXL window resource over a range that is not decoded by CXL. This would be an odd platform configuration, but if it becomes a problem in practice the CXL subsytem could just publish an API that returns definitive answers. Cc: Andrew Morton Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Tony Luck Cc: Christoph Hellwig Reviewed-by: Jonathan Cameron Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/165784325943.1758207.5310344844375305118.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- include/linux/ioport.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index ec5f71f7135b..79d1ad6d6275 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -141,6 +141,7 @@ enum { IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, IORES_DESC_RESERVED = 7, IORES_DESC_SOFT_RESERVED = 8, + IORES_DESC_CXL = 9, }; /* -- cgit v1.2.3 From 14b80582c43e4f550acfd93c2b2cadbe36ea0874 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 20 May 2022 13:41:24 -0700 Subject: resource: Introduce alloc_free_mem_region() The core of devm_request_free_mem_region() is a helper that searches for free space in iomem_resource and performs __request_region_locked() on the result of that search. The policy choices of the implementation conform to what CONFIG_DEVICE_PRIVATE users want which is memory that is immediately marked busy, and a preference to search for the first-fit free range in descending order from the top of the physical address space. CXL has a need for a similar allocator, but with the following tweaks: 1/ Search for free space in ascending order 2/ Search for free space relative to a given CXL window 3/ 'insert' rather than 'request' the new resource given downstream drivers from the CXL Region driver (like the pmem or dax drivers) are responsible for request_mem_region() when they activate the memory range. Rework __request_free_mem_region() into get_free_mem_region() which takes a set of GFR_* (Get Free Region) flags to control the allocation policy (ascending vs descending), and "busy" policy (insert_resource() vs request_region()). As part of the consolidation of the legacy GFR_REQUEST_REGION case with the new default of just inserting a new resource into the free space some minor cleanups like not checking for NULL before calling devres_free() (which does its own check) is included. Suggested-by: Jason Gunthorpe Link: https://lore.kernel.org/linux-cxl/20220420143406.GY2120790@nvidia.com/ Cc: Matthew Wilcox Cc: Christoph Hellwig Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165784333333.1758207.13703329337805274043.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- include/linux/ioport.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 79d1ad6d6275..616b683563a9 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -330,6 +330,8 @@ struct resource *devm_request_free_mem_region(struct device *dev, struct resource *base, unsigned long size); struct resource *request_free_mem_region(struct resource *base, unsigned long size, const char *name); +struct resource *alloc_free_mem_region(struct resource *base, + unsigned long size, unsigned long align, const char *name); static inline void irqresource_disabled(struct resource *res, u32 irq) { -- cgit v1.2.3 From d96c52fe4907c68adc5e61a0bef7aec0933223d5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 15 Apr 2022 10:55:42 -0700 Subject: rcu: Add polled expedited grace-period primitives This commit adds expedited grace-period functionality to RCU's polled grace-period API, adding start_poll_synchronize_rcu_expedited() and cond_synchronize_rcu_expedited(), which are similar to the existing start_poll_synchronize_rcu() and cond_synchronize_rcu() functions, respectively. Note that although start_poll_synchronize_rcu_expedited() can be invoked very early, the resulting expedited grace periods are not guaranteed to start until after workqueues are fully initialized. On the other hand, both synchronize_rcu() and synchronize_rcu_expedited() can also be invoked very early, and the resulting grace periods will be taken into account as they occur. [ paulmck: Apply feedback from Neeraj Upadhyay. ] Link: https://lore.kernel.org/all/20220121142454.1994916-1-bfoster@redhat.com/ Link: https://docs.google.com/document/d/1RNKWW9jQyfjxw2E8dsXVTdvZYh0HnYeSHDKog9jhdN8/edit?usp=sharing Cc: Brian Foster Cc: Dave Chinner Cc: Al Viro Cc: Ian Kent Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 10 ++++++++++ include/linux/rcutree.h | 2 ++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 5fed476f977f..ab7e20dfb07b 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -23,6 +23,16 @@ static inline void cond_synchronize_rcu(unsigned long oldstate) might_sleep(); } +static inline unsigned long start_poll_synchronize_rcu_expedited(void) +{ + return start_poll_synchronize_rcu(); +} + +static inline void cond_synchronize_rcu_expedited(unsigned long oldstate) +{ + cond_synchronize_rcu(oldstate); +} + extern void rcu_barrier(void); static inline void synchronize_rcu_expedited(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 9c6cfb742504..20dbaa9a3882 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -40,6 +40,8 @@ bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); +unsigned long start_poll_synchronize_rcu_expedited(void); +void cond_synchronize_rcu_expedited(unsigned long oldstate); unsigned long get_state_synchronize_rcu(void); unsigned long start_poll_synchronize_rcu(void); bool poll_state_synchronize_rcu(unsigned long oldstate); -- cgit v1.2.3 From ab21d6063c01180a8e9b22a37b847e5819525d9f Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 21 Jul 2022 15:42:33 +0200 Subject: bpf: Introduce 8-byte BTF set Introduce support for defining flags for kfuncs using a new set of macros, BTF_SET8_START/BTF_SET8_END, which define a set which contains 8 byte elements (each of which consists of a pair of BTF ID and flags), using a new BTF_ID_FLAGS macro. This will be used to tag kfuncs registered for a certain program type as acquire, release, sleepable, ret_null, etc. without having to create more and more sets which was proving to be an unscalable solution. Now, when looking up whether a kfunc is allowed for a certain program, we can also obtain its kfunc flags in the same call and avoid further lookups. The resolve_btfids change is split into a separate patch. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220721134245.2450-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 252a4befeab1..3cb0741e71d7 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -8,6 +8,15 @@ struct btf_id_set { u32 ids[]; }; +struct btf_id_set8 { + u32 cnt; + u32 flags; + struct { + u32 id; + u32 flags; + } pairs[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include /* for __PASTE */ @@ -25,7 +34,7 @@ struct btf_id_set { #define BTF_IDS_SECTION ".BTF_ids" -#define ____BTF_ID(symbol) \ +#define ____BTF_ID(symbol, word) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ ".local " #symbol " ; \n" \ @@ -33,10 +42,11 @@ asm( \ ".size " #symbol ", 4; \n" \ #symbol ": \n" \ ".zero 4 \n" \ +word \ ".popsection; \n"); -#define __BTF_ID(symbol) \ - ____BTF_ID(symbol) +#define __BTF_ID(symbol, word) \ + ____BTF_ID(symbol, word) #define __ID(prefix) \ __PASTE(prefix, __COUNTER__) @@ -46,7 +56,14 @@ asm( \ * to 4 zero bytes. */ #define BTF_ID(prefix, name) \ - __BTF_ID(__ID(__BTF_ID__##prefix##__##name##__)) + __BTF_ID(__ID(__BTF_ID__##prefix##__##name##__), "") + +#define ____BTF_ID_FLAGS(prefix, name, flags) \ + __BTF_ID(__ID(__BTF_ID__##prefix##__##name##__), ".long " #flags "\n") +#define __BTF_ID_FLAGS(prefix, name, flags, ...) \ + ____BTF_ID_FLAGS(prefix, name, flags) +#define BTF_ID_FLAGS(prefix, name, ...) \ + __BTF_ID_FLAGS(prefix, name, ##__VA_ARGS__, 0) /* * The BTF_ID_LIST macro defines pure (unsorted) list @@ -145,10 +162,51 @@ asm( \ ".popsection; \n"); \ extern struct btf_id_set name; +/* + * The BTF_SET8_START/END macros pair defines sorted list of + * BTF IDs and their flags plus its members count, with the + * following layout: + * + * BTF_SET8_START(list) + * BTF_ID_FLAGS(type1, name1, flags) + * BTF_ID_FLAGS(type2, name2, flags) + * BTF_SET8_END(list) + * + * __BTF_ID__set8__list: + * .zero 8 + * list: + * __BTF_ID__type1__name1__3: + * .zero 4 + * .word (1 << 0) | (1 << 2) + * __BTF_ID__type2__name2__5: + * .zero 4 + * .word (1 << 3) | (1 << 1) | (1 << 2) + * + */ +#define __BTF_SET8_START(name, scope) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +"." #scope " __BTF_ID__set8__" #name "; \n" \ +"__BTF_ID__set8__" #name ":; \n" \ +".zero 8 \n" \ +".popsection; \n"); + +#define BTF_SET8_START(name) \ +__BTF_ID_LIST(name, local) \ +__BTF_SET8_START(name, local) + +#define BTF_SET8_END(name) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +".size __BTF_ID__set8__" #name ", .-" #name " \n" \ +".popsection; \n"); \ +extern struct btf_id_set8 name; + #else #define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; #define BTF_ID(prefix, name) +#define BTF_ID_FLAGS(prefix, name, flags) #define BTF_ID_UNUSED #define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; #define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; @@ -156,6 +214,8 @@ extern struct btf_id_set name; #define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_END(name) +#define BTF_SET8_START(name) static struct btf_id_set8 __maybe_unused name = { 0 }; +#define BTF_SET8_END(name) static struct btf_id_set8 __maybe_unused name = { 0 }; #endif /* CONFIG_DEBUG_INFO_BTF */ -- cgit v1.2.3 From a4703e3184320d6e15e2bc81d2ccf1c8c883f9d1 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 21 Jul 2022 15:42:35 +0200 Subject: bpf: Switch to new kfunc flags infrastructure Instead of populating multiple sets to indicate some attribute and then researching the same BTF ID in them, prepare a single unified BTF set which indicates whether a kfunc is allowed to be called, and also its attributes if any at the same time. Now, only one call is needed to perform the lookup for both kfunc availability and its attributes. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220721134245.2450-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 ++- include/linux/btf.h | 33 ++++++++++----------------------- 2 files changed, 12 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 11950029284f..a97751d845c9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1924,7 +1924,8 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs); + struct bpf_reg_state *regs, + u32 kfunc_flags); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, diff --git a/include/linux/btf.h b/include/linux/btf.h index 1bfed7fa0428..6dfc6eaf7f8c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -12,14 +12,11 @@ #define BTF_TYPE_EMIT(type) ((void)(type *)0) #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) -enum btf_kfunc_type { - BTF_KFUNC_TYPE_CHECK, - BTF_KFUNC_TYPE_ACQUIRE, - BTF_KFUNC_TYPE_RELEASE, - BTF_KFUNC_TYPE_RET_NULL, - BTF_KFUNC_TYPE_KPTR_ACQUIRE, - BTF_KFUNC_TYPE_MAX, -}; +/* These need to be macros, as the expressions are used in assembler input */ +#define KF_ACQUIRE (1 << 0) /* kfunc is an acquire function */ +#define KF_RELEASE (1 << 1) /* kfunc is a release function */ +#define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */ +#define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */ struct btf; struct btf_member; @@ -30,16 +27,7 @@ struct btf_id_set; struct btf_kfunc_id_set { struct module *owner; - union { - struct { - struct btf_id_set *check_set; - struct btf_id_set *acquire_set; - struct btf_id_set *release_set; - struct btf_id_set *ret_null_set; - struct btf_id_set *kptr_acquire_set; - }; - struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; - }; + struct btf_id_set8 *set; }; struct btf_id_dtor_kfunc { @@ -378,9 +366,9 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); -bool btf_kfunc_id_set_contains(const struct btf *btf, +u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, - enum btf_kfunc_type type, u32 kfunc_btf_id); + u32 kfunc_btf_id); int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s); s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); @@ -397,12 +385,11 @@ static inline const char *btf_name_by_offset(const struct btf *btf, { return NULL; } -static inline bool btf_kfunc_id_set_contains(const struct btf *btf, +static inline u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, - enum btf_kfunc_type type, u32 kfunc_btf_id) { - return false; + return NULL; } static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s) -- cgit v1.2.3 From 56e948ffc098a780fefb6c1784a3a2c7b81100a1 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 21 Jul 2022 15:42:36 +0200 Subject: bpf: Add support for forcing kfunc args to be trusted Teach the verifier to detect a new KF_TRUSTED_ARGS kfunc flag, which means each pointer argument must be trusted, which we define as a pointer that is referenced (has non-zero ref_obj_id) and also needs to have its offset unchanged, similar to how release functions expect their argument. This allows a kfunc to receive pointer arguments unchanged from the result of the acquire kfunc. This is required to ensure that kfunc that operate on some object only work on acquired pointers and not normal PTR_TO_BTF_ID with same type which can be obtained by pointer walking. The restrictions applied to release arguments also apply to trusted arguments. This implies that strict type matching (not deducing type by recursively following members at offset) and OBJ_RELEASE offset checks (ensuring they are zero) are used for trusted pointer arguments. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220721134245.2450-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index 6dfc6eaf7f8c..cdb376d53238 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -17,6 +17,38 @@ #define KF_RELEASE (1 << 1) /* kfunc is a release function */ #define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */ #define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */ +/* Trusted arguments are those which are meant to be referenced arguments with + * unchanged offset. It is used to enforce that pointers obtained from acquire + * kfuncs remain unmodified when being passed to helpers taking trusted args. + * + * Consider + * struct foo { + * int data; + * struct foo *next; + * }; + * + * struct bar { + * int data; + * struct foo f; + * }; + * + * struct foo *f = alloc_foo(); // Acquire kfunc + * struct bar *b = alloc_bar(); // Acquire kfunc + * + * If a kfunc set_foo_data() wants to operate only on the allocated object, it + * will set the KF_TRUSTED_ARGS flag, which will prevent unsafe usage like: + * + * set_foo_data(f, 42); // Allowed + * set_foo_data(f->next, 42); // Rejected, non-referenced pointer + * set_foo_data(&f->next, 42);// Rejected, referenced, but wrong type + * set_foo_data(&b->f, 42); // Rejected, referenced, but bad offset + * + * In the final case, usually for the purposes of type matching, it is deduced + * by looking at the type of the member at the offset, but due to the + * requirement of trusted argument, this deduction will be strict and not done + * for this case. + */ +#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ struct btf; struct btf_member; -- cgit v1.2.3 From 949d6b405e6160ae44baea39192d67b39cb7eeac Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 20 Jul 2022 16:57:58 -0700 Subject: net: add missing includes and forward declarations under net/ This patch adds missing includes to headers under include/net. All these problems are currently masked by the existing users including the missing dependency before the broken header. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/lapb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lapb.h b/include/linux/lapb.h index eb56472f23b2..b5333f9413dc 100644 --- a/include/linux/lapb.h +++ b/include/linux/lapb.h @@ -6,6 +6,11 @@ #ifndef LAPB_KERNEL_H #define LAPB_KERNEL_H +#include +#include + +struct net_device; + #define LAPB_OK 0 #define LAPB_BADTOKEN 1 #define LAPB_INVALUE 2 -- cgit v1.2.3 From 0903f899418ee0235e4ad756f5502162f29b49b9 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Thu, 27 Jan 2022 14:39:46 +0200 Subject: wifi: ieee80211: add helper functions for detecting TM/FTM frames Add helper functions for detection timing measurement and fine timing measurement frames. Signed-off-by: Avraham Stern Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index cca564372d16..55e6f4ad0ca6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1332,6 +1332,15 @@ struct ieee80211_mgmt { u8 action_code; u8 variable[]; } __packed s1g; + struct { + u8 action_code; + u8 dialog_token; + u8 follow_up; + u32 tod; + u32 toa; + u8 max_tod_error; + u8 max_toa_error; + } __packed wnm_timing_msr; } u; } __packed action; } u; @@ -3522,6 +3531,17 @@ enum ieee80211_mesh_actioncode { WLAN_MESH_ACTION_TBTT_ADJUSTMENT_RESPONSE, }; +/* Unprotected WNM action codes */ +enum ieee80211_unprotected_wnm_actioncode { + WLAN_UNPROTECTED_WNM_ACTION_TIM = 0, + WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE = 1, +}; + +/* Public action codes */ +enum ieee80211_public_actioncode { + WLAN_PUBLIC_ACTION_FTM_RESPONSE = 33, +}; + /* Security key length */ enum ieee80211_key_len { WLAN_KEY_LEN_WEP40 = 5, @@ -4311,6 +4331,40 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) return true; } +static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (skb->len < IEEE80211_MIN_ACTION_SIZE) + return false; + + if (!ieee80211_is_action(mgmt->frame_control)) + return false; + + if (mgmt->u.action.category == WLAN_CATEGORY_WNM_UNPROTECTED && + mgmt->u.action.u.wnm_timing_msr.action_code == + WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE && + skb->len >= offsetofend(typeof(*mgmt), u.action.u.wnm_timing_msr)) + return true; + + return false; +} + +static inline bool ieee80211_is_ftm(struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (!ieee80211_is_public_action((void *)mgmt, skb->len)) + return false; + + if (mgmt->u.action.u.ftm.action_code == + WLAN_PUBLIC_ACTION_FTM_RESPONSE && + skb->len >= offsetofend(typeof(*mgmt), u.action.u.ftm)) + return true; + + return false; +} + struct element { u8 id; u8 datalen; -- cgit v1.2.3 From dea997733575c5793ca77e166bbbf89097987eb4 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 22 Jul 2022 10:48:50 +0100 Subject: firmware: cs_dsp: Add pre_stop callback The code already has a post_stop callback, add a matching pre_stop callback to the client_ops that is called before execution is stopped. This callback provides a convenient place for the client code to communicate with the DSP before it is stopped. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220722094851.92521-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/firmware/cirrus/cs_dsp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 30055706cce2..6ab230218df0 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -189,7 +189,8 @@ struct cs_dsp { * @control_remove: Called under the pwr_lock when a control is destroyed * @pre_run: Called under the pwr_lock by cs_dsp_run() before the core is started * @post_run: Called under the pwr_lock by cs_dsp_run() after the core is started - * @post_stop: Called under the pwr_lock by cs_dsp_stop() + * @pre_stop: Called under the pwr_lock by cs_dsp_stop() before the core is stopped + * @post_stop: Called under the pwr_lock by cs_dsp_stop() after the core is stopped * @watchdog_expired: Called when a watchdog expiry is detected * * These callbacks give the cs_dsp client an opportunity to respond to events @@ -200,6 +201,7 @@ struct cs_dsp_client_ops { void (*control_remove)(struct cs_dsp_coeff_ctl *ctl); int (*pre_run)(struct cs_dsp *dsp); int (*post_run)(struct cs_dsp *dsp); + void (*pre_stop)(struct cs_dsp *dsp); void (*post_stop)(struct cs_dsp *dsp); void (*watchdog_expired)(struct cs_dsp *dsp); }; -- cgit v1.2.3 From a4b976552f122ea851f556769874022cf097741e Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 22 Jul 2022 10:48:51 +0100 Subject: firmware: cs_dsp: Add memory chunk helpers Add helpers that can be layered on top of a buffer read from or to be written to the DSP to faciliate accessing datastructures within the DSP memory. These functions handle adding the padding bytes for the DSP, converting to big endian, and packing arbitrary length data. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220722094851.92521-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/firmware/cirrus/cs_dsp.h | 73 ++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 6ab230218df0..cad828e21c72 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -11,6 +11,7 @@ #ifndef __CS_DSP_H #define __CS_DSP_H +#include #include #include #include @@ -34,6 +35,7 @@ #define CS_ADSP2_REGION_ALL (CS_ADSP2_REGION_0 | CS_ADSP2_REGION_1_9) #define CS_DSP_DATA_WORD_SIZE 3 +#define CS_DSP_DATA_WORD_BITS (3 * BITS_PER_BYTE) #define CS_DSP_ACKED_CTL_TIMEOUT_MS 100 #define CS_DSP_ACKED_CTL_N_QUICKPOLLS 10 @@ -252,4 +254,75 @@ struct cs_dsp_alg_region *cs_dsp_find_alg_region(struct cs_dsp *dsp, const char *cs_dsp_mem_region_name(unsigned int type); +/** + * struct cs_dsp_chunk - Describes a buffer holding data formatted for the DSP + * @data: Pointer to underlying buffer memory + * @max: Pointer to end of the buffer memory + * @bytes: Number of bytes read/written into the memory chunk + * @cache: Temporary holding data as it is formatted + * @cachebits: Number of bits of data currently in cache + */ +struct cs_dsp_chunk { + u8 *data; + u8 *max; + int bytes; + + u32 cache; + int cachebits; +}; + +/** + * cs_dsp_chunk() - Create a DSP memory chunk + * @data: Pointer to the buffer that will be used to store data + * @size: Size of the buffer in bytes + * + * Return: A cs_dsp_chunk structure + */ +static inline struct cs_dsp_chunk cs_dsp_chunk(void *data, int size) +{ + struct cs_dsp_chunk ch = { + .data = data, + .max = data + size, + }; + + return ch; +} + +/** + * cs_dsp_chunk_end() - Check if a DSP memory chunk is full + * @ch: Pointer to the chunk structure + * + * Return: True if the whole buffer has been read/written + */ +static inline bool cs_dsp_chunk_end(struct cs_dsp_chunk *ch) +{ + return ch->data == ch->max; +} + +/** + * cs_dsp_chunk_bytes() - Number of bytes written/read from a DSP memory chunk + * @ch: Pointer to the chunk structure + * + * Return: Number of bytes read/written to the buffer + */ +static inline int cs_dsp_chunk_bytes(struct cs_dsp_chunk *ch) +{ + return ch->bytes; +} + +/** + * cs_dsp_chunk_valid_addr() - Check if an address is in a DSP memory chunk + * @ch: Pointer to the chunk structure + * + * Return: True if the given address is within the buffer + */ +static inline bool cs_dsp_chunk_valid_addr(struct cs_dsp_chunk *ch, void *addr) +{ + return (u8 *)addr >= ch->data && (u8 *)addr < ch->max; +} + +int cs_dsp_chunk_write(struct cs_dsp_chunk *ch, int nbits, u32 val); +int cs_dsp_chunk_flush(struct cs_dsp_chunk *ch); +int cs_dsp_chunk_read(struct cs_dsp_chunk *ch, int nbits); + #endif -- cgit v1.2.3 From e423414375866a399ebbe55ed044acb39846e8bf Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 22 Jul 2022 13:36:05 +0200 Subject: bpf: Fix build error in case of !CONFIG_DEBUG_INFO_BTF BTF_ID_FLAGS macro needs to be able to take 0 or 1 args, so make it a variable argument. BTF_SET8_END is incorrect, it should just be empty. Reported-by: kernel test robot Fixes: ab21d6063c01 ("bpf: Introduce 8-byte BTF set") Signed-off-by: Kumar Kartikeya Dwivedi Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20220722113605.6513-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 3cb0741e71d7..2aea877d644f 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -206,7 +206,7 @@ extern struct btf_id_set8 name; #define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; #define BTF_ID(prefix, name) -#define BTF_ID_FLAGS(prefix, name, flags) +#define BTF_ID_FLAGS(prefix, name, ...) #define BTF_ID_UNUSED #define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; #define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; @@ -215,7 +215,7 @@ extern struct btf_id_set8 name; #define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_END(name) #define BTF_SET8_START(name) static struct btf_id_set8 __maybe_unused name = { 0 }; -#define BTF_SET8_END(name) static struct btf_id_set8 __maybe_unused name = { 0 }; +#define BTF_SET8_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ -- cgit v1.2.3 From 478af190cb6c501efaa8de2b9c9418ece2e4d0bd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 Jul 2022 10:59:17 -0700 Subject: iomap: remove iomap_writepage Unused now. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e552097c67e0..911888560d3e 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -303,9 +303,6 @@ void iomap_finish_ioends(struct iomap_ioend *ioend, int error); void iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends); void iomap_sort_ioends(struct list_head *ioend_list); -int iomap_writepage(struct page *page, struct writeback_control *wbc, - struct iomap_writepage_ctx *wpc, - const struct iomap_writeback_ops *ops); int iomap_writepages(struct address_space *mapping, struct writeback_control *wbc, struct iomap_writepage_ctx *wpc, const struct iomap_writeback_ops *ops); -- cgit v1.2.3 From f96f644ab97abeed3f7007c953836a574ce928cc Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 19 Jul 2022 17:21:23 -0700 Subject: ftrace: Add modify_ftrace_direct_multi_nolock This is similar to modify_ftrace_direct_multi, but does not acquire direct_mutex. This is useful when direct_mutex is already locked by the user. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/bpf/20220720002126.803253-2-song@kernel.org --- include/linux/ftrace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 979f6bfa2c25..acb35243ce5d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -340,6 +340,7 @@ unsigned long ftrace_find_rec_direct(unsigned long ip); int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); +int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr); #else struct ftrace_ops; @@ -384,6 +385,10 @@ static inline int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned lo { return -ENODEV; } +static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS -- cgit v1.2.3 From 53cd885bc5c3ea283cc9c00ca6446c778f00bfba Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 19 Jul 2022 17:21:24 -0700 Subject: ftrace: Allow IPMODIFY and DIRECT ops on the same function IPMODIFY (livepatch) and DIRECT (bpf trampoline) ops are both important users of ftrace. It is necessary to allow them work on the same function at the same time. First, DIRECT ops no longer specify IPMODIFY flag. Instead, DIRECT flag is handled together with IPMODIFY flag in __ftrace_hash_update_ipmodify(). Then, a callback function, ops_func, is added to ftrace_ops. This is used by ftrace core code to understand whether the DIRECT ops can share with an IPMODIFY ops. To share with IPMODIFY ops, the DIRECT ops need to implement the callback function and adjust the direct trampoline accordingly. If DIRECT ops is attached before the IPMODIFY ops, ftrace core code calls ENABLE_SHARE_IPMODIFY_PEER on the DIRECT ops before registering the IPMODIFY ops. If IPMODIFY ops is attached before the DIRECT ops, ftrace core code calls ENABLE_SHARE_IPMODIFY_SELF in __ftrace_hash_update_ipmodify. Owner of the DIRECT ops may return 0 if the DIRECT trampoline can share with IPMODIFY, so error code otherwise. The error code is propagated to register_ftrace_direct_multi so that onwer of the DIRECT trampoline can handle it properly. For more details, please refer to comment before enum ftrace_ops_cmd. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/all/20220602193706.2607681-2-song@kernel.org/ Link: https://lore.kernel.org/all/20220718055449.3960512-1-song@kernel.org/ Link: https://lore.kernel.org/bpf/20220720002126.803253-3-song@kernel.org --- include/linux/ftrace.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index acb35243ce5d..0b61371e287b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -208,6 +208,43 @@ enum { FTRACE_OPS_FL_DIRECT = BIT(17), }; +/* + * FTRACE_OPS_CMD_* commands allow the ftrace core logic to request changes + * to a ftrace_ops. Note, the requests may fail. + * + * ENABLE_SHARE_IPMODIFY_SELF - enable a DIRECT ops to work on the same + * function as an ops with IPMODIFY. Called + * when the DIRECT ops is being registered. + * This is called with both direct_mutex and + * ftrace_lock are locked. + * + * ENABLE_SHARE_IPMODIFY_PEER - enable a DIRECT ops to work on the same + * function as an ops with IPMODIFY. Called + * when the other ops (the one with IPMODIFY) + * is being registered. + * This is called with direct_mutex locked. + * + * DISABLE_SHARE_IPMODIFY_PEER - disable a DIRECT ops to work on the same + * function as an ops with IPMODIFY. Called + * when the other ops (the one with IPMODIFY) + * is being unregistered. + * This is called with direct_mutex locked. + */ +enum ftrace_ops_cmd { + FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF, + FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER, + FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER, +}; + +/* + * For most ftrace_ops_cmd, + * Returns: + * 0 - Success. + * Negative on failure. The return value is dependent on the + * callback. + */ +typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd); + #ifdef CONFIG_DYNAMIC_FTRACE /* The hash used to know what functions callbacks trace */ struct ftrace_ops_hash { @@ -250,6 +287,7 @@ struct ftrace_ops { unsigned long trampoline; unsigned long trampoline_size; struct list_head list; + ftrace_ops_func_t ops_func; #endif }; -- cgit v1.2.3 From 316cba62dfb7878b7353177e6a7da9cc0c979cde Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 19 Jul 2022 17:21:25 -0700 Subject: bpf, x64: Allow to use caller address from stack Currently we call the original function by using the absolute address given at the JIT generation. That's not usable when having trampoline attached to multiple functions, or the target address changes dynamically (in case of live patch). In such cases we need to take the return address from the stack. Adding support to retrieve the original function address from the stack by adding new BPF_TRAMP_F_ORIG_STACK flag for arch_prepare_bpf_trampoline function. Basically we take the return address of the 'fentry' call: function + 0: call fentry # stores 'function + 5' address on stack function + 5: ... The 'function + 5' address will be used as the address for the original function to call. Signed-off-by: Jiri Olsa Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220720002126.803253-4-song@kernel.org --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a97751d845c9..1d22df8bf306 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -751,6 +751,11 @@ struct btf_func_model { /* Return the return value of fentry prog. Only used by bpf_struct_ops. */ #define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) +/* Get original function from stack instead of from provided direct address. + * Makes sense for trampolines with fexit or fmod_ret programs. + */ +#define BPF_TRAMP_F_ORIG_STACK BIT(5) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ -- cgit v1.2.3 From 00963a2e75a872e5fce4d0115ac2786ec86b57a6 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 19 Jul 2022 17:21:26 -0700 Subject: bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch) When tracing a function with IPMODIFY ftrace_ops (livepatch), the bpf trampoline must follow the instruction pointer saved on stack. This needs extra handling for bpf trampolines with BPF_TRAMP_F_CALL_ORIG flag. Implement bpf_tramp_ftrace_ops_func and use it for the ftrace_ops used by BPF trampoline. This enables tracing functions with livepatch. This also requires moving bpf trampoline to *_ftrace_direct_mult APIs. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/all/20220602193706.2607681-2-song@kernel.org/ Link: https://lore.kernel.org/bpf/20220720002126.803253-5-song@kernel.org --- include/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1d22df8bf306..20c26aed7896 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -47,6 +47,7 @@ struct kobject; struct mem_cgroup; struct module; struct bpf_func_state; +struct ftrace_ops; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -756,6 +757,11 @@ struct btf_func_model { */ #define BPF_TRAMP_F_ORIG_STACK BIT(5) +/* This trampoline is on a function with another ftrace_ops with IPMODIFY, + * e.g., a live patch. This flag is set and cleared by ftrace call backs, + */ +#define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ @@ -838,9 +844,11 @@ struct bpf_tramp_image { struct bpf_trampoline { /* hlist for trampoline_table */ struct hlist_node hlist; + struct ftrace_ops *fops; /* serializes access to fields of this trampoline */ struct mutex mutex; refcount_t refcnt; + u32 flags; u64 key; struct { struct btf_func_model model; -- cgit v1.2.3 From 189c6c33ff421def040b904fb14ef76c5bf5af4c Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 28 Jun 2022 16:30:59 +0200 Subject: PCI: Extend isolated function probing to s390 Like the jailhouse hypervisor, s390's PCI architecture allows passing isolated PCI functions to a guest OS instance. As of now this is was not utilized even with multi-function support as the s390 PCI code makes sure that only virtual PCI busses including a function with devfn 0 are presented to the PCI subsystem. A subsequent change will remove this restriction. Allow probing such functions by replacing the existing check for jailhouse_paravirt() with a new hypervisor_isolated_pci_functions() helper. Link: https://lore.kernel.org/r/20220628143100.3228092-5-schnelle@linux.ibm.com Signed-off-by: Niklas Schnelle Signed-off-by: Bjorn Helgaas Reviewed-by: Pierre Morel Cc: Jan Kiszka --- include/linux/hypervisor.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h index fc08b433c856..9efbc54e35e5 100644 --- a/include/linux/hypervisor.h +++ b/include/linux/hypervisor.h @@ -32,4 +32,12 @@ static inline bool jailhouse_paravirt(void) #endif /* !CONFIG_X86 */ +static inline bool hypervisor_isolated_pci_functions(void) +{ + if (IS_ENABLED(CONFIG_S390)) + return true; + + return jailhouse_paravirt(); +} + #endif /* __LINUX_HYPEVISOR_H */ -- cgit v1.2.3 From abb4970ac33514c84b143516583eaf8cc47abd67 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Sat, 23 Jul 2022 06:49:42 +0900 Subject: PCI: Move isa_dma_bridge_buggy out of asm/dma.h The isa_dma_bridge_buggy symbol is only used for x86_32, and only x86_32 platforms or quirks ever set it. Add a new linux/isa-dma.h header that #defines isa_dma_bridge_buggy to 0 except on x86_32, where we keep it as a variable, and remove all the arch- specific definitions. [bhelgaas: commit log] Suggested-by: Arnd Bergmann Suggested-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220722214944.831438-3-shorne@gmail.com Signed-off-by: Stafford Horne Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Acked-by: Geert Uytterhoeven --- include/linux/isa-dma.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/linux/isa-dma.h (limited to 'include/linux') diff --git a/include/linux/isa-dma.h b/include/linux/isa-dma.h new file mode 100644 index 000000000000..61504a8c1b9e --- /dev/null +++ b/include/linux/isa-dma.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LINUX_ISA_DMA_H +#define __LINUX_ISA_DMA_H + +#include + +#if defined(CONFIG_PCI) && defined(CONFIG_X86_32) +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy (0) +#endif + +#endif /* __LINUX_ISA_DMA_H */ -- cgit v1.2.3 From e8f90717ed3b58e81c480b3aa38e641c0da5a456 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:47 -0700 Subject: vfio: Make vfio_unpin_pages() return void There's only one caller that checks its return value with a WARN_ON_ONCE, while all other callers don't check the return value at all. Above that, an undo function should not fail. So, simplify the API to return void by embedding similar WARN_ONs. Also for users to pinpoint which condition fails, separate WARN_ON lines, yet remove the "driver->ops->unpin_pages" check, since it's unreasonable for callers to unpin on something totally random that wasn't even pinned. And remove NULL pointer checks for they would trigger oops vs. warnings. Note that npage is already validated in the vfio core, thus drop the same check in the type1 code. Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Kirti Wankhede Tested-by: Terrence Xu Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-2-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 19cefbaa3d06..9f7d74c24925 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -163,8 +163,8 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device); int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, int npage, int prot, unsigned long *phys_pfn); -int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, - int npage); +void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, + int npage); int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, size_t len, bool write); -- cgit v1.2.3 From 6a010a49b63ac8465851a79185d8deff966f8e1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 23 Jul 2022 04:28:28 -1000 Subject: cgroup: Make !percpu threadgroup_rwsem operations optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 3942a9bd7b58 ("locking, rcu, cgroup: Avoid synchronize_sched() in __cgroup_procs_write()") disabled percpu operations on threadgroup_rwsem because the impiled synchronize_rcu() on write locking was pushing up the latencies too much for android which constantly moves processes between cgroups. This makes the hotter paths - fork and exit - slower as they're always forced into the slow path. There is no reason to force this on everyone especially given that more common static usage pattern can now completely avoid write-locking the rwsem. Write-locking is elided when turning on and off controllers on empty sub-trees and CLONE_INTO_CGROUP enables seeding a cgroup without grabbing the rwsem. Restore the default percpu operations and introduce the mount option "favordynmods" and config option CGROUP_FAVOR_DYNMODS for users who need lower latencies for the dynamic operations. Signed-off-by: Tejun Heo Cc: Christian Brauner Cc: Michal Koutn� Cc: Peter Zijlstra Cc: John Stultz Cc: Dmitry Shmidt Cc: Oleg Nesterov --- include/linux/cgroup-defs.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 672de25e3ec8..63bf43c7ca3b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -88,20 +88,33 @@ enum { */ CGRP_ROOT_NS_DELEGATE = (1 << 3), + /* + * Reduce latencies on dynamic cgroup modifications such as task + * migrations and controller on/offs by disabling percpu operation on + * cgroup_threadgroup_rwsem. This makes hot path operations such as + * forks and exits into the slow path and more expensive. + * + * The static usage pattern of creating a cgroup, enabling controllers, + * and then seeding it with CLONE_INTO_CGROUP doesn't require write + * locking cgroup_threadgroup_rwsem and thus doesn't benefit from + * favordynmod. + */ + CGRP_ROOT_FAVOR_DYNMODS = (1 << 4), + /* * Enable cpuset controller in v1 cgroup to use v2 behavior. */ - CGRP_ROOT_CPUSET_V2_MODE = (1 << 4), + CGRP_ROOT_CPUSET_V2_MODE = (1 << 16), /* * Enable legacy local memory.events. */ - CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5), + CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 17), /* * Enable recursive subtree protection */ - CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6), + CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18), }; /* cftype->flags */ -- cgit v1.2.3 From ba8ec7a607e98e8491a1fcf924a2e6c96ac9d413 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jul 2022 14:47:28 -0400 Subject: SUNRPC: Shrink size of struct rpc_task Move the field 'tk_rpc_status' so that we eliminate a 4 byte hole in the structure. For x86_64, this shrinks the size of the struct by 8 bytes. 'pahole' output before the change: /* size: 232, cachelines: 4, members: 27 */ /* sum members: 222, holes: 1, sum holes: 4 */ /* sum bitfield members: 8 bits (1 bytes) */ /* padding: 5 */ /* last cacheline: 40 bytes */ 'pahole' output after the change: /* size: 224, cachelines: 4, members: 27 */ /* padding: 1 */ /* last cacheline: 32 bytes */ Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 1d7a3e51b795..acc62647317c 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -61,8 +61,6 @@ struct rpc_task { struct rpc_wait tk_wait; /* RPC wait */ } u; - int tk_rpc_status; /* Result of last RPC operation */ - /* * RPC call state */ @@ -82,6 +80,8 @@ struct rpc_task { ktime_t tk_start; /* RPC task init timestamp */ pid_t tk_owner; /* Process id for batching tasks */ + + int tk_rpc_status; /* Result of last RPC operation */ unsigned short tk_flags; /* misc flags */ unsigned short tk_timeouts; /* maj timeouts */ -- cgit v1.2.3 From 69d966510d9f5de81588b37d23a9ee8ccc477b23 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Jul 2022 14:12:20 -0400 Subject: nfs: only issue commit in DIO codepath if we have uncommitted data Currently, we try to determine whether to issue a commit based on nfs_write_need_commit which looks at the current verifier. In the case where we got a short write and then tried to follow it up with one that failed, the verifier can't be trusted. What we really want to know is whether the pgio request had any successful writes that came back as UNSTABLE. Add a new flag to the pgio request, and use that to indicate that we've had a successful unstable write. Only issue a commit if that flag is set. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0e3aa0f5f324..e86cf6642d21 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1600,6 +1600,7 @@ enum { NFS_IOHDR_STAT, NFS_IOHDR_RESEND_PNFS, NFS_IOHDR_RESEND_MDS, + NFS_IOHDR_UNSTABLE_WRITES, }; struct nfs_io_completion; -- cgit v1.2.3 From 4f5f3b6028343d687d0533329b130e4b8280ab32 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 21 Jul 2022 14:21:31 -0400 Subject: SUNRPC: Introduce xdr_stream_move_subsegment() I do this by creating an xdr subsegment for the range we will be operating over. This lets me shift data to the correct place without potentially overwriting anything already there. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5860f32e3958..7dcc6c31fe29 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -262,6 +262,8 @@ extern unsigned int xdr_align_data(struct xdr_stream *, unsigned int offset, uns extern unsigned int xdr_expand_hole(struct xdr_stream *, unsigned int offset, unsigned int length); extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, unsigned int len); +extern unsigned int xdr_stream_move_subsegment(struct xdr_stream *xdr, unsigned int offset, + unsigned int target, unsigned int length); /** * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. -- cgit v1.2.3 From 7c4cd5f4d2dd4a028a46bfb696b0cd387caadf33 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 21 Jul 2022 14:21:32 -0400 Subject: SUNRPC: Add a function for directly setting the xdr page len We need to do this step during READ_PLUS decoding so that we know pages are the right length and any extra data has been preserved in the tail. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 7dcc6c31fe29..8cd38a9994ca 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -258,6 +258,7 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern void xdr_set_pagelen(struct xdr_stream *, unsigned int len); extern unsigned int xdr_align_data(struct xdr_stream *, unsigned int offset, unsigned int length); extern unsigned int xdr_expand_hole(struct xdr_stream *, unsigned int offset, unsigned int length); extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, -- cgit v1.2.3 From e1bd87608d4b6f87813f79b91e834de610f1049b Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 21 Jul 2022 14:21:33 -0400 Subject: SUNRPC: Add a function for zeroing out a portion of an xdr_stream This will be used during READ_PLUS decoding for handling HOLE segments. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8cd38a9994ca..f0ab06acab61 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -265,6 +265,8 @@ extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf unsigned int len); extern unsigned int xdr_stream_move_subsegment(struct xdr_stream *xdr, unsigned int offset, unsigned int target, unsigned int length); +extern unsigned int xdr_stream_zero(struct xdr_stream *xdr, unsigned int offset, + unsigned int length); /** * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. -- cgit v1.2.3 From 29946fbcb2c31a2a367887dc58a2e7e5b012e285 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 21 Jul 2022 14:21:35 -0400 Subject: SUNRPC: Remove xdr_align_data() and xdr_expand_hole() These functions are no longer needed now that the NFS client places data and hole segments directly. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f0ab06acab61..f38c97f45354 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -259,8 +259,6 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); extern void xdr_set_pagelen(struct xdr_stream *, unsigned int len); -extern unsigned int xdr_align_data(struct xdr_stream *, unsigned int offset, unsigned int length); -extern unsigned int xdr_expand_hole(struct xdr_stream *, unsigned int offset, unsigned int length); extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, unsigned int len); extern unsigned int xdr_stream_move_subsegment(struct xdr_stream *xdr, unsigned int offset, -- cgit v1.2.3 From ab1c84d855cf2c284fa0f4b17fc04063659c54a1 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 16 Jun 2022 13:57:19 +0100 Subject: io_uring: make io_uring_types.h public Move io_uring types to linux/include, need them public so tracing can see the definitions and we can clean trace/events/io_uring.h Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/a15f12e8cb7289b2de0deaddcc7518d98a132d17.1655384063.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 554 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 554 insertions(+) create mode 100644 include/linux/io_uring_types.h (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h new file mode 100644 index 000000000000..779c72da5b8f --- /dev/null +++ b/include/linux/io_uring_types.h @@ -0,0 +1,554 @@ +#ifndef IO_URING_TYPES_H +#define IO_URING_TYPES_H + +#include +#include +#include +#include + +struct io_wq_work_node { + struct io_wq_work_node *next; +}; + +struct io_wq_work_list { + struct io_wq_work_node *first; + struct io_wq_work_node *last; +}; + +struct io_wq_work { + struct io_wq_work_node list; + unsigned flags; + /* place it here instead of io_kiocb as it fills padding and saves 4B */ + int cancel_seq; +}; + +struct io_fixed_file { + /* file * with additional FFS_* flags */ + unsigned long file_ptr; +}; + +struct io_file_table { + struct io_fixed_file *files; + unsigned long *bitmap; + unsigned int alloc_hint; +}; + +struct io_hash_bucket { + spinlock_t lock; + struct hlist_head list; +} ____cacheline_aligned_in_smp; + +struct io_hash_table { + struct io_hash_bucket *hbs; + unsigned hash_bits; +}; + +struct io_uring { + u32 head ____cacheline_aligned_in_smp; + u32 tail ____cacheline_aligned_in_smp; +}; + +/* + * This data is shared with the application through the mmap at offsets + * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING. + * + * The offsets to the member fields are published through struct + * io_sqring_offsets when calling io_uring_setup. + */ +struct io_rings { + /* + * Head and tail offsets into the ring; the offsets need to be + * masked to get valid indices. + * + * The kernel controls head of the sq ring and the tail of the cq ring, + * and the application controls tail of the sq ring and the head of the + * cq ring. + */ + struct io_uring sq, cq; + /* + * Bitmasks to apply to head and tail offsets (constant, equals + * ring_entries - 1) + */ + u32 sq_ring_mask, cq_ring_mask; + /* Ring sizes (constant, power of 2) */ + u32 sq_ring_entries, cq_ring_entries; + /* + * Number of invalid entries dropped by the kernel due to + * invalid index stored in array + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * After a new SQ head value was read by the application this + * counter includes all submissions that were dropped reaching + * the new SQ head (and possibly more). + */ + u32 sq_dropped; + /* + * Runtime SQ flags + * + * Written by the kernel, shouldn't be modified by the + * application. + * + * The application needs a full memory barrier before checking + * for IORING_SQ_NEED_WAKEUP after updating the sq tail. + */ + atomic_t sq_flags; + /* + * Runtime CQ flags + * + * Written by the application, shouldn't be modified by the + * kernel. + */ + u32 cq_flags; + /* + * Number of completion events lost because the queue was full; + * this should be avoided by the application by making sure + * there are not more requests pending than there is space in + * the completion queue. + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * As completion events come in out of order this counter is not + * ordered with any other data. + */ + u32 cq_overflow; + /* + * Ring buffer of completion events. + * + * The kernel writes completion events fresh every time they are + * produced, so the application is allowed to modify pending + * entries. + */ + struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp; +}; + +struct io_restriction { + DECLARE_BITMAP(register_op, IORING_REGISTER_LAST); + DECLARE_BITMAP(sqe_op, IORING_OP_LAST); + u8 sqe_flags_allowed; + u8 sqe_flags_required; + bool registered; +}; + +struct io_submit_link { + struct io_kiocb *head; + struct io_kiocb *last; +}; + +struct io_submit_state { + /* inline/task_work completion list, under ->uring_lock */ + struct io_wq_work_node free_list; + /* batch completion logic */ + struct io_wq_work_list compl_reqs; + struct io_submit_link link; + + bool plug_started; + bool need_plug; + bool flush_cqes; + unsigned short submit_nr; + struct blk_plug plug; +}; + +struct io_ev_fd { + struct eventfd_ctx *cq_ev_fd; + unsigned int eventfd_async: 1; + struct rcu_head rcu; +}; + +struct io_ring_ctx { + /* const or read-mostly hot data */ + struct { + struct percpu_ref refs; + + struct io_rings *rings; + unsigned int flags; + enum task_work_notify_mode notify_method; + unsigned int compat: 1; + unsigned int drain_next: 1; + unsigned int restricted: 1; + unsigned int off_timeout_used: 1; + unsigned int drain_active: 1; + unsigned int drain_disabled: 1; + unsigned int has_evfd: 1; + unsigned int syscall_iopoll: 1; + } ____cacheline_aligned_in_smp; + + /* submission data */ + struct { + struct mutex uring_lock; + + /* + * Ring buffer of indices into array of io_uring_sqe, which is + * mmapped by the application using the IORING_OFF_SQES offset. + * + * This indirection could e.g. be used to assign fixed + * io_uring_sqe entries to operations and only submit them to + * the queue when needed. + * + * The kernel modifies neither the indices array nor the entries + * array. + */ + u32 *sq_array; + struct io_uring_sqe *sq_sqes; + unsigned cached_sq_head; + unsigned sq_entries; + + /* + * Fixed resources fast path, should be accessed only under + * uring_lock, and updated through io_uring_register(2) + */ + struct io_rsrc_node *rsrc_node; + int rsrc_cached_refs; + atomic_t cancel_seq; + struct io_file_table file_table; + unsigned nr_user_files; + unsigned nr_user_bufs; + struct io_mapped_ubuf **user_bufs; + + struct io_submit_state submit_state; + + struct io_buffer_list *io_bl; + struct xarray io_bl_xa; + struct list_head io_buffers_cache; + + struct io_hash_table cancel_table_locked; + struct list_head cq_overflow_list; + struct list_head apoll_cache; + struct xarray personalities; + u32 pers_next; + } ____cacheline_aligned_in_smp; + + /* IRQ completion list, under ->completion_lock */ + struct io_wq_work_list locked_free_list; + unsigned int locked_free_nr; + + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ + struct io_sq_data *sq_data; /* if using sq thread polling */ + + struct wait_queue_head sqo_sq_wait; + struct list_head sqd_list; + + unsigned long check_cq; + + struct { + /* + * We cache a range of free CQEs we can use, once exhausted it + * should go through a slower range setup, see __io_get_cqe() + */ + struct io_uring_cqe *cqe_cached; + struct io_uring_cqe *cqe_sentinel; + + unsigned cached_cq_tail; + unsigned cq_entries; + struct io_ev_fd __rcu *io_ev_fd; + struct wait_queue_head cq_wait; + unsigned cq_extra; + } ____cacheline_aligned_in_smp; + + struct { + spinlock_t completion_lock; + + /* + * ->iopoll_list is protected by the ctx->uring_lock for + * io_uring instances that don't use IORING_SETUP_SQPOLL. + * For SQPOLL, only the single threaded io_sq_thread() will + * manipulate the list, hence no extra locking is needed there. + */ + struct io_wq_work_list iopoll_list; + struct io_hash_table cancel_table; + bool poll_multi_queue; + + struct list_head io_buffers_comp; + } ____cacheline_aligned_in_smp; + + /* timeouts */ + struct { + spinlock_t timeout_lock; + atomic_t cq_timeouts; + struct list_head timeout_list; + struct list_head ltimeout_list; + unsigned cq_last_tm_flush; + } ____cacheline_aligned_in_smp; + + /* Keep this last, we don't need it for the fast path */ + + struct io_restriction restrictions; + struct task_struct *submitter_task; + + /* slow path rsrc auxilary data, used by update/register */ + struct io_rsrc_node *rsrc_backup_node; + struct io_mapped_ubuf *dummy_ubuf; + struct io_rsrc_data *file_data; + struct io_rsrc_data *buf_data; + + struct delayed_work rsrc_put_work; + struct llist_head rsrc_put_llist; + struct list_head rsrc_ref_list; + spinlock_t rsrc_ref_lock; + + struct list_head io_buffers_pages; + + #if defined(CONFIG_UNIX) + struct socket *ring_sock; + #endif + /* hashed buffered write serialization */ + struct io_wq_hash *hash_map; + + /* Only used for accounting purposes */ + struct user_struct *user; + struct mm_struct *mm_account; + + /* ctx exit and cancelation */ + struct llist_head fallback_llist; + struct delayed_work fallback_work; + struct work_struct exit_work; + struct list_head tctx_list; + struct completion ref_comp; + + /* io-wq management, e.g. thread count */ + u32 iowq_limits[2]; + bool iowq_limits_set; + + struct list_head defer_list; + unsigned sq_thread_idle; +}; + +enum { + REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, + REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, + REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT, + REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT, + REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, + REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, + REQ_F_CQE_SKIP_BIT = IOSQE_CQE_SKIP_SUCCESS_BIT, + + /* first byte is taken by user flags, shift it to not overlap */ + REQ_F_FAIL_BIT = 8, + REQ_F_INFLIGHT_BIT, + REQ_F_CUR_POS_BIT, + REQ_F_NOWAIT_BIT, + REQ_F_LINK_TIMEOUT_BIT, + REQ_F_NEED_CLEANUP_BIT, + REQ_F_POLLED_BIT, + REQ_F_BUFFER_SELECTED_BIT, + REQ_F_BUFFER_RING_BIT, + REQ_F_REISSUE_BIT, + REQ_F_CREDS_BIT, + REQ_F_REFCOUNT_BIT, + REQ_F_ARM_LTIMEOUT_BIT, + REQ_F_ASYNC_DATA_BIT, + REQ_F_SKIP_LINK_CQES_BIT, + REQ_F_SINGLE_POLL_BIT, + REQ_F_DOUBLE_POLL_BIT, + REQ_F_PARTIAL_IO_BIT, + REQ_F_CQE32_INIT_BIT, + REQ_F_APOLL_MULTISHOT_BIT, + REQ_F_CLEAR_POLLIN_BIT, + REQ_F_HASH_LOCKED_BIT, + /* keep async read/write and isreg together and in order */ + REQ_F_SUPPORT_NOWAIT_BIT, + REQ_F_ISREG_BIT, + + /* not a real bit, just to check we're not overflowing the space */ + __REQ_F_LAST_BIT, +}; + +enum { + /* ctx owns file */ + REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), + /* drain existing IO first */ + REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), + /* linked sqes */ + REQ_F_LINK = BIT(REQ_F_LINK_BIT), + /* doesn't sever on completion < 0 */ + REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), + /* IOSQE_ASYNC */ + REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), + /* IOSQE_BUFFER_SELECT */ + REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), + /* IOSQE_CQE_SKIP_SUCCESS */ + REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT), + + /* fail rest of links */ + REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), + /* on inflight list, should be cancelled and waited on exit reliably */ + REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), + /* read/write uses file position */ + REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), + /* must not punt to workers */ + REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), + /* has or had linked timeout */ + REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), + /* needs cleanup */ + REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), + /* already went through poll handler */ + REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), + /* buffer already selected */ + REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), + /* buffer selected from ring, needs commit */ + REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT), + /* caller should reissue async */ + REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), + /* supports async reads/writes */ + REQ_F_SUPPORT_NOWAIT = BIT(REQ_F_SUPPORT_NOWAIT_BIT), + /* regular file */ + REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), + /* has creds assigned */ + REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), + /* skip refcounting if not set */ + REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), + /* there is a linked timeout that has to be armed */ + REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), + /* ->async_data allocated */ + REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT), + /* don't post CQEs while failing linked requests */ + REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT), + /* single poll may be active */ + REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT), + /* double poll may active */ + REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT), + /* request has already done partial IO */ + REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), + /* fast poll multishot mode */ + REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT), + /* ->extra1 and ->extra2 are initialised */ + REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT), + /* recvmsg special flag, clear EPOLLIN */ + REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT), + /* hashed into ->cancel_hash_locked, protected by ->uring_lock */ + REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT), +}; + +typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked); + +struct io_task_work { + union { + struct io_wq_work_node node; + struct llist_node fallback_node; + }; + io_req_tw_func_t func; +}; + +struct io_cqe { + __u64 user_data; + __s32 res; + /* fd initially, then cflags for completion */ + union { + __u32 flags; + int fd; + }; +}; + +/* + * Each request type overlays its private data structure on top of this one. + * They must not exceed this one in size. + */ +struct io_cmd_data { + struct file *file; + /* each command gets 56 bytes of data */ + __u8 data[56]; +}; + +#define io_kiocb_to_cmd(req) ((void *) &(req)->cmd) +#define cmd_to_io_kiocb(ptr) ((struct io_kiocb *) ptr) + +struct io_kiocb { + union { + /* + * NOTE! Each of the io_kiocb union members has the file pointer + * as the first entry in their struct definition. So you can + * access the file pointer through any of the sub-structs, + * or directly as just 'file' in this struct. + */ + struct file *file; + struct io_cmd_data cmd; + }; + + u8 opcode; + /* polled IO has completed */ + u8 iopoll_completed; + /* + * Can be either a fixed buffer index, or used with provided buffers. + * For the latter, before issue it points to the buffer group ID, + * and after selection it points to the buffer ID itself. + */ + u16 buf_index; + unsigned int flags; + + struct io_cqe cqe; + + struct io_ring_ctx *ctx; + struct task_struct *task; + + struct io_rsrc_node *rsrc_node; + + union { + /* store used ubuf, so we can prevent reloading */ + struct io_mapped_ubuf *imu; + + /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ + struct io_buffer *kbuf; + + /* + * stores buffer ID for ring provided buffers, valid IFF + * REQ_F_BUFFER_RING is set. + */ + struct io_buffer_list *buf_list; + }; + + union { + /* used by request caches, completion batching and iopoll */ + struct io_wq_work_node comp_list; + /* cache ->apoll->events */ + __poll_t apoll_events; + }; + atomic_t refs; + atomic_t poll_refs; + struct io_task_work io_task_work; + /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ + union { + struct hlist_node hash_node; + struct { + u64 extra1; + u64 extra2; + }; + }; + /* internal polling, see IORING_FEAT_FAST_POLL */ + struct async_poll *apoll; + /* opcode allocated if it needs to store data for async defer */ + void *async_data; + /* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */ + struct io_kiocb *link; + /* custom credentials, valid IFF REQ_F_CREDS is set */ + const struct cred *creds; + struct io_wq_work work; +}; + +struct io_cancel_data { + struct io_ring_ctx *ctx; + union { + u64 data; + struct file *file; + }; + u32 flags; + int seq; +}; + +struct io_overflow_cqe { + struct list_head list; + struct io_uring_cqe cqe; +}; + +struct io_mapped_ubuf { + u64 ubuf; + u64 ubuf_end; + unsigned int nr_bvecs; + unsigned long acct_pages; + struct bio_vec bvec[]; +}; + +#endif -- cgit v1.2.3 From ad163a7e2562230c77102c60f668bac440e60cce Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 18 Jun 2022 19:44:33 -0600 Subject: io_uring: move a few private types to local headers Commit 3a3d47fa9cfd ("io_uring: make io_uring_types.h public") moved a bunch of io_uring types to a kernel wide header, so we could make tracing a bit saner rather than pass in a ton of arguments. However, there are a few types in there that are not really needed to be system wide. Move the cancel data and mapped buffers back to the appropriate io_uring local headers. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 779c72da5b8f..2015f3ea7cb7 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -528,27 +528,9 @@ struct io_kiocb { struct io_wq_work work; }; -struct io_cancel_data { - struct io_ring_ctx *ctx; - union { - u64 data; - struct file *file; - }; - u32 flags; - int seq; -}; - struct io_overflow_cqe { struct list_head list; struct io_uring_cqe cqe; }; -struct io_mapped_ubuf { - u64 ubuf; - u64 ubuf_end; - unsigned int nr_bvecs; - unsigned long acct_pages; - struct bio_vec bvec[]; -}; - #endif -- cgit v1.2.3 From d9dee4302a7cbd6c0142dbdf6d150acc7459de0d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 19 Jun 2022 12:26:08 +0100 Subject: io_uring: remove ->flush_cqes optimisation It's not clear how widely used IOSQE_CQE_SKIP_SUCCESS is, and how often ->flush_cqes flag prevents from completion being flushed. Sometimes it's high level of concurrency that enables it at least for one CQE, but sometimes it doesn't save much because nobody waiting on the CQ. Remove ->flush_cqes flag and the optimisation, it should benefit the normal use case. Note, that there is no spurious eventfd problem with that as checks for spuriousness were incorporated into io_eventfd_signal(). Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/692e81eeddccc096f449a7960365fa7b4a18f8e6.1655637157.git.asml.silence@gmail.com [axboe: remove now dead state->flush_cqes variable] Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 2015f3ea7cb7..6bcd7bff6479 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -148,7 +148,6 @@ struct io_submit_state { bool plug_started; bool need_plug; - bool flush_cqes; unsigned short submit_nr; struct blk_plug plug; }; -- cgit v1.2.3 From 305bef98870816ae58357d647521891ec558a92e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 20 Jun 2022 01:25:55 +0100 Subject: io_uring: hide eventfd assumptions in eventfd paths Some io_uring-eventfd users assume that there won't be spurious wakeups. That assumption has to be honoured by all io_cqring_ev_posted() callers, which is inconvenient and from time to time leads to problems but should be maintained to not break the userspace. Instead of making the callers track whether a CQE was posted or not, hide it inside io_eventfd_signal(). It saves ->cached_cq_tail it saw last time and triggers the eventfd only when ->cached_cq_tail changed since then. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0ffc66bae37a2513080b601e4370e147faaa72c5.1655684496.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 6bcd7bff6479..5987f8acca38 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -314,6 +314,8 @@ struct io_ring_ctx { struct list_head defer_list; unsigned sq_thread_idle; + /* protected by ->completion_lock */ + unsigned evfd_last_cq_tail; }; enum { -- cgit v1.2.3 From f88262e60bb9cb5740891672ce9f405e7f9393e5 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Wed, 22 Jun 2022 06:40:23 -0700 Subject: io_uring: lockless task list With networking use cases we see contention on the spinlock used to protect the task_list when multiple threads try and add completions at once. Instead we can use a lockless list, and assume that the first caller to add to the list is responsible for kicking off task work. Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220622134028.2013417-4-dylany@fb.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 5987f8acca38..918165a20053 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -428,7 +428,7 @@ typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked); struct io_task_work { union { - struct io_wq_work_node node; + struct llist_node node; struct llist_node fallback_node; }; io_req_tw_func_t func; -- cgit v1.2.3 From 3218e5d32dbcf1b9c6dc589eca21deebb14215fa Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 25 Jun 2022 11:52:59 +0100 Subject: io_uring: fuse fallback_node and normal tw node Now as both normal and fallback paths use llist, just keep one node head in struct io_task_work and kill off ->fallback_node. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/d04ebde409f7b162fe247b361b4486b193293e46.1656153285.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 918165a20053..3ca8f363f504 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -427,10 +427,7 @@ enum { typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked); struct io_task_work { - union { - struct llist_node node; - struct llist_node fallback_node; - }; + struct llist_node node; io_req_tw_func_t func; }; -- cgit v1.2.3 From 6e73dffbb93cb8797cd4e42e98d837edf0f1a967 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 25 Jun 2022 11:55:38 +0100 Subject: io_uring: let to set a range for file slot allocation From recently io_uring provides an option to allocate a file index for operation registering fixed files. However, it's utterly unusable with mixed approaches when for a part of files the userspace knows better where to place it, as it may race and users don't have any sane way to pick a slot and hoping it will not be taken. Let the userspace to register a range of fixed file slots in which the auto-allocation happens. The use case is splittting the fixed table in two parts, where on of them is used for auto-allocation and another for slot-specified operations. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/66ab0394e436f38437cf7c44676e1920d09687ad.1656154403.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3ca8f363f504..26ef11e978d4 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -233,6 +233,9 @@ struct io_ring_ctx { unsigned long check_cq; + unsigned int file_alloc_start; + unsigned int file_alloc_end; + struct { /* * We cache a range of free CQEs we can use, once exhausted it -- cgit v1.2.3 From 9b797a37c4bd83b03cedcfbd15852b836f5e562c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Jul 2022 14:16:20 -0600 Subject: io_uring: add abstraction around apoll cache In preparation for adding limits, and one more user, abstract out the core bits of the allocation+free cache. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 26ef11e978d4..b548da03b563 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -158,6 +158,10 @@ struct io_ev_fd { struct rcu_head rcu; }; +struct io_alloc_cache { + struct hlist_head list; +}; + struct io_ring_ctx { /* const or read-mostly hot data */ struct { @@ -216,7 +220,7 @@ struct io_ring_ctx { struct io_hash_table cancel_table_locked; struct list_head cq_overflow_list; - struct list_head apoll_cache; + struct io_alloc_cache apoll_cache; struct xarray personalities; u32 pers_next; } ____cacheline_aligned_in_smp; -- cgit v1.2.3 From 9731bc9855dc169f27433fef3c4d0ff3496c512d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Jul 2022 14:20:54 -0600 Subject: io_uring: impose max limit on apoll cache Caches like this tend to grow to the peak size, and then never get any smaller. Impose a max limit on the size, to prevent it from growing too big. A somewhat randomly chosen 512 is the max size we'll allow the cache to get. If a batch of frees come in and would bring it over that, we simply start kfree'ing the surplus. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index b548da03b563..bf8f95332eda 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -160,6 +160,7 @@ struct io_ev_fd { struct io_alloc_cache { struct hlist_head list; + unsigned int nr_cached; }; struct io_ring_ctx { -- cgit v1.2.3 From 43e0bbbd0b0e30d232fd8e9e908125b5c49a9fbc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Jul 2022 14:30:09 -0600 Subject: io_uring: add netmsg cache For recvmsg/sendmsg, if they don't complete inline, we currently need to allocate a struct io_async_msghdr for each request. This is a somewhat large struct. Hook up sendmsg/recvmsg to use the io_alloc_cache. This reduces the alloc + free overhead considerably, yielding 4-5% of extra performance running netbench. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index bf8f95332eda..d54b8b7e0746 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -222,8 +222,7 @@ struct io_ring_ctx { struct io_hash_table cancel_table_locked; struct list_head cq_overflow_list; struct io_alloc_cache apoll_cache; - struct xarray personalities; - u32 pers_next; + struct io_alloc_cache netmsg_cache; } ____cacheline_aligned_in_smp; /* IRQ completion list, under ->completion_lock */ @@ -241,6 +240,9 @@ struct io_ring_ctx { unsigned int file_alloc_start; unsigned int file_alloc_end; + struct xarray personalities; + u32 pers_next; + struct { /* * We cache a range of free CQEs we can use, once exhausted it -- cgit v1.2.3 From 7fa875b8e53c288d616234b9daf417b0650ce1cc Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Thu, 14 Jul 2022 04:02:56 -0700 Subject: net: copy from user before calling __copy_msghdr this is in preparation for multishot receive from io_uring, where it needs to have access to the original struct user_msghdr. functionally this should be a no-op. Acked-by: Paolo Abeni Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220714110258.1336200-2-dylany@fb.com Signed-off-by: Jens Axboe --- include/linux/socket.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 17311ad9f9af..be24f1c8568a 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -416,10 +416,9 @@ extern int recvmsg_copy_msghdr(struct msghdr *msg, struct user_msghdr __user *umsg, unsigned flags, struct sockaddr __user **uaddr, struct iovec **iov); -extern int __copy_msghdr_from_user(struct msghdr *kmsg, - struct user_msghdr __user *umsg, - struct sockaddr __user **save_addr, - struct iovec __user **uiov, size_t *nsegs); +extern int __copy_msghdr(struct msghdr *kmsg, + struct user_msghdr *umsg, + struct sockaddr __user **save_addr); /* helpers which do the actual work for syscalls */ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, -- cgit v1.2.3 From fe6c9c6e3e3e332b998393d214fba9d09ab0acb0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 10:51:46 -0700 Subject: mm: Add balance_dirty_pages_ratelimited_flags() function This adds the helper function balance_dirty_pages_ratelimited_flags(). It adds the parameter flags to balance_dirty_pages_ratelimited(). The flags parameter is passed to balance_dirty_pages(). For async buffered writes the flag value will be BDP_ASYNC. If balance_dirty_pages() gets called for async buffered write, we don't want to wait. Instead we need to indicate to the caller that throttling is needed so that it can stop writing and offload the rest of the write to a context that can block. The new helper function is also used by balance_dirty_pages_ratelimited(). Signed-off-by: Jan Kara Signed-off-by: Stefan Roesch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623175157.1715274-4-shr@fb.com [axboe: fix kerneltest bot 'ret' issue] Signed-off-by: Jens Axboe --- include/linux/writeback.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index da21d63f70e2..b8c9610c2313 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -364,7 +364,14 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); void wb_update_bandwidth(struct bdi_writeback *wb); + +/* Invoke balance dirty pages in async mode. */ +#define BDP_ASYNC 0x0001 + void balance_dirty_pages_ratelimited(struct address_space *mapping); +int balance_dirty_pages_ratelimited_flags(struct address_space *mapping, + unsigned int flags); + bool wb_over_bg_thresh(struct bdi_writeback *wb); typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, -- cgit v1.2.3 From 8017553980d0bbfef3e66c583363828565afd6da Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 23 Jun 2022 10:51:50 -0700 Subject: fs: add a FMODE_BUF_WASYNC flags for f_mode This introduces the flag FMODE_BUF_WASYNC. If devices support async buffered writes, this flag can be set. It also modifies the check in generic_write_checks to take async buffered writes into consideration. Signed-off-by: Stefan Roesch Reviewed-by: Christoph Hellwig Reviewed-by: Christian Brauner (Microsoft) Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20220623175157.1715274-8-shr@fb.com Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae..bc84847c201e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -180,6 +180,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File supports async buffered reads */ #define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) +/* File supports async nowait buffered writes */ +#define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000) + /* * Attribute flags. These should be or-ed together to figure out what * has been changed! -- cgit v1.2.3 From 66fa3cedf16abc82d19b943e3289c82e685419d5 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 23 Jun 2022 10:51:53 -0700 Subject: fs: Add async write file modification handling. This adds a file_modified_async() function to return -EAGAIN if the request either requires to remove privileges or needs to update the file modification time. This is required for async buffered writes, so the request gets handled in the io worker of io-uring. Signed-off-by: Stefan Roesch Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Christian Brauner (Microsoft) Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20220623175157.1715274-11-shr@fb.com Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index bc84847c201e..c0d99b5a166b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2390,6 +2390,7 @@ static inline void file_accessed(struct file *file) } extern int file_modified(struct file *file); +int kiocb_modified(struct kiocb *iocb); int sync_inode_metadata(struct inode *inode, int wait); -- cgit v1.2.3 From e70cb60893ca64b7df06864aa16c1cf6d6c671db Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:37 +0100 Subject: io_uring: export io_put_task() Make io_put_task() available to non-core parts of io_uring, we'll need it for notification infrastructure. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/3686807d4c03b72e389947b0e8692d4d44334ef0.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index d54b8b7e0746..368c34d14b13 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -4,6 +4,7 @@ #include #include #include +#include #include struct io_wq_work_node { @@ -43,6 +44,30 @@ struct io_hash_table { unsigned hash_bits; }; +/* + * Arbitrary limit, can be raised if need be + */ +#define IO_RINGFD_REG_MAX 16 + +struct io_uring_task { + /* submission side */ + int cached_refs; + const struct io_ring_ctx *last; + struct io_wq *io_wq; + struct file *registered_rings[IO_RINGFD_REG_MAX]; + + struct xarray xa; + struct wait_queue_head wait; + atomic_t in_idle; + atomic_t inflight_tracked; + struct percpu_counter inflight; + + struct { /* task_work */ + struct llist_head task_list; + struct callback_head task_work; + } ____cacheline_aligned_in_smp; +}; + struct io_uring { u32 head ____cacheline_aligned_in_smp; u32 tail ____cacheline_aligned_in_smp; -- cgit v1.2.3 From eb42cebb2cf24c48f60c32856a4bba93d42659c8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:38 +0100 Subject: io_uring: add zc notification infrastructure Add internal part of send zerocopy notifications. There are two main structures, the first one is struct io_notif, which carries inside struct ubuf_info and maps 1:1 to it. io_uring will be binding a number of zerocopy send requests to it and ask to complete (aka flush) it. When flushed and all attached requests and skbs complete, it'll generate one and only one CQE. There are intended to be passed into the network layer as struct msghdr::msg_ubuf. The second concept is notification slots. The userspace will be able to register an array of slots and subsequently addressing them by the index in the array. Slots are independent of each other. Each slot can have only one notifier at a time (called active notifier) but many notifiers during the lifetime. When active, a notifier not going to post any completion but the userspace can attach requests to it by specifying the corresponding slot while issueing send zc requests. Eventually, the userspace will want to "flush" the notifier losing any way to attach new requests to it, however it can use the next atomatically added notifier of this slot or of any other slot. When the network layer is done with all enqueued skbs attached to a notifier and doesn't need the specified in them user data, the flushed notifier will post a CQE. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/3ecf54c31a85762bf679b0a432c9f43ecf7e61cc.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 368c34d14b13..f7fab3758cb9 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -34,6 +34,9 @@ struct io_file_table { unsigned int alloc_hint; }; +struct io_notif; +struct io_notif_slot; + struct io_hash_bucket { spinlock_t lock; struct hlist_head list; @@ -237,6 +240,8 @@ struct io_ring_ctx { unsigned nr_user_files; unsigned nr_user_bufs; struct io_mapped_ubuf **user_bufs; + struct io_notif_slot *notif_slots; + unsigned nr_notif_slots; struct io_submit_state submit_state; -- cgit v1.2.3 From eb4a299b2f95437af6183946c2a2e850621cefdb Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:39 +0100 Subject: io_uring: cache struct io_notif kmalloc'ing struct io_notif is too expensive when done frequently, cache them as many other resources in io_uring. Keep two list, the first one is from where we're getting notifiers, it's protected by ->uring_lock. The second is protected by ->completion_lock, to which we queue released notifiers. Then we splice one list into another when needed. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9dec18f7fcbab9f4bd40b96e5ae158b119945230.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index f7fab3758cb9..144493cbadb5 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -249,6 +249,9 @@ struct io_ring_ctx { struct xarray io_bl_xa; struct list_head io_buffers_cache; + /* struct io_notif cache, protected by uring_lock */ + struct list_head notif_list; + struct io_hash_table cancel_table_locked; struct list_head cq_overflow_list; struct io_alloc_cache apoll_cache; @@ -259,6 +262,10 @@ struct io_ring_ctx { struct io_wq_work_list locked_free_list; unsigned int locked_free_nr; + /* struct io_notif cache protected by completion_lock */ + struct list_head notif_list_locked; + unsigned int notif_locked_nr; + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ struct io_sq_data *sq_data; /* if using sq thread polling */ -- cgit v1.2.3 From 9d9b010f12cc4e254bbba32d79c965b564a8957f Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Sun, 24 Jul 2022 23:21:52 +0100 Subject: irqchip/mmp: Declare init functions in common header file The functions icu_init_irq and mmp2_init_icu are exported from this code, so declare them in the header file to avoid the following sparse warnings: drivers/irqchip/irq-mmp.c:248:13: warning: symbol 'icu_init_irq' was not declared. Should it be static? drivers/irqchip/irq-mmp.c:271:13: warning: symbol 'mmp2_init_icu' was not declared. Should it be static? Signed-off-by: Ben Dooks [maz: fixup commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220724222152.551850-1-ben-linux@fluff.org --- include/linux/irqchip/mmp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/mmp.h b/include/linux/irqchip/mmp.h index cb8455c87c8a..aa1813749a4f 100644 --- a/include/linux/irqchip/mmp.h +++ b/include/linux/irqchip/mmp.h @@ -4,4 +4,7 @@ extern struct irq_chip icu_irq_chip; +extern void icu_init_irq(void); +extern void mmp2_init_icu(void); + #endif /* __IRQCHIP_MMP_H */ -- cgit v1.2.3 From d349ab99eec7ab0f977fc4aac27aa476907acf90 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 17 Jul 2022 12:35:24 +0200 Subject: random: handle archrandom with multiple longs The archrandom interface was originally designed for x86, which supplies RDRAND/RDSEED for receiving random words into registers, resulting in one function to generate an int and another to generate a long. However, other architectures don't follow this. On arm64, the SMCCC TRNG interface can return between one and three longs. On s390, the CPACF TRNG interface can return arbitrary amounts, with four longs having the same cost as one. On UML, the os_getrandom() interface can return arbitrary amounts. So change the api signature to take a "max_longs" parameter designating the maximum number of longs requested, and then return the number of longs generated. Since callers need to check this return value and loop anyway, each arch implementation does not bother implementing its own loop to try again to fill the maximum number of longs. Additionally, all existing callers pass in a constant max_longs parameter. Taken together, these two things mean that the codegen doesn't really change much for one-word-at-a-time platforms, while performance is greatly improved on platforms such as s390. Acked-by: Heiko Carstens Acked-by: Catalin Marinas Acked-by: Mark Rutland Acked-by: Michael Ellerman Acked-by: Borislav Petkov Signed-off-by: Jason A. Donenfeld --- include/linux/random.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 865770e29f3e..3fec206487f6 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -112,19 +112,19 @@ declare_get_random_var_wait(long, unsigned long) * Called from the boot CPU during startup; not valid to call once * secondary CPUs are up and preemption is possible. */ -#ifndef arch_get_random_seed_long_early -static inline bool __init arch_get_random_seed_long_early(unsigned long *v) +#ifndef arch_get_random_seed_longs_early +static inline size_t __init arch_get_random_seed_longs_early(unsigned long *v, size_t max_longs) { WARN_ON(system_state != SYSTEM_BOOTING); - return arch_get_random_seed_long(v); + return arch_get_random_seed_longs(v, max_longs); } #endif -#ifndef arch_get_random_long_early -static inline bool __init arch_get_random_long_early(unsigned long *v) +#ifndef arch_get_random_longs_early +static inline bool __init arch_get_random_longs_early(unsigned long *v, size_t max_longs) { WARN_ON(system_state != SYSTEM_BOOTING); - return arch_get_random_long(v); + return arch_get_random_longs(v, max_longs); } #endif -- cgit v1.2.3 From 7ffcdaa670164a2ad3844a5ef6df5423782ba290 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 25 Jul 2022 09:32:21 -0400 Subject: SUNRPC expose functions for offline remote xprt functionality Re-arrange the code that make offline transport and delete transport callable functions. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 522bbf937957..0d51b9f9ea37 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -505,4 +505,7 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_BINDING, &xprt->state); } +void xprt_set_offline_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps); +void xprt_set_online_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps); +void xprt_delete_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps); #endif /* _LINUX_SUNRPC_XPRT_H */ -- cgit v1.2.3 From 895245ccea251ff54ea19bc364c9a49007918115 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 25 Jul 2022 09:32:22 -0400 Subject: SUNRPC add function to offline remove trunkable transports Iterate thru available transports in the xprt_switch for all trunkable transports offline and possibly remote them as well. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 90501404fa49..d14333f4947a 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -234,6 +234,7 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *, struct rpc_xprt_switch *, struct rpc_xprt *, void *); +void rpc_clnt_manage_trunked_xprts(struct rpc_clnt *); const char *rpc_proc_name(const struct rpc_task *task); -- cgit v1.2.3 From 95d0d30c66b855f614e677b8cd0455eed0765a6f Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 25 Jul 2022 09:32:24 -0400 Subject: SUNRPC create an iterator to list only OFFLINE xprts Create a new iterator helper that will go thru the all the transports in the switch and return transports that are marked OFFLINE. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtmultipath.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index bbb8a5fa0816..688ca7eb1d01 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -63,6 +63,9 @@ extern void xprt_iter_init(struct rpc_xprt_iter *xpi, extern void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps); +extern void xprt_iter_init_listoffline(struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *xps); + extern void xprt_iter_destroy(struct rpc_xprt_iter *xpi); extern struct rpc_xprt_switch *xprt_iter_xchg_switch( -- cgit v1.2.3 From 9368fd6c75053630e95a6dbd17c9522e82101276 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 25 Jul 2022 09:32:25 -0400 Subject: SUNRPC enable back offline transports in trunking discovery When we are adding a transport to a xprt_switch that's already on the list but has been marked OFFLINE, then make the state ONLINE since it's been tested now. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index d14333f4947a..71a3a1dd7e81 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -242,6 +242,7 @@ void rpc_clnt_xprt_switch_put(struct rpc_clnt *); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); +void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt); void rpc_cleanup_clids(void); static inline int rpc_reply_expected(struct rpc_task *task) -- cgit v1.2.3 From 497e6464d6adcee64f071b18fc826e63cfd2f0a5 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 25 Jul 2022 09:32:26 -0400 Subject: SUNRPC create an rpc function that allows xprt removal from rpc_clnt Expose a function that allows a removal of xprt from the rpc_clnt. When called from NFS that's running a trunked transport then don't decrement the active transport counter. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/xprtmultipath.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 71a3a1dd7e81..7a43fd514398 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -240,6 +240,7 @@ const char *rpc_proc_name(const struct rpc_task *task); void rpc_clnt_xprt_switch_put(struct rpc_clnt *); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); +void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt); diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index 688ca7eb1d01..9fff0768d942 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -55,7 +55,7 @@ extern void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps); extern void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt); extern void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, - struct rpc_xprt *xprt); + struct rpc_xprt *xprt, bool offline); extern void xprt_iter_init(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps); -- cgit v1.2.3 From 273d6aed9e5a1859dda15256f45561315c3d237a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 25 Jul 2022 09:32:29 -0400 Subject: SUNRPC export xprt_iter_rewind function Make xprt_iter_rewind callable outside of xprtmultipath.c Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtmultipath.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index 9fff0768d942..c0514c684b2c 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -68,6 +68,8 @@ extern void xprt_iter_init_listoffline(struct rpc_xprt_iter *xpi, extern void xprt_iter_destroy(struct rpc_xprt_iter *xpi); +extern void xprt_iter_rewind(struct rpc_xprt_iter *xpi); + extern struct rpc_xprt_switch *xprt_iter_xchg_switch( struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *newswitch); -- cgit v1.2.3 From 92cc04f60ab4ae199eee507e5cd4d5aa6c722e9c Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 25 Jul 2022 09:32:30 -0400 Subject: SUNRPC create a function that probes only offline transports For only offline transports, attempt to check connectivity via a NULL call and, if that succeeds, call a provided session trunking detection function. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 7a43fd514398..75eea5ebb179 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -235,6 +235,8 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *, struct rpc_xprt *, void *); void rpc_clnt_manage_trunked_xprts(struct rpc_clnt *); +void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *, + struct rpc_add_xprt_test *); const char *rpc_proc_name(const struct rpc_task *task); -- cgit v1.2.3 From 39ade048a32ea653b94dcfbf816b0b13a6be8a33 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Wed, 6 Jul 2022 13:15:19 +0200 Subject: highmem: Make __kunmap_{local,atomic}() take const void pointer __kunmap_ {local,atomic}() currently take pointers to void. However, this is semantically incorrect, since these functions do not change the memory their arguments point to. Therefore, make this semantics explicit by modifying the __kunmap_{local,atomic}() prototypes to take pointers to const void. As a side effect, compilers may produce more efficient code. Acked-by: Andrew Morton Acked-by: Helge Deller # parisc Suggested-by: David Sterba Suggested-by: Ira Weiny Reviewed-by: Ira Weiny Signed-off-by: Fabio M. De Francesco Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/linux/highmem-internal.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index cddb42ff0473..034b1106d022 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -8,7 +8,7 @@ #ifdef CONFIG_KMAP_LOCAL void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); void *__kmap_local_page_prot(struct page *page, pgprot_t prot); -void kunmap_local_indexed(void *vaddr); +void kunmap_local_indexed(const void *vaddr); void kmap_local_fork(struct task_struct *tsk); void __kmap_local_sched_out(void); void __kmap_local_sched_in(void); @@ -89,7 +89,7 @@ static inline void *kmap_local_pfn(unsigned long pfn) return __kmap_local_pfn_prot(pfn, kmap_prot); } -static inline void __kunmap_local(void *vaddr) +static inline void __kunmap_local(const void *vaddr) { kunmap_local_indexed(vaddr); } @@ -121,7 +121,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn) return __kmap_local_pfn_prot(pfn, kmap_prot); } -static inline void __kunmap_atomic(void *addr) +static inline void __kunmap_atomic(const void *addr) { kunmap_local_indexed(addr); pagefault_enable(); @@ -197,7 +197,7 @@ static inline void *kmap_local_pfn(unsigned long pfn) return kmap_local_page(pfn_to_page(pfn)); } -static inline void __kunmap_local(void *addr) +static inline void __kunmap_local(const void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(addr); @@ -224,7 +224,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn) return kmap_atomic(pfn_to_page(pfn)); } -static inline void __kunmap_atomic(void *addr) +static inline void __kunmap_atomic(const void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(addr); -- cgit v1.2.3 From 65ea1b66482f415d51cd46515b02477257330339 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Sat, 9 Jul 2022 08:18:38 +0900 Subject: block: add bdev_max_segments() helper Add bdev_max_segments() like other queue parameters. Reviewed-by: Johannes Thumshirn Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2f7b43444c5f..62e3ff52ab03 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1206,6 +1206,11 @@ bdev_max_zone_append_sectors(struct block_device *bdev) return queue_max_zone_append_sectors(bdev_get_queue(bdev)); } +static inline unsigned int bdev_max_segments(struct block_device *bdev) +{ + return queue_max_segments(bdev_get_queue(bdev)); +} + static inline unsigned queue_logical_block_size(const struct request_queue *q) { int retval = 512; -- cgit v1.2.3 From 44abdd1646e1fbfb781972c0bffc90b4eb3e87b3 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:51 -0700 Subject: vfio: Pass in starting IOVA to vfio_pin/unpin_pages API The vfio_pin/unpin_pages() so far accepted arrays of PFNs of user IOVA. Among all three callers, there was only one caller possibly passing in a non-contiguous PFN list, which is now ensured to have contiguous PFN inputs too. Pass in the starting address with "iova" alone to simplify things, so callers no longer need to maintain a PFN list or to pin/unpin one page at a time. This also allows VFIO to use more efficient implementations of pin/unpin_pages. For now, also update vfio_iommu_type1 to fit this new parameter too, while keeping its input intact (being user_iova) since we don't want to spend too much effort swapping its parameters and local variables at that level. Reviewed-by: Christoph Hellwig Reviewed-by: Kirti Wankhede Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Tony Krowiak Acked-by: Eric Farman Tested-by: Terrence Xu Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-6-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 9f7d74c24925..9e3b6abcf890 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -161,10 +161,9 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) -int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, +int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int npage, int prot, unsigned long *phys_pfn); -void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, - int npage); +void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, size_t len, bool write); -- cgit v1.2.3 From 8561aa4fb7d72011c2352af2b1b9caf588942181 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:54 -0700 Subject: vfio: Rename user_iova of vfio_dma_rw() Following the updated vfio_pin/unpin_pages(), use the simpler "iova". Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Tested-by: Terrence Xu Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-9-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 9e3b6abcf890..acefd663e63b 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -164,7 +164,7 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device); int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int npage, int prot, unsigned long *phys_pfn); void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); -int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, +int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, size_t len, bool write); /* -- cgit v1.2.3 From 34a255e67615995f729254307a0581c143e03752 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:56 -0700 Subject: vfio: Replace phys_pfn with pages for vfio_pin_pages() Most of the callers of vfio_pin_pages() want "struct page *" and the low-level mm code to pin pages returns a list of "struct page *" too. So there's no gain in converting "struct page *" to PFN in between. Replace the output parameter "phys_pfn" list with a "pages" list, to simplify callers. This also allows us to replace the vfio_iommu_type1 implementation with a more efficient one. And drop the pfn_valid check in the gvt code, as there is no need to do such a check at a page-backed struct page pointer. For now, also update vfio_iommu_type1 to fit this new parameter too. Reviewed-by: Christoph Hellwig Reviewed-by: Kirti Wankhede Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Eric Farman Tested-by: Terrence Xu Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-11-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index acefd663e63b..e05ddc6fe6a5 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -162,7 +162,7 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, - int npage, int prot, unsigned long *phys_pfn); + int npage, int prot, struct page **pages); void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, size_t len, bool write); -- cgit v1.2.3 From e948d32c54fa45cc1601c98956bdbbf5f17a3db2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 23 Jul 2022 19:57:17 +0200 Subject: video: fbdev: imxfb: Drop platform data support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No source file but the driver itself includes the header containing the platform data definition. The last user is gone since commit 8485adf17a15 ("ARM: imx: Remove imx device directory"). So we can safely drop platform data support. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- include/linux/platform_data/video-imxfb.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/video-imxfb.h b/include/linux/platform_data/video-imxfb.h index 02812651af7d..b80a156a6617 100644 --- a/include/linux/platform_data/video-imxfb.h +++ b/include/linux/platform_data/video-imxfb.h @@ -55,16 +55,4 @@ struct imx_fb_videomode { unsigned char bpp; }; -struct imx_fb_platform_data { - struct imx_fb_videomode *mode; - int num_modes; - - u_int pwmr; - u_int lscr1; - u_int dmacr; - - int (*init)(struct platform_device *); - void (*exit)(struct platform_device *); -}; - #endif /* ifndef __MACH_IMXFB_H__ */ -- cgit v1.2.3 From e2279cc92919f37b4af985cb28ae350bb3e62e71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 23 Jul 2022 19:57:18 +0200 Subject: video: fbdev: imxfb: Drop unused symbols from header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only file that includes is the imxfb driver. Drop all symbols that are unused there. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- include/linux/platform_data/video-imxfb.h | 35 ------------------------------- 1 file changed, 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/video-imxfb.h b/include/linux/platform_data/video-imxfb.h index b80a156a6617..a16837c5e43c 100644 --- a/include/linux/platform_data/video-imxfb.h +++ b/include/linux/platform_data/video-imxfb.h @@ -8,45 +8,10 @@ #include #define PCR_TFT (1 << 31) -#define PCR_COLOR (1 << 30) -#define PCR_PBSIZ_1 (0 << 28) -#define PCR_PBSIZ_2 (1 << 28) -#define PCR_PBSIZ_4 (2 << 28) -#define PCR_PBSIZ_8 (3 << 28) -#define PCR_BPIX_1 (0 << 25) -#define PCR_BPIX_2 (1 << 25) -#define PCR_BPIX_4 (2 << 25) #define PCR_BPIX_8 (3 << 25) #define PCR_BPIX_12 (4 << 25) #define PCR_BPIX_16 (5 << 25) #define PCR_BPIX_18 (6 << 25) -#define PCR_PIXPOL (1 << 24) -#define PCR_FLMPOL (1 << 23) -#define PCR_LPPOL (1 << 22) -#define PCR_CLKPOL (1 << 21) -#define PCR_OEPOL (1 << 20) -#define PCR_SCLKIDLE (1 << 19) -#define PCR_END_SEL (1 << 18) -#define PCR_END_BYTE_SWAP (1 << 17) -#define PCR_REV_VS (1 << 16) -#define PCR_ACD_SEL (1 << 15) -#define PCR_ACD(x) (((x) & 0x7f) << 8) -#define PCR_SCLK_SEL (1 << 7) -#define PCR_SHARP (1 << 6) -#define PCR_PCD(x) ((x) & 0x3f) - -#define PWMR_CLS(x) (((x) & 0x1ff) << 16) -#define PWMR_LDMSK (1 << 15) -#define PWMR_SCR1 (1 << 10) -#define PWMR_SCR0 (1 << 9) -#define PWMR_CC_EN (1 << 8) -#define PWMR_PW(x) ((x) & 0xff) - -#define LSCR1_PS_RISE_DELAY(x) (((x) & 0x7f) << 26) -#define LSCR1_CLS_RISE_DELAY(x) (((x) & 0x3f) << 16) -#define LSCR1_REV_TOGGLE_DELAY(x) (((x) & 0xf) << 8) -#define LSCR1_GRAY2(x) (((x) & 0xf) << 4) -#define LSCR1_GRAY1(x) (((x) & 0xf)) struct imx_fb_videomode { struct fb_videomode mode; -- cgit v1.2.3 From ded77a74ee6bc3dea72ad41129823a812e4b64f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 23 Jul 2022 19:57:19 +0200 Subject: video: fbdev: imxfb: Fold into only user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No source file but the driver itself includes the header containing the platform data definition. The last user is gone since commit 8485adf17a15 ("ARM: imx: Remove imx device directory"). Move the remaining symbols directly into the driver and remove the then unused header file. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- include/linux/platform_data/video-imxfb.h | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 include/linux/platform_data/video-imxfb.h (limited to 'include/linux') diff --git a/include/linux/platform_data/video-imxfb.h b/include/linux/platform_data/video-imxfb.h deleted file mode 100644 index a16837c5e43c..000000000000 --- a/include/linux/platform_data/video-imxfb.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This structure describes the machine which we are running on. - */ -#ifndef __MACH_IMXFB_H__ -#define __MACH_IMXFB_H__ - -#include - -#define PCR_TFT (1 << 31) -#define PCR_BPIX_8 (3 << 25) -#define PCR_BPIX_12 (4 << 25) -#define PCR_BPIX_16 (5 << 25) -#define PCR_BPIX_18 (6 << 25) - -struct imx_fb_videomode { - struct fb_videomode mode; - u32 pcr; - bool aus_mode; - unsigned char bpp; -}; - -#endif /* ifndef __MACH_IMXFB_H__ */ -- cgit v1.2.3 From 42399301203e3cddef36cde457228f9247618313 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:50:52 -0600 Subject: lib/scatterlist: add flag for indicating P2PDMA segments in an SGL Introduce a dma_flags field in struct scatterlist. These flags will be used by dma_[un]map_sg_p2pdma() to determine when a given SGL segments dma_address points to a PCI bus address. dma_unmap_sg_p2pdma() will need to perform different cleanup when a segment is marked as a bus address. The dma_flags field will fit in the existing padding on 64BIT systems (assuming CONFIG_NEED_SG_DMA_LENGTH is also set). The new bit will only be used when CONFIG_PCI_P2PDMA is set; this means PCI P2PDMA will require CONFIG_64BIT. This should be acceptable as the majority of P2PDMA use cases are restricted to newer root complexes and roughly require the extra address space for memory BARs used in the transactions. Signed-off-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig --- include/linux/scatterlist.h | 69 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 7ff9d6386c12..375a5e90d86a 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -16,6 +16,9 @@ struct scatterlist { #ifdef CONFIG_NEED_SG_DMA_LENGTH unsigned int dma_length; #endif +#ifdef CONFIG_PCI_P2PDMA + unsigned int dma_flags; +#endif }; /* @@ -245,6 +248,72 @@ static inline void sg_unmark_end(struct scatterlist *sg) sg->page_link &= ~SG_END; } +/* + * CONFGI_PCI_P2PDMA depends on CONFIG_64BIT which means there is 4 bytes + * in struct scatterlist (assuming also CONFIG_NEED_SG_DMA_LENGTH is set). + * Use this padding for DMA flags bits to indicate when a specific + * dma address is a bus address. + */ +#ifdef CONFIG_PCI_P2PDMA + +#define SG_DMA_BUS_ADDRESS (1 << 0) + +/** + * sg_dma_is_bus address - Return whether a given segment was marked + * as a bus address + * @sg: SG entry + * + * Description: + * Returns true if sg_dma_mark_bus_address() has been called on + * this segment. + **/ +static inline bool sg_is_dma_bus_address(struct scatterlist *sg) +{ + return sg->dma_flags & SG_DMA_BUS_ADDRESS; +} + +/** + * sg_dma_mark_bus address - Mark the scatterlist entry as a bus address + * @sg: SG entry + * + * Description: + * Marks the passed in sg entry to indicate that the dma_address is + * a bus address and doesn't need to be unmapped. This should only be + * used by dma_map_sg() implementations to mark bus addresses + * so they can be properly cleaned up in dma_unmap_sg(). + **/ +static inline void sg_dma_mark_bus_address(struct scatterlist *sg) +{ + sg->dma_flags |= SG_DMA_BUS_ADDRESS; +} + +/** + * sg_unmark_bus_address - Unmark the scatterlist entry as a bus address + * @sg: SG entry + * + * Description: + * Clears the bus address mark. + **/ +static inline void sg_dma_unmark_bus_address(struct scatterlist *sg) +{ + sg->dma_flags &= ~SG_DMA_BUS_ADDRESS; +} + +#else + +static inline bool sg_is_dma_bus_address(struct scatterlist *sg) +{ + return false; +} +static inline void sg_dma_mark_bus_address(struct scatterlist *sg) +{ +} +static inline void sg_dma_unmark_bus_address(struct scatterlist *sg) +{ +} + +#endif + /** * sg_phys - Return physical address of an sg entry * @sg: SG entry -- cgit v1.2.3 From 5e180ff326b43bd3fb72892e1083b2707159aa6e Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:50:54 -0600 Subject: PCI/P2PDMA: Introduce helpers for dma_map_sg implementations Add pci_p2pdma_map_segment() as a helper for dma_map_sg() implementations. It takes an scatterlist segment that must point to a pci_p2pdma struct page and will map it if the mapping requires a bus address. The return value indicates whether the mapping required a bus address or whether the caller still needs to map the segment normally. If the segment should not be mapped, -EREMOTEIO is returned. This helper uses a state structure to track the changes to the pgmap across calls and avoid needing to lookup into the xarray for every page. The prototype for the helper is added to dma-map-ops.h as it is only useful to dma map implementations and don't need to pollute the public pci-p2pdma header. Signed-off-by: Logan Gunthorpe Acked-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 98ceba6fa848..99cec59dbfcb 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -380,4 +380,57 @@ static inline void debug_dma_dump_mappings(struct device *dev) extern const struct dma_map_ops dma_dummy_ops; +enum pci_p2pdma_map_type { + /* + * PCI_P2PDMA_MAP_UNKNOWN: Used internally for indicating the mapping + * type hasn't been calculated yet. Functions that return this enum + * never return this value. + */ + PCI_P2PDMA_MAP_UNKNOWN = 0, + + /* + * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will + * traverse the host bridge and the host bridge is not in the + * allowlist. DMA Mapping routines should return an error when + * this is returned. + */ + PCI_P2PDMA_MAP_NOT_SUPPORTED, + + /* + * PCI_P2PDMA_BUS_ADDR: Indicates that two devices can talk to + * each other directly through a PCI switch and the transaction will + * not traverse the host bridge. Such a mapping should program + * the DMA engine with PCI bus addresses. + */ + PCI_P2PDMA_MAP_BUS_ADDR, + + /* + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk + * to each other, but the transaction traverses a host bridge on the + * allowlist. In this case, a normal mapping either with CPU physical + * addresses (in the case of dma-direct) or IOVA addresses (in the + * case of IOMMUs) should be used to program the DMA engine. + */ + PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, +}; + +struct pci_p2pdma_map_state { + struct dev_pagemap *pgmap; + int map; + u64 bus_off; +}; + +#ifdef CONFIG_PCI_P2PDMA +enum pci_p2pdma_map_type +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, + struct scatterlist *sg); +#else /* CONFIG_PCI_P2PDMA */ +static inline enum pci_p2pdma_map_type +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, + struct scatterlist *sg) +{ + return PCI_P2PDMA_MAP_NOT_SUPPORTED; +} +#endif /* CONFIG_PCI_P2PDMA */ + #endif /* _LINUX_DMA_MAP_OPS_H */ -- cgit v1.2.3 From 159bf19270e80b5bc4b13aa88072dcb390b4d297 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:50:57 -0600 Subject: dma-mapping: add flags to dma_map_ops to indicate PCI P2PDMA support Add a flags member to the dma_map_ops structure with one flag to indicate support for PCI P2PDMA. Also, add a helper to check if a device supports PCI P2PDMA. Signed-off-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 10 ++++++++++ include/linux/dma-mapping.h | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 99cec59dbfcb..010df04358aa 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -11,7 +11,17 @@ struct cma; +/* + * Values for struct dma_map_ops.flags: + * + * DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can + * handle PCI P2PDMA pages in the map_sg/unmap_sg operation. + */ +#define DMA_F_PCI_P2PDMA_SUPPORTED (1 << 0) + struct dma_map_ops { + unsigned int flags; + void *(*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index fe3849434b2a..25a30906289d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -140,6 +140,7 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, unsigned long attrs); bool dma_can_mmap(struct device *dev); int dma_supported(struct device *dev, u64 mask); +bool dma_pci_p2pdma_supported(struct device *dev); int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); @@ -251,6 +252,10 @@ static inline int dma_supported(struct device *dev, u64 mask) { return 0; } +static inline bool dma_pci_p2pdma_supported(struct device *dev) +{ + return false; +} static inline int dma_set_mask(struct device *dev, u64 mask) { return -EIO; -- cgit v1.2.3 From 0d06132fc84bd91379300768c75a19474e06d0c5 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:51:04 -0600 Subject: PCI/P2PDMA: Remove pci_p2pdma_[un]map_sg() This interface is superseded by support in dma_map_sg() which now supports heterogeneous scatterlists. There are no longer any users, so remove it. Signed-off-by: Logan Gunthorpe Acked-by: Bjorn Helgaas Reviewed-by: Jason Gunthorpe Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- include/linux/pci-p2pdma.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h index 8318a97c9c61..2c07aa6b7665 100644 --- a/include/linux/pci-p2pdma.h +++ b/include/linux/pci-p2pdma.h @@ -30,10 +30,6 @@ struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev, unsigned int *nents, u32 length); void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl); void pci_p2pmem_publish(struct pci_dev *pdev, bool publish); -int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs); -void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs); int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev, bool *use_p2pdma); ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev, @@ -83,17 +79,6 @@ static inline void pci_p2pmem_free_sgl(struct pci_dev *pdev, static inline void pci_p2pmem_publish(struct pci_dev *pdev, bool publish) { } -static inline int pci_p2pdma_map_sg_attrs(struct device *dev, - struct scatterlist *sg, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - return 0; -} -static inline void pci_p2pdma_unmap_sg_attrs(struct device *dev, - struct scatterlist *sg, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ -} static inline int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev, bool *use_p2pdma) { @@ -119,16 +104,4 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client) return pci_p2pmem_find_many(&client, 1); } -static inline int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) -{ - return pci_p2pdma_map_sg_attrs(dev, sg, nents, dir, 0); -} - -static inline void pci_p2pdma_unmap_sg(struct device *dev, - struct scatterlist *sg, int nents, enum dma_data_direction dir) -{ - pci_p2pdma_unmap_sg_attrs(dev, sg, nents, dir, 0); -} - #endif /* _LINUX_PCI_P2P_H */ -- cgit v1.2.3 From 019e442bb0d5e7f25ada8eb4252e22e62d17a95e Mon Sep 17 00:00:00 2001 From: Axe Yang Date: Tue, 26 Jul 2022 14:28:41 +0800 Subject: mmc: core: Add support for SDIO wakeup interrupt If wakeup-source flag is set in host dts node, parse EAI information from SDIO CCCR interrupt externsion segment for in-band wakeup. If async interrupt is supported by SDIO card then enable it and set enable_async_irq flag in sdio_cccr structure to 1. The parse flow is implemented in sdio_read_cccr(). Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Axe Yang Link: https://lore.kernel.org/r/20220726062842.18846-3-axe.yang@mediatek.com Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 8 +++++++- include/linux/mmc/sdio.h | 5 +++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 156a7b673a28..8a30de08e913 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -219,7 +219,8 @@ struct sdio_cccr { wide_bus:1, high_power:1, high_speed:1, - disable_cd:1; + disable_cd:1, + enable_async_irq:1; }; struct sdio_cis { @@ -343,6 +344,11 @@ static inline bool mmc_large_sector(struct mmc_card *card) return card->ext_csd.data_sector_size == 4096; } +static inline int mmc_card_enable_async_irq(struct mmc_card *card) +{ + return card->cccr.enable_async_irq; +} + bool mmc_card_is_blockaddr(struct mmc_card *card); #define mmc_card_mmc(c) ((c)->type == MMC_TYPE_MMC) diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index 2a05d1ac4f0e..1ef400f28642 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -159,6 +159,11 @@ #define SDIO_DTSx_SET_TYPE_A (1 << SDIO_DRIVE_DTSx_SHIFT) #define SDIO_DTSx_SET_TYPE_C (2 << SDIO_DRIVE_DTSx_SHIFT) #define SDIO_DTSx_SET_TYPE_D (3 << SDIO_DRIVE_DTSx_SHIFT) + +#define SDIO_CCCR_INTERRUPT_EXT 0x16 +#define SDIO_INTERRUPT_EXT_SAI (1 << 0) +#define SDIO_INTERRUPT_EXT_EAI (1 << 1) + /* * Function Basic Registers (FBR) */ -- cgit v1.2.3 From 46126db9c86110e5fc1e369b9bb89735ddefdae4 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Mon, 18 Jul 2022 14:18:10 +0200 Subject: flow_dissector: Add PPPoE dissectors Allow to dissect PPPoE specific fields which are: - session ID (16 bits) - ppp protocol (16 bits) - type (16 bits) - this is PPPoE ethertype, for now only ETH_P_PPP_SES is supported, possible ETH_P_PPP_DISC in the future The goal is to make the following TC command possible: # tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses \ flower \ pppoe_sid 12 \ ppp_proto ip \ action drop Note that only PPPoE Session is supported. Signed-off-by: Wojciech Drewek Acked-by: Guillaume Nault Signed-off-by: Tony Nguyen --- include/linux/ppp_defs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ppp_defs.h b/include/linux/ppp_defs.h index 9d2b388fae1a..b7e57fdbd413 100644 --- a/include/linux/ppp_defs.h +++ b/include/linux/ppp_defs.h @@ -11,4 +11,18 @@ #include #define PPP_FCS(fcs, c) crc_ccitt_byte(fcs, c) + +/** + * ppp_proto_is_valid - checks if PPP protocol is valid + * @proto: PPP protocol + * + * Assumes proto is not compressed. + * Protocol is valid if the value is odd and the least significant bit of the + * most significant octet is 0 (see RFC 1661, section 2). + */ +static inline bool ppp_proto_is_valid(u16 proto) +{ + return !!((proto & 0x0101) == 0x0001); +} + #endif /* _PPP_DEFS_H_ */ -- cgit v1.2.3 From 04ad63f086d1a9649b8b082748cbc7a570ade461 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 11 Jan 2022 08:06:40 -0800 Subject: cxl/region: Introduce cxl_pmem_region objects The LIBNVDIMM subsystem is a platform agnostic representation of system NVDIMM / persistent memory resources. To date, the CXL subsystem's interaction with LIBNVDIMM has been to register an nvdimm-bridge device and cxl_nvdimm objects to proxy CXL capabilities into existing LIBNVDIMM subsystem mechanics. With regions the approach is the same. Create a new cxl_pmem_region object to proxy CXL region details into a LIBNVDIMM definition. With this enabling LIBNVDIMM can partition CXL persistent memory regions with legacy namespace labels. A follow-on patch will add CXL region label and CXL namespace label support to persist region configurations across driver reload / system-reset events. Co-developed-by: Ben Widawsky Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165784340111.1758207.3036498385188290968.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 0d61e07b6827..c74acfa1a3fe 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -59,6 +59,9 @@ enum { /* Platform provides asynchronous flush mechanism */ ND_REGION_ASYNC = 3, + /* Region was created by CXL subsystem */ + ND_REGION_CXL = 4, + /* mark newly adjusted resources as requiring a label update */ DPA_RESOURCE_ADJUSTED = 1 << 0, }; @@ -122,6 +125,7 @@ struct nd_region_desc { int numa_node; int target_node; unsigned long flags; + int memregion; struct device_node *of_node; int (*flush)(struct nd_region *nd_region, struct bio *bio); }; @@ -259,6 +263,7 @@ static inline struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, cmd_mask, num_flush, flush_wpq, NULL, NULL, NULL); } void nvdimm_delete(struct nvdimm *nvdimm); +void nvdimm_region_delete(struct nd_region *nd_region); const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); -- cgit v1.2.3 From a11821495fd4d9b5c97945db929e02c473b7a5d9 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 24 Jul 2022 20:46:28 +0200 Subject: i2c: extend documentation about retvals of master_xfer functions It was stated how the error codes should be. It was not stated what the regular case should return. Add this. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index fbda5ada2afc..8eab5017bff3 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -537,7 +537,8 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info, * * The return codes from the ``master_xfer{_atomic}`` fields should indicate the * type of error code that occurred during the transfer, as documented in the - * Kernel Documentation file Documentation/i2c/fault-codes.rst. + * Kernel Documentation file Documentation/i2c/fault-codes.rst. Otherwise, the + * number of messages executed should be returned. */ struct i2c_algorithm { /* -- cgit v1.2.3 From 04b9407197789c81fffac52921e703cb47967d6a Mon Sep 17 00:00:00 2001 From: Daniil Lunev Date: Wed, 27 Jul 2022 16:44:24 +1000 Subject: vfs: function to prevent re-use of block-device-based superblocks The function is to be called from filesystem-specific code to mark a superblock to be ignored by superblock test and thus never re-used. The function also unregisters bdi if the bdi is per-superblock to avoid collision if a new superblock is created to represent the filesystem. generic_shutdown_super() skips unregistering bdi for a retired superlock as it assumes retire function has already done it. This patch adds the functionality only for the block-device-based supers, since the primary use case of the feature is to gracefully handle force unmount of external devices, mounted with FUSE. This can be further extended to cover all superblocks, if the need arises. Signed-off-by: Daniil Lunev Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae..246120ee0573 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1412,6 +1412,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */ #define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */ #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ +#define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ /* Possible states of 'frozen' field */ enum { @@ -2432,6 +2433,7 @@ extern struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); +void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); -- cgit v1.2.3 From 7c56a8733d0a2a4be2438a7512566e5ce552fccf Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 13 Jul 2022 17:47:27 +0200 Subject: watchdog: export lockup_detector_reconfigure In some circumstances it may be interesting to reconfigure the watchdog from inside the kernel. On PowerPC, this may helpful before and after a LPAR migration (LPM) is initiated, because it implies some latencies, watchdog, and especially NMI watchdog is expected to be triggered during this operation. Reconfiguring the watchdog with a factor, would prevent it to happen too frequently during LPM. Rename lockup_detector_reconfigure() as __lockup_detector_reconfigure() and create a new function lockup_detector_reconfigure() calling __lockup_detector_reconfigure() under the protection of watchdog_mutex. Signed-off-by: Laurent Dufour [mpe: Squash in build fix from Laurent, reported by Sachin] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713154729.80789-3-ldufour@linux.ibm.com --- include/linux/nmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 750c7f395ca9..f700ff2df074 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -122,6 +122,8 @@ int watchdog_nmi_probe(void); int watchdog_nmi_enable(unsigned int cpu); void watchdog_nmi_disable(unsigned int cpu); +void lockup_detector_reconfigure(void); + /** * touch_nmi_watchdog - restart NMI watchdog timeout. * -- cgit v1.2.3 From 9f8267b9b29937d997c3b4cec426252aae6311b5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 27 Jul 2022 13:49:48 +0200 Subject: misc: Mark MICROCODE_MINOR unused This one is unused since 181b6f40e9ea ("x86/microcode: Rip out the OLD_INTERFACE") so comment it out. Link: https://lore.kernel.org/r/20220525161232.14924-1-bp@alien8.de Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220727114948.30123-1-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 0676f18093f9..c0fea6ca5076 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -44,7 +44,7 @@ #define AGPGART_MINOR 175 #define TOSH_MINOR_DEV 181 #define HWRNG_MINOR 183 -#define MICROCODE_MINOR 184 +/*#define MICROCODE_MINOR 184 unused */ #define KEYPAD_MINOR 185 #define IRNET_MINOR 187 #define D7S_MINOR 193 -- cgit v1.2.3 From 26c6c2f8a907c9e3a2f24990552a4d77235791e6 Mon Sep 17 00:00:00 2001 From: Weitao Wang Date: Tue, 26 Jul 2022 15:49:18 +0800 Subject: USB: HCD: Fix URB giveback issue in tasklet function Usb core introduce the mechanism of giveback of URB in tasklet context to reduce hardware interrupt handling time. On some test situation(such as FIO with 4KB block size), when tasklet callback function called to giveback URB, interrupt handler add URB node to the bh->head list also. If check bh->head list again after finish all URB giveback of local_list, then it may introduce a "dynamic balance" between giveback URB and add URB to bh->head list. This tasklet callback function may not exit for a long time, which will cause other tasklet function calls to be delayed. Some real-time applications(such as KB and Mouse) will see noticeable lag. In order to prevent the tasklet function from occupying the cpu for a long time at a time, new URBS will not be added to the local_list even though the bh->head list is not empty. But also need to ensure the left URB giveback to be processed in time, so add a member high_prio for structure giveback_urb_bh to prioritize tasklet and schelule this tasklet again if bh->head list is not empty. At the same time, we are able to prioritize tasklet through structure member high_prio. So, replace the local high_prio_bh variable with this structure member in usb_hcd_giveback_urb. Fixes: 94dfd7edfd5c ("USB: HCD: support giveback of URB in tasklet context") Cc: stable Reviewed-by: Alan Stern Signed-off-by: Weitao Wang Link: https://lore.kernel.org/r/20220726074918.5114-1-WeitaoWang-oc@zhaoxin.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 2c1fc9212cf2..98d1921f02b1 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -66,6 +66,7 @@ struct giveback_urb_bh { bool running; + bool high_prio; spinlock_t lock; struct list_head head; struct tasklet_struct bh; -- cgit v1.2.3 From 6eabfc018e8d1033e7fc1efce30a872e2dccb537 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 26 Jul 2022 10:38:21 -0700 Subject: regulator: core: Allow specifying an initial load w/ the bulk API There are a number of drivers that follow a pattern that looks like this: 1. Use the regulator bulk API to get a bunch of regulators. 2. Set the load on each of the regulators to use whenever the regulators are enabled. Let's make this easier by just allowing the drivers to pass the load in. As part of this change we need to move the error printing in regulator_bulk_get() around; let's switch to the new dev_err_probe() to simplify it. Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20220726103631.v2.4.Ie85f68215ada39f502a96dcb8a1f3ad977e3f68a@changeid Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index bbf6590a6dec..5779f4466e62 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -171,10 +171,13 @@ struct regulator; /** * struct regulator_bulk_data - Data used for bulk regulator operations. * - * @supply: The name of the supply. Initialised by the user before - * using the bulk regulator APIs. - * @consumer: The regulator consumer for the supply. This will be managed - * by the bulk API. + * @supply: The name of the supply. Initialised by the user before + * using the bulk regulator APIs. + * @init_load_uA: After getting the regulator, regulator_set_load() will be + * called with this load. Initialised by the user before + * using the bulk regulator APIs. + * @consumer: The regulator consumer for the supply. This will be managed + * by the bulk API. * * The regulator APIs provide a series of regulator_bulk_() API calls as * a convenience to consumers which require multiple supplies. This @@ -182,6 +185,7 @@ struct regulator; */ struct regulator_bulk_data { const char *supply; + int init_load_uA; struct regulator *consumer; /* private: Internal use */ -- cgit v1.2.3 From 1de452a0edda26f1483d1d934f692eab13ba669a Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 26 Jul 2022 10:38:23 -0700 Subject: regulator: core: Allow drivers to define their init data as const Drivers tend to want to define the names of their regulators somewhere in their source file as "static const". This means, inevitable, that every driver out there open codes something like this: static const char * const supply_names[] = { "vcc", "vccl", }; static int get_regulators(struct my_data *data) { int i; data->supplies = devm_kzalloc(...) if (!data->supplies) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(supply_names); i++) data->supplies[i].supply = supply_names[i]; return devm_regulator_bulk_get(data->dev, ARRAY_SIZE(supply_names), data->supplies); } Let's make this more convenient by doing providing a helper that does the copy. I have chosen to have the "const" input structure here be the exact same structure as the normal one passed to devm_regulator_bulk_get(). This is slightly inefficent since the input data can't possibly have anything useful for "ret" or consumer and thus we waste 8 bytes per structure. This seems an OK tradeoff for not introducing an extra structure. Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20220726103631.v2.6.I38fc508a73135a5c1b873851f3553ff2a3a625f5@changeid Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 5779f4466e62..bc6cda706d1f 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -244,6 +244,10 @@ int __must_check regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers); int __must_check devm_regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers); +int __must_check devm_regulator_bulk_get_const( + struct device *dev, int num_consumers, + const struct regulator_bulk_data *in_consumers, + struct regulator_bulk_data **out_consumers); int __must_check regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers); int regulator_bulk_disable(int num_consumers, -- cgit v1.2.3 From 14b146b688ad9593f5eee93d51a34d09a47e50b5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 27 Jul 2022 10:30:41 +0100 Subject: io_uring: notification completion optimisation We want to use all optimisations that we have for io_uring requests like completion batching, memory caching and more but for zc notifications. Fortunately, notification perfectly fit the request model so we can overlay them onto struct io_kiocb and use all the infratructure. Most of the fields of struct io_notif natively fits into io_kiocb, so we replace struct io_notif with struct io_kiocb carrying struct io_notif_data in the cmd cache line. Then we adapt io_alloc_notif() to use io_alloc_req()/io_alloc_req_refill(), and kill leftovers of hand coded caching. __io_notif_complete_tw() is converted to use io_uring's tw infra. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9e010125175e80baf51f0ca63bdc7cc6a4a9fa56.1658913593.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 144493cbadb5..f7fab3758cb9 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -249,9 +249,6 @@ struct io_ring_ctx { struct xarray io_bl_xa; struct list_head io_buffers_cache; - /* struct io_notif cache, protected by uring_lock */ - struct list_head notif_list; - struct io_hash_table cancel_table_locked; struct list_head cq_overflow_list; struct io_alloc_cache apoll_cache; @@ -262,10 +259,6 @@ struct io_ring_ctx { struct io_wq_work_list locked_free_list; unsigned int locked_free_nr; - /* struct io_notif cache protected by completion_lock */ - struct list_head notif_list_locked; - unsigned int notif_locked_nr; - const struct cred *sq_creds; /* cred used for __io_sq_thread() */ struct io_sq_data *sq_data; /* if using sq thread polling */ -- cgit v1.2.3 From 0113780870b1597ae49f30abfa4957c239f913d3 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 26 Jul 2022 10:19:11 +0300 Subject: RDMA/mlx5: Rename the mkey cache variables and functions After replacing the MR cache with an Mkey cache, rename the variables and functions to fit the new meaning. Link: https://lore.kernel.org/r/20220726071911.122765-6-michaelgur@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 76d7661e3e63..220597c2f436 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -728,10 +728,10 @@ enum { }; enum { - MR_CACHE_LAST_STD_ENTRY = 20, + MKEY_CACHE_LAST_STD_ENTRY = 20, MLX5_IMR_MTT_CACHE_ENTRY, MLX5_IMR_KSM_CACHE_ENTRY, - MAX_MR_CACHE_ENTRIES + MAX_MKEY_CACHE_ENTRIES }; struct mlx5_profile { @@ -740,7 +740,7 @@ struct mlx5_profile { struct { int size; int limit; - } mr_cache[MAX_MR_CACHE_ENTRIES]; + } mr_cache[MAX_MKEY_CACHE_ENTRIES]; }; struct mlx5_hca_cap { -- cgit v1.2.3 From 103e10c69c611efabccf57d799c4b191d53ee765 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 22 Jul 2022 09:57:46 +0300 Subject: ACPI: property: Add support for parsing buffer property UUID Add support for newly added buffer property UUID, as defined in the DSD guide section 3.3 [1] Link: https://github.com/UEFI/DSD-Guide/blob/main/src/dsd-guide.adoc#buffer-data-extension-uuid # [1] Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 44975c1bbe12..7c46f152106b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1243,7 +1243,7 @@ static inline bool acpi_dev_has_props(const struct acpi_device *adev) struct acpi_device_properties * acpi_data_add_props(struct acpi_device_data *data, const guid_t *guid, - const union acpi_object *properties); + union acpi_object *properties); int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, void **valptr); -- cgit v1.2.3 From 72691a269f0baad6d5f4aa7af97c29081b86d70f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 27 Jul 2022 13:02:27 -0400 Subject: SUNRPC: Don't reuse bvec on retransmission of the request If a request is re-encoded and then retransmitted, we need to make sure that we also re-encode the bvec, in case the page lists have changed. Fixes: ff053dbbaffe ("SUNRPC: Move the call to xprt_send_pagedata() out of xprt_sock_sendmsg()") Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 0d51b9f9ea37..b9f59aabee53 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -144,7 +144,8 @@ struct rpc_xprt_ops { unsigned short (*get_srcport)(struct rpc_xprt *xprt); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); - int (*prepare_request)(struct rpc_rqst *req); + int (*prepare_request)(struct rpc_rqst *req, + struct xdr_buf *buf); int (*send_request)(struct rpc_rqst *req); void (*wait_for_reply_request)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); -- cgit v1.2.3 From c452d49849d48bd37ae97fc2bc92c6435707c35f Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Mon, 25 Jul 2022 12:24:59 +0300 Subject: mtd: spi-nor: s/addr_width/addr_nbytes Address width was an unfortunate name, as it means the number of IO lines used for the address, whereas in the code it is used as the number of address bytes. s/addr_width/addr_nbytes throughout the entire SPI NOR framework. Signed-off-by: Tudor Ambarus Reviewed-by: Michael Walle Acked-by: Pratyush Yadav Link: https://lore.kernel.org/r/20220725092505.446315-2-tudor.ambarus@microchip.com --- include/linux/mtd/spi-nor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 1ede4c89805a..42218a1164f6 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -351,7 +351,7 @@ struct spi_nor_flash_parameter; * @bouncebuf_size: size of the bounce buffer * @info: SPI NOR part JEDEC MFR ID and other info * @manufacturer: SPI NOR manufacturer - * @addr_width: number of address bytes + * @addr_nbytes: number of address bytes * @erase_opcode: the opcode for erasing a sector * @read_opcode: the read opcode * @read_dummy: the dummy needed by the read operation @@ -381,7 +381,7 @@ struct spi_nor { size_t bouncebuf_size; const struct flash_info *info; const struct spi_nor_manufacturer *manufacturer; - u8 addr_width; + u8 addr_nbytes; u8 erase_opcode; u8 read_opcode; u8 read_dummy; -- cgit v1.2.3 From e60a7233684aa8bbe9090537720fe6a1e901d823 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 28 Jul 2022 08:10:51 +0200 Subject: Documentation: serial: move uart_ops documentation to the struct While it's a lot of text, it always helps to keep it up to date when it's by the source. (And not in a separate file.) The documentation tooling also makes sure that all members of the structure are documented. (If not, it complains loudly.) Finally, there needs to be no comments inlined in the structure, so they are dropped as they are superfluous now. The compilation time of this header (tested with serial_core.c) didn't change in my testing at all. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20220728061056.20799-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 345 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 329 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index a6fa7c40c330..35d2f9e8027d 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -31,9 +31,336 @@ struct serial_struct; struct device; struct gpio_desc; -/* +/** + * struct uart_ops -- interface between serial_core and the driver + * * This structure describes all the operations that can be done on the - * physical hardware. See Documentation/driver-api/serial/driver.rst for details. + * physical hardware. + * + * @tx_empty: ``unsigned int ()(struct uart_port *port)`` + * + * This function tests whether the transmitter fifo and shifter for the + * @port is empty. If it is empty, this function should return + * %TIOCSER_TEMT, otherwise return 0. If the port does not support this + * operation, then it should return %TIOCSER_TEMT. + * + * Locking: none. + * Interrupts: caller dependent. + * This call must not sleep + * + * @set_mctrl: ``void ()(struct uart_port *port, unsigned int mctrl)`` + * + * This function sets the modem control lines for @port to the state + * described by @mctrl. The relevant bits of @mctrl are: + * + * - %TIOCM_RTS RTS signal. + * - %TIOCM_DTR DTR signal. + * - %TIOCM_OUT1 OUT1 signal. + * - %TIOCM_OUT2 OUT2 signal. + * - %TIOCM_LOOP Set the port into loopback mode. + * + * If the appropriate bit is set, the signal should be driven + * active. If the bit is clear, the signal should be driven + * inactive. + * + * Locking: @port->lock taken. + * Interrupts: locally disabled. + * This call must not sleep + * + * @get_mctrl: ``unsigned int ()(struct uart_port *port)`` + * + * Returns the current state of modem control inputs of @port. The state + * of the outputs should not be returned, since the core keeps track of + * their state. The state information should include: + * + * - %TIOCM_CAR state of DCD signal + * - %TIOCM_CTS state of CTS signal + * - %TIOCM_DSR state of DSR signal + * - %TIOCM_RI state of RI signal + * + * The bit is set if the signal is currently driven active. If + * the port does not support CTS, DCD or DSR, the driver should + * indicate that the signal is permanently active. If RI is + * not available, the signal should not be indicated as active. + * + * Locking: @port->lock taken. + * Interrupts: locally disabled. + * This call must not sleep + * + * @stop_tx: ``void ()(struct uart_port *port)`` + * + * Stop transmitting characters. This might be due to the CTS line + * becoming inactive or the tty layer indicating we want to stop + * transmission due to an %XOFF character. + * + * The driver should stop transmitting characters as soon as possible. + * + * Locking: @port->lock taken. + * Interrupts: locally disabled. + * This call must not sleep + * + * @start_tx: ``void ()(struct uart_port *port)`` + * + * Start transmitting characters. + * + * Locking: @port->lock taken. + * Interrupts: locally disabled. + * This call must not sleep + * + * @throttle: ``void ()(struct uart_port *port)`` + * + * Notify the serial driver that input buffers for the line discipline are + * close to full, and it should somehow signal that no more characters + * should be sent to the serial port. + * This will be called only if hardware assisted flow control is enabled. + * + * Locking: serialized with @unthrottle() and termios modification by the + * tty layer. + * + * @unthrottle: ``void ()(struct uart_port *port)`` + * + * Notify the serial driver that characters can now be sent to the serial + * port without fear of overrunning the input buffers of the line + * disciplines. + * + * This will be called only if hardware assisted flow control is enabled. + * + * Locking: serialized with @throttle() and termios modification by the + * tty layer. + * + * @send_xchar: ``void ()(struct uart_port *port, char ch)`` + * + * Transmit a high priority character, even if the port is stopped. This + * is used to implement XON/XOFF flow control and tcflow(). If the serial + * driver does not implement this function, the tty core will append the + * character to the circular buffer and then call start_tx() / stop_tx() + * to flush the data out. + * + * Do not transmit if @ch == '\0' (%__DISABLED_CHAR). + * + * Locking: none. + * Interrupts: caller dependent. + * + * @stop_rx: ``void ()(struct uart_port *port)`` + * + * Stop receiving characters; the @port is in the process of being closed. + * + * Locking: @port->lock taken. + * Interrupts: locally disabled. + * This call must not sleep + * + * @enable_ms: ``void ()(struct uart_port *port)`` + * + * Enable the modem status interrupts. + * + * This method may be called multiple times. Modem status interrupts + * should be disabled when the @shutdown() method is called. + * + * Locking: @port->lock taken. + * Interrupts: locally disabled. + * This call must not sleep + * + * @break_ctl: ``void ()(struct uart_port *port, int ctl)`` + * + * Control the transmission of a break signal. If @ctl is nonzero, the + * break signal should be transmitted. The signal should be terminated + * when another call is made with a zero @ctl. + * + * Locking: caller holds tty_port->mutex + * + * @startup: ``int ()(struct uart_port *port)`` + * + * Grab any interrupt resources and initialise any low level driver state. + * Enable the port for reception. It should not activate RTS nor DTR; + * this will be done via a separate call to @set_mctrl(). + * + * This method will only be called when the port is initially opened. + * + * Locking: port_sem taken. + * Interrupts: globally disabled. + * + * @shutdown: ``void ()(struct uart_port *port)`` + * + * Disable the @port, disable any break condition that may be in effect, + * and free any interrupt resources. It should not disable RTS nor DTR; + * this will have already been done via a separate call to @set_mctrl(). + * + * Drivers must not access @port->state once this call has completed. + * + * This method will only be called when there are no more users of this + * @port. + * + * Locking: port_sem taken. + * Interrupts: caller dependent. + * + * @flush_buffer: ``void ()(struct uart_port *port)`` + * + * Flush any write buffers, reset any DMA state and stop any ongoing DMA + * transfers. + * + * This will be called whenever the @port->state->xmit circular buffer is + * cleared. + * + * Locking: @port->lock taken. + * Interrupts: locally disabled. + * This call must not sleep + * + * @set_termios: ``void ()(struct uart_port *port, struct ktermios *new, + * struct ktermios *old)`` + * + * Change the @port parameters, including word length, parity, stop bits. + * Update @port->read_status_mask and @port->ignore_status_mask to + * indicate the types of events we are interested in receiving. Relevant + * ktermios::c_cflag bits are: + * + * - %CSIZE - word size + * - %CSTOPB - 2 stop bits + * - %PARENB - parity enable + * - %PARODD - odd parity (when %PARENB is in force) + * - %ADDRB - address bit (changed through uart_port::rs485_config()). + * - %CREAD - enable reception of characters (if not set, still receive + * characters from the port, but throw them away). + * - %CRTSCTS - if set, enable CTS status change reporting. + * - %CLOCAL - if not set, enable modem status change reporting. + * + * Relevant ktermios::c_iflag bits are: + * + * - %INPCK - enable frame and parity error events to be passed to the TTY + * layer. + * - %BRKINT / %PARMRK - both of these enable break events to be passed to + * the TTY layer. + * - %IGNPAR - ignore parity and framing errors. + * - %IGNBRK - ignore break errors. If %IGNPAR is also set, ignore overrun + * errors as well. + * + * The interaction of the ktermios::c_iflag bits is as follows (parity + * error given as an example): + * + * ============ ======= ======= ========================================= + * Parity error INPCK IGNPAR + * ============ ======= ======= ========================================= + * n/a 0 n/a character received, marked as %TTY_NORMAL + * None 1 n/a character received, marked as %TTY_NORMAL + * Yes 1 0 character received, marked as %TTY_PARITY + * Yes 1 1 character discarded + * ============ ======= ======= ========================================= + * + * Other flags may be used (eg, xon/xoff characters) if your hardware + * supports hardware "soft" flow control. + * + * Locking: caller holds tty_port->mutex + * Interrupts: caller dependent. + * This call must not sleep + * + * @set_ldisc: ``void ()(struct uart_port *port, struct ktermios *termios)`` + * + * Notifier for discipline change. See + * Documentation/driver-api/tty/tty_ldisc.rst. + * + * Locking: caller holds tty_port->mutex + * + * @pm: ``void ()(struct uart_port *port, unsigned int state, + * unsigned int oldstate)`` + * + * Perform any power management related activities on the specified @port. + * @state indicates the new state (defined by enum uart_pm_state), + * @oldstate indicates the previous state. + * + * This function should not be used to grab any resources. + * + * This will be called when the @port is initially opened and finally + * closed, except when the @port is also the system console. This will + * occur even if %CONFIG_PM is not set. + * + * Locking: none. + * Interrupts: caller dependent. + * + * @type: ``const char *()(struct uart_port *port)`` + * + * Return a pointer to a string constant describing the specified @port, + * or return %NULL, in which case the string 'unknown' is substituted. + * + * Locking: none. + * Interrupts: caller dependent. + * + * @release_port: ``void ()(struct uart_port *port)`` + * + * Release any memory and IO region resources currently in use by the + * @port. + * + * Locking: none. + * Interrupts: caller dependent. + * + * @request_port: ``int ()(struct uart_port *port)`` + * + * Request any memory and IO region resources required by the port. If any + * fail, no resources should be registered when this function returns, and + * it should return -%EBUSY on failure. + * + * Locking: none. + * Interrupts: caller dependent. + * + * @config_port: ``void ()(struct uart_port *port, int type)`` + * + * Perform any autoconfiguration steps required for the @port. @type + * contains a bit mask of the required configuration. %UART_CONFIG_TYPE + * indicates that the port requires detection and identification. + * @port->type should be set to the type found, or %PORT_UNKNOWN if no + * port was detected. + * + * %UART_CONFIG_IRQ indicates autoconfiguration of the interrupt signal, + * which should be probed using standard kernel autoprobing techniques. + * This is not necessary on platforms where ports have interrupts + * internally hard wired (eg, system on a chip implementations). + * + * Locking: none. + * Interrupts: caller dependent. + * + * @verify_port: ``int ()(struct uart_port *port, + * struct serial_struct *serinfo)`` + * + * Verify the new serial port information contained within @serinfo is + * suitable for this port type. + * + * Locking: none. + * Interrupts: caller dependent. + * + * @ioctl: ``int ()(struct uart_port *port, unsigned int cmd, + * unsigned long arg)`` + * + * Perform any port specific IOCTLs. IOCTL commands must be defined using + * the standard numbering system found in . + * + * Locking: none. + * Interrupts: caller dependent. + * + * @poll_init: ``int ()(struct uart_port *port)`` + * + * Called by kgdb to perform the minimal hardware initialization needed to + * support @poll_put_char() and @poll_get_char(). Unlike @startup(), this + * should not request interrupts. + * + * Locking: %tty_mutex and tty_port->mutex taken. + * Interrupts: n/a. + * + * @poll_put_char: ``void ()(struct uart_port *port, unsigned char ch)`` + * + * Called by kgdb to write a single character @ch directly to the serial + * @port. It can and should block until there is space in the TX FIFO. + * + * Locking: none. + * Interrupts: caller dependent. + * This call must not sleep + * + * @poll_get_char: ``int ()(struct uart_port *port)`` + * + * Called by kgdb to read a single character directly from the serial + * port. If data is available, it should be returned; otherwise the + * function should return %NO_POLL_CHAR immediately. + * + * Locking: none. + * Interrupts: caller dependent. + * This call must not sleep */ struct uart_ops { unsigned int (*tx_empty)(struct uart_port *); @@ -56,22 +383,8 @@ struct uart_ops { void (*set_ldisc)(struct uart_port *, struct ktermios *); void (*pm)(struct uart_port *, unsigned int state, unsigned int oldstate); - - /* - * Return a string describing the type of the port - */ const char *(*type)(struct uart_port *); - - /* - * Release IO and memory resources used by the port. - * This includes iounmap if necessary. - */ void (*release_port)(struct uart_port *); - - /* - * Request IO and memory resources used by the port. - * This includes iomapping the port if necessary. - */ int (*request_port)(struct uart_port *); void (*config_port)(struct uart_port *, int); int (*verify_port)(struct uart_port *, struct serial_struct *); -- cgit v1.2.3 From 5f10376b6bc1e2773f56977980ab08c9e4fa91a7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 Jul 2022 14:56:52 -0700 Subject: add missing includes and forward declarations to networking includes under linux/ Similarly to a recent include/net/ cleanup, this patch adds missing includes to networking headers under include/linux. All these problems are currently masked by the existing users including the missing dependency before the broken header. Link: https://lore.kernel.org/all/20220723045755.2676857-1-kuba@kernel.org/ v1 Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20220726215652.158167-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/linux/atm_tcp.h | 2 ++ include/linux/dsa/tag_qca.h | 5 +++++ include/linux/hippidevice.h | 4 ++++ include/linux/if_eql.h | 1 + include/linux/if_hsr.h | 4 ++++ include/linux/if_rmnet.h | 2 ++ include/linux/if_tap.h | 11 ++++++----- include/linux/mdio/mdio-xgene.h | 4 ++++ include/linux/nl802154.h | 2 ++ include/linux/phy_fixed.h | 3 +++ include/linux/ppp-comp.h | 2 +- include/linux/ppp_channel.h | 2 ++ include/linux/ptp_kvm.h | 2 ++ include/linux/ptp_pch.h | 4 ++++ include/linux/seq_file_net.h | 1 + include/linux/sungem_phy.h | 2 ++ include/linux/usb/usbnet.h | 6 ++++++ 17 files changed, 51 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atm_tcp.h b/include/linux/atm_tcp.h index c8ecf6f68fb5..2558439d849b 100644 --- a/include/linux/atm_tcp.h +++ b/include/linux/atm_tcp.h @@ -9,6 +9,8 @@ #include +struct atm_vcc; +struct module; struct atm_tcp_ops { int (*attach)(struct atm_vcc *vcc,int itf); diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h index 4359fb0221cf..50be7cbd93a5 100644 --- a/include/linux/dsa/tag_qca.h +++ b/include/linux/dsa/tag_qca.h @@ -3,6 +3,11 @@ #ifndef __TAG_QCA_H #define __TAG_QCA_H +#include + +struct dsa_switch; +struct sk_buff; + #define QCA_HDR_LEN 2 #define QCA_HDR_VERSION 0x2 diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 9dc01f7ab5b4..07414c241e65 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -23,6 +23,10 @@ #ifdef __KERNEL__ +struct neigh_parms; +struct net_device; +struct sk_buff; + struct hippi_cb { __u32 ifield; }; diff --git a/include/linux/if_eql.h b/include/linux/if_eql.h index d75601d613cc..07f9b660b741 100644 --- a/include/linux/if_eql.h +++ b/include/linux/if_eql.h @@ -21,6 +21,7 @@ #include #include +#include #include typedef struct slave { diff --git a/include/linux/if_hsr.h b/include/linux/if_hsr.h index 408539d5ea5f..0404f5bf4f30 100644 --- a/include/linux/if_hsr.h +++ b/include/linux/if_hsr.h @@ -2,6 +2,10 @@ #ifndef _LINUX_IF_HSR_H_ #define _LINUX_IF_HSR_H_ +#include + +struct net_device; + /* used to differentiate various protocols */ enum hsr_version { HSR_V0 = 0, diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h index 10e7521ecb6c..839d1e48b85e 100644 --- a/include/linux/if_rmnet.h +++ b/include/linux/if_rmnet.h @@ -5,6 +5,8 @@ #ifndef _LINUX_IF_RMNET_H_ #define _LINUX_IF_RMNET_H_ +#include + struct rmnet_map_header { u8 flags; /* MAP_CMD_FLAG, MAP_PAD_LEN_MASK */ u8 mux_id; diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 915a187cfabd..553552fa635c 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -2,14 +2,18 @@ #ifndef _LINUX_IF_TAP_H_ #define _LINUX_IF_TAP_H_ +#include +#include + +struct file; +struct socket; + #if IS_ENABLED(CONFIG_TAP) struct socket *tap_get_socket(struct file *); struct ptr_ring *tap_get_ptr_ring(struct file *file); #else #include #include -struct file; -struct socket; static inline struct socket *tap_get_socket(struct file *f) { return ERR_PTR(-EINVAL); @@ -20,9 +24,6 @@ static inline struct ptr_ring *tap_get_ptr_ring(struct file *f) } #endif /* CONFIG_TAP */ -#include -#include - /* * Maximum times a tap device can be opened. This can be used to * configure the number of receive queue, e.g. for multiqueue virtio. diff --git a/include/linux/mdio/mdio-xgene.h b/include/linux/mdio/mdio-xgene.h index 8af93ada8b64..9e588965dc83 100644 --- a/include/linux/mdio/mdio-xgene.h +++ b/include/linux/mdio/mdio-xgene.h @@ -8,6 +8,10 @@ #ifndef __MDIO_XGENE_H__ #define __MDIO_XGENE_H__ +#include +#include +#include + #define BLOCK_XG_MDIO_CSR_OFFSET 0x5000 #define BLOCK_DIAG_CSR_OFFSET 0xd000 #define XGENET_CONFIG_REG_ADDR 0x20 diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index b22782225f27..cbe5fd1dd2e7 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -8,6 +8,8 @@ #ifndef NL802154_H #define NL802154_H +#include + #define IEEE802154_NL_NAME "802.15.4 MAC" #define IEEE802154_MCAST_COORD_NAME "coordinator" #define IEEE802154_MCAST_BEACON_NAME "beacon" diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 52bc8e487ef7..1acafd86ab13 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -2,6 +2,8 @@ #ifndef __PHY_FIXED_H #define __PHY_FIXED_H +#include + struct fixed_phy_status { int link; int speed; @@ -12,6 +14,7 @@ struct fixed_phy_status { struct device_node; struct gpio_desc; +struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); diff --git a/include/linux/ppp-comp.h b/include/linux/ppp-comp.h index 9d3ffc8f5ea6..fb847e47f148 100644 --- a/include/linux/ppp-comp.h +++ b/include/linux/ppp-comp.h @@ -9,7 +9,7 @@ #include - +struct compstat; struct module; /* diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h index 91f9a928344e..45e6e427ceb8 100644 --- a/include/linux/ppp_channel.h +++ b/include/linux/ppp_channel.h @@ -20,6 +20,8 @@ #include #include +struct net_device_path; +struct net_device_path_ctx; struct ppp_channel; struct ppp_channel_ops { diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h index f960a719f0d5..c2e28deef33a 100644 --- a/include/linux/ptp_kvm.h +++ b/include/linux/ptp_kvm.h @@ -8,6 +8,8 @@ #ifndef _PTP_KVM_H_ #define _PTP_KVM_H_ +#include + struct timespec64; struct clocksource; diff --git a/include/linux/ptp_pch.h b/include/linux/ptp_pch.h index 51818198c292..7ba643b62c15 100644 --- a/include/linux/ptp_pch.h +++ b/include/linux/ptp_pch.h @@ -10,6 +10,10 @@ #ifndef _PTP_PCH_H_ #define _PTP_PCH_H_ +#include + +struct pci_dev; + void pch_ch_control_write(struct pci_dev *pdev, u32 val); u32 pch_ch_event_read(struct pci_dev *pdev); void pch_ch_event_write(struct pci_dev *pdev, u32 val); diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h index b97912fdbae7..79638395bc32 100644 --- a/include/linux/seq_file_net.h +++ b/include/linux/seq_file_net.h @@ -3,6 +3,7 @@ #define __SEQ_FILE_NET_H__ #include +#include struct net; extern struct net init_net; diff --git a/include/linux/sungem_phy.h b/include/linux/sungem_phy.h index 3a11fa41a131..c505f30e8b68 100644 --- a/include/linux/sungem_phy.h +++ b/include/linux/sungem_phy.h @@ -2,6 +2,8 @@ #ifndef __SUNGEM_PHY_H__ #define __SUNGEM_PHY_H__ +#include + struct mii_phy; /* Operations supported by any kind of PHY */ diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 1b4d72d5e891..b42b72391a8d 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -23,6 +23,12 @@ #ifndef __LINUX_USB_USBNET_H #define __LINUX_USB_USBNET_H +#include +#include +#include +#include +#include + /* interface from usbnet core to each USB networking link we handle */ struct usbnet { /* housekeeping */ -- cgit v1.2.3 From 7fb48d25b5ce3bc488dbb019bf1736248181de9a Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 27 Jul 2022 19:16:34 +0900 Subject: can: dev: add generic function can_ethtool_op_get_ts_info_hwts() Add function can_ethtool_op_get_ts_info_hwts(). This function will be used by CAN devices with hardware TX/RX timestamping support to implement ethtool_ops::get_ts_info. This function does not offer support to activate/deactivate hardware timestamps at device level nor support the filter options (which is currently the case for all CAN devices with hardware timestamping support). The fact that hardware timestamp can not be deactivated at hardware level does not impact the userland. As long as the user do not set SO_TIMESTAMPING using a setsockopt() or ioctl(), the kernel will not emit TX timestamps (RX timestamps will still be reproted as it is the case currently). Drivers which need more fine grained control remains free to implement their own function, but we foresee that the generic function introduced here will be sufficient for the majority. Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/all/20220727101641.198847-8-mailhol.vincent@wanadoo.fr Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index e22dc03c850e..752bd45d8ebf 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -20,6 +20,7 @@ #include #include #include +#include #include /* @@ -162,6 +163,8 @@ struct can_priv *safe_candev_priv(struct net_device *dev); int open_candev(struct net_device *dev); void close_candev(struct net_device *dev); int can_change_mtu(struct net_device *dev, int new_mtu); +int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, + struct ethtool_ts_info *info); int register_candev(struct net_device *dev); void unregister_candev(struct net_device *dev); -- cgit v1.2.3 From 90f942c5a6d775bad1be33ba214755314105da4a Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 27 Jul 2022 19:16:35 +0900 Subject: can: dev: add generic function can_eth_ioctl_hwts() Tools based on libpcap (such as tcpdump) expect the SIOCSHWTSTAMP ioctl call to be supported. This is also specified in the kernel doc [1]. The purpose of this ioctl is to toggle the hardware timestamps. Currently, CAN devices which support hardware timestamping have those always activated. can_eth_ioctl_hwts() is a dumb function that will always succeed when requested to set tx_type to HWTSTAMP_TX_ON or rx_filter to HWTSTAMP_FILTER_ALL. [1] Kernel doc: Timestamping, section 3.1 "Hardware Timestamping Implementation: Device Drivers" Link: https://docs.kernel.org/networking/timestamping.html#hardware-timestamping-implementation-device-drivers Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/all/20220727101641.198847-9-mailhol.vincent@wanadoo.fr Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 752bd45d8ebf..c3e50e537e39 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -163,6 +163,7 @@ struct can_priv *safe_candev_priv(struct net_device *dev); int open_candev(struct net_device *dev); void close_candev(struct net_device *dev); int can_change_mtu(struct net_device *dev, int new_mtu); +int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd); int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, struct ethtool_ts_info *info); -- cgit v1.2.3 From cceeeb6a6d02e7b9a74ddd27a3225013b34174aa Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 27 Jun 2022 11:50:51 +0200 Subject: wait: Fix __wait_event_hrtimeout for RT/DL tasks Changes to hrtimer mode (potentially made by __hrtimer_init_sleeper on PREEMPT_RT) are not visible to hrtimer_start_range_ns, thus not accounted for by hrtimer_start_expires call paths. In particular, __wait_event_hrtimeout suffers from this problem as we have, for example: fs/aio.c::read_events wait_event_interruptible_hrtimeout __wait_event_hrtimeout hrtimer_init_sleeper_on_stack <- this might "mode |= HRTIMER_MODE_HARD" on RT if task runs at RT/DL priority hrtimer_start_range_ns WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard) fires since the latter doesn't see the change of mode done by init_sleeper Fix it by making __wait_event_hrtimeout call hrtimer_sleeper_start_expires, which is aware of the special RT/DL case, instead of hrtimer_start_range_ns. Reported-by: Bruno Goncalves Signed-off-by: Juri Lelli Signed-off-by: Thomas Gleixner Reviewed-by: Daniel Bristot de Oliveira Reviewed-by: Valentin Schneider Link: https://lore.kernel.org/r/20220627095051.42470-1-juri.lelli@redhat.com --- include/linux/wait.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 851e07da2583..58cfbf81447c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -544,10 +544,11 @@ do { \ \ hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ HRTIMER_MODE_REL); \ - if ((timeout) != KTIME_MAX) \ - hrtimer_start_range_ns(&__t.timer, timeout, \ - current->timer_slack_ns, \ - HRTIMER_MODE_REL); \ + if ((timeout) != KTIME_MAX) { \ + hrtimer_set_expires_range_ns(&__t.timer, timeout, \ + current->timer_slack_ns); \ + hrtimer_sleeper_start_expires(&__t, HRTIMER_MODE_REL); \ + } \ \ __ret = ___wait_event(wq_head, condition, state, 0, 0, \ if (!__t.task) { \ -- cgit v1.2.3 From 4102c4042a33c682021683ec26c2dca3fd9d7cc2 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 29 Jun 2022 17:10:12 +0200 Subject: thermal/core: Remove DROP_FULL and RAISE_FULL The trends DROP_FULL and RAISE_FULL are not used and were never used in the past AFAICT. Remove these conditions as they seems to not be handled anywhere. Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220629151012.3115773-2-daniel.lezcano@linaro.org Signed-off-by: Daniel Lezcano --- include/linux/thermal.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 365733b428d8..231bac2768fb 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -40,8 +40,6 @@ enum thermal_trend { THERMAL_TREND_STABLE, /* temperature is stable */ THERMAL_TREND_RAISING, /* temperature is raising */ THERMAL_TREND_DROPPING, /* temperature is dropping */ - THERMAL_TREND_RAISE_FULL, /* apply highest cooling action */ - THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */ }; /* Thermal notification reason */ -- cgit v1.2.3 From 646274ddaf756dc549fc98cc581fc1ce4fef517a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 22 Jul 2022 22:00:01 +0200 Subject: thermal/of: Move thermal_trip structure to thermal.h The structure thermal_trip is now generic and will be usable by the different sensor drivers in place of their own structure. Move its definition to thermal.h to make it accessible. Cc: Alexandre Bailon Cc: Kevin Hilman Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Link: https://lore.kernel.org/r/20220722200007.1839356-5-daniel.lezcano@linexp.org Signed-off-by: Daniel Lezcano --- include/linux/thermal.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 231bac2768fb..7e66970f0464 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -78,6 +78,18 @@ struct thermal_zone_device_ops { void (*critical)(struct thermal_zone_device *); }; +/** + * struct thermal_trip - representation of a point in temperature domain + * @temperature: temperature value in miliCelsius + * @hysteresis: relative hysteresis in miliCelsius + * @type: trip point type + */ +struct thermal_trip { + int temperature; + int hysteresis; + enum thermal_trip_type type; +}; + struct thermal_cooling_device_ops { int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); -- cgit v1.2.3 From e5bfcd30f88fdb0ce830229e7ccdeddcb7a59b04 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 22 Jul 2022 22:00:04 +0200 Subject: thermal/core: Rename 'trips' to 'num_trips' In order to use thermal trips defined in the thermal structure, rename the 'trips' field to 'num_trips' to have the 'trips' field containing the thermal trip points. Cc: Alexandre Bailon Cc: Kevin Hilman Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220722200007.1839356-8-daniel.lezcano@linexp.org Signed-off-by: Daniel Lezcano --- include/linux/thermal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 7e66970f0464..ae579a70cc1a 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -123,7 +123,7 @@ struct thermal_cooling_device { * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis * @mode: current mode of this thermal zone * @devdata: private pointer for device private data - * @trips: number of trip points the thermal zone supports + * @num_trips: number of trip points the thermal zone supports * @trips_disabled; bitmap for disabled trips * @passive_delay_jiffies: number of jiffies to wait between polls when * performing passive cooling. @@ -163,7 +163,7 @@ struct thermal_zone_device { struct thermal_attr *trip_hyst_attrs; enum thermal_device_mode mode; void *devdata; - int trips; + int num_trips; unsigned long trips_disabled; /* bitmap for disabled trips */ unsigned long passive_delay_jiffies; unsigned long polling_delay_jiffies; -- cgit v1.2.3 From fae11de507f0e420ba3733729317559e7cea6274 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 22 Jul 2022 22:00:05 +0200 Subject: thermal/core: Add thermal_trip in thermal_zone The thermal trip points are properties of a thermal zone and the different sub systems should be able to save them in the thermal zone structure instead of having their own definition. Give the opportunity to the drivers to create a thermal zone with thermal trips which will be accessible directly from the thermal core framework. As we added the thermal trip points structure in the thermal zone, let's extend the thermal zone register function to have the thermal trip structures as a parameter and store it in the 'trips' field of the thermal zone structure. The thermal zone contains the trip point, we can store them directly when registering the thermal zone. That will allow another step forward to remove the duplicate thermal zone structure we find in the thermal_of code. Cc: Alexandre Bailon Cc: Kevin Hilman Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220722200007.1839356-9-daniel.lezcano@linexp.org Signed-off-by: Daniel Lezcano --- include/linux/thermal.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index ae579a70cc1a..1386c713885d 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -123,6 +123,7 @@ struct thermal_cooling_device { * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis * @mode: current mode of this thermal zone * @devdata: private pointer for device private data + * @trips: an array of struct thermal_trip * @num_trips: number of trip points the thermal zone supports * @trips_disabled; bitmap for disabled trips * @passive_delay_jiffies: number of jiffies to wait between polls when @@ -163,6 +164,7 @@ struct thermal_zone_device { struct thermal_attr *trip_hyst_attrs; enum thermal_device_mode mode; void *devdata; + struct thermal_trip *trips; int num_trips; unsigned long trips_disabled; /* bitmap for disabled trips */ unsigned long passive_delay_jiffies; @@ -376,8 +378,14 @@ void devm_thermal_zone_of_sensor_unregister(struct device *dev, struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, void *, struct thermal_zone_device_ops *, struct thermal_zone_params *, int, int); + void thermal_zone_device_unregister(struct thermal_zone_device *); +struct thermal_zone_device * +thermal_zone_device_register_with_trips(const char *, struct thermal_trip *, int, int, + void *, struct thermal_zone_device_ops *, + struct thermal_zone_params *, int, int); + int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, unsigned long, unsigned long, -- cgit v1.2.3 From 6dd71251b9aeedd540fd7003bc5f73d59dd6dcb2 Mon Sep 17 00:00:00 2001 From: Xin Gao Date: Fri, 22 Jul 2022 10:23:37 +0800 Subject: platform/x86: pmc_atom: Fix comment typo The double `of' is duplicated in line 50, remove one. Signed-off-by: Xin Gao Link: https://lore.kernel.org/r/20220722022337.15903-1-gaoxin@cdjrlc.com Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/pmc_atom.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h index 6807839c718b..3edfb6d4e67a 100644 --- a/include/linux/platform_data/x86/pmc_atom.h +++ b/include/linux/platform_data/x86/pmc_atom.h @@ -47,7 +47,7 @@ #define PMC_S0I2_TMR 0x88 #define PMC_S0I3_TMR 0x8C #define PMC_S0_TMR 0x90 -/* Sleep state counter is in units of of 32us */ +/* Sleep state counter is in units of 32us */ #define PMC_TMR_SHIFT 5 /* Power status of power islands */ -- cgit v1.2.3 From 9dd1cd3220eca534f2d47afad7ce85f4c40118d8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 20 Jul 2022 13:58:04 -0400 Subject: dm: fix dm-raid crash if md_handle_request() splits bio Commit ca522482e3eaf ("dm: pass NULL bdev to bio_alloc_clone") introduced the optimization to _not_ perform bio_associate_blkg()'s relatively costly work when DM core clones its bio. But in doing so it exposed the possibility for DM's cloned bio to alter DM target behavior (e.g. crash) if a target were to issue IO without first calling bio_set_dev(). The DM raid target can trigger an MD crash due to its need to split the DM bio that is passed to md_handle_request(). The split will recurse to submit_bio_noacct() using a bio with an uninitialized ->bi_blkg. This NULL bio->bi_blkg causes blk_throtl_bio() to dereference a NULL blkg_to_tg(bio->bi_blkg). Fix this in DM core by adding a new 'needs_bio_set_dev' target flag that will make alloc_tio() call bio_set_dev() on behalf of the target. dm-raid is the only target that requires this flag. bio_set_dev() initializes the DM cloned bio's ->bi_blkg, using bio_associate_blkg, before passing the bio to md_handle_request(). Long-term fix would be to audit and refactor MD code to rely on DM to split its bio, using dm_accept_partial_bio(), but there are MD raid personalities (e.g. raid1 and raid10) whose implementation are tightly coupled to handling the bio splitting inline. Fixes: ca522482e3eaf ("dm: pass NULL bdev to bio_alloc_clone") Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 920085dd7f3b..04c6acf7faaa 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -373,6 +373,12 @@ struct dm_target { * after returning DM_MAPIO_SUBMITTED from its map function. */ bool accounts_remapped_io:1; + + /* + * Set if the target will submit the DM bio without first calling + * bio_set_dev(). NOTE: ideally a target should _not_ need this. + */ + bool needs_bio_set_dev:1; }; void *dm_per_bio_data(struct bio *bio, size_t data_size); -- cgit v1.2.3 From 0fcb100d50835d6823723ef0898cd565b3796e0a Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Fri, 22 Jul 2022 09:38:21 +0000 Subject: dm bufio: Add flags argument to dm_bufio_client_create Add a flags argument to dm_bufio_client_create and update all the callers. This is in preparation to add the DM_BUFIO_NO_SLEEP flag. Signed-off-by: Nathan Huckleberry Signed-off-by: Mike Snitzer --- include/linux/dm-bufio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 90bd558a17f5..e21480715255 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -24,7 +24,8 @@ struct dm_bufio_client * dm_bufio_client_create(struct block_device *bdev, unsigned block_size, unsigned reserved_buffers, unsigned aux_size, void (*alloc_callback)(struct dm_buffer *), - void (*write_callback)(struct dm_buffer *)); + void (*write_callback)(struct dm_buffer *), + unsigned int flags); /* * Release a buffered IO cache. -- cgit v1.2.3 From b32d45824aa7e07a0c3257a16e3a2a691b11b39a Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Fri, 22 Jul 2022 09:38:22 +0000 Subject: dm bufio: Add DM_BUFIO_CLIENT_NO_SLEEP flag Add an optional flag that ensures dm_bufio_client does not sleep (primary focus is to service dm_bufio_get without sleeping). This allows the dm-bufio cache to be queried from interrupt context. To ensure that dm-bufio does not sleep, dm-bufio must use a spinlock instead of a mutex. Additionally, to avoid deadlocks, special care must be taken so that dm-bufio does not sleep while holding the spinlock. But again: the scope of this no_sleep is initially confined to dm_bufio_get, so __alloc_buffer_wait_no_callback is _not_ changed to avoid sleeping because __bufio_new avoids allocation for NF_GET. Signed-off-by: Nathan Huckleberry Signed-off-by: Mike Snitzer --- include/linux/dm-bufio.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index e21480715255..15d9e15ca830 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -17,6 +17,11 @@ struct dm_bufio_client; struct dm_buffer; +/* + * Flags for dm_bufio_client_create + */ +#define DM_BUFIO_CLIENT_NO_SLEEP 0x1 + /* * Create a buffered IO cache on a given device */ -- cgit v1.2.3 From 71610ab1d017e131a9888ef8acd035284fb0e1dd Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 20 Jul 2022 15:21:51 +0800 Subject: LoongArch: Remove clock setting during cpu hotplug stage On physical machine we can save power by disabling clock of hot removed cpu. However as different platforms require different methods to configure clocks, the code is platform-specific, and probably belongs to firmware/pmu or cpu regulator, rather than generic arch/loongarch code. Also, there is no such register on QEMU virt machine since the clock/frequency regulation is not emulated. This patch removes the hard-coded clock register accesses in generic LoongArch cpu hotplug flow. Reviewed-by: WANG Xuerui Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 19f0dbfdd7fe..b66c5f389159 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -130,7 +130,6 @@ enum cpuhp_state { CPUHP_ZCOMP_PREPARE, CPUHP_TIMERS_PREPARE, CPUHP_MIPS_SOC_PREPARE, - CPUHP_LOONGARCH_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, CPUHP_BRINGUP_CPU, -- cgit v1.2.3 From 4ab0e470c06dc741f6583578da44961669331b78 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 29 Jul 2022 17:15:00 +0530 Subject: KVM: Add gfp_custom flag in struct kvm_mmu_memory_cache The kvm_mmu_topup_memory_cache() always uses GFP_KERNEL_ACCOUNT for memory allocation which prevents it's use in atomic context. To address this limitation of kvm_mmu_topup_memory_cache(), we add gfp_custom flag in struct kvm_mmu_memory_cache. When the gfp_custom flag is set to some GFP_xyz flags, the kvm_mmu_topup_memory_cache() will use that instead of GFP_KERNEL_ACCOUNT. Signed-off-by: Anup Patel Reviewed-by: Atish Patra Signed-off-by: Anup Patel --- include/linux/kvm_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index ac1ebb37a0ff..1dcfba68076a 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -87,6 +87,7 @@ struct gfn_to_pfn_cache { struct kvm_mmu_memory_cache { int nobjs; gfp_t gfp_zero; + gfp_t gfp_custom; struct kmem_cache *kmem_cache; void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE]; }; -- cgit v1.2.3 From 0ad722f159e44983ddea1929ffd90d0c20a86f24 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 Jul 2022 17:36:16 +0200 Subject: PCI: Remove pci_mmap_page_range() wrapper The ARCH_GENERIC_PCI_MMAP_RESOURCE symbol came up in a recent discussion, and I noticed that this was left behind by an unfinished cleanup from 2017. The only architecture that still relies on providing its own pci_mmap_page_range() helper instead of using the generic pci_mmap_resource_range() is sparc. Presumably the reasons for this have not changed, but at least this can be simplified by converting sparc to use the same interface as the others. The only difference between the two is the device-specific offset that gets added to or subtracted from vma->vm_pgoff. Change the only caller of pci_mmap_page_range() in common code to subtract this offset and call the modern interface, while adding it back in the sparc implementation to preserve the existing behavior. This removes the complexities of the dual interfaces from the common code, and keeps it all specific to the sparc architecture code. According to David Miller, the sparc code lets user space poke into the VGA I/O port registers by mmapping the I/O space of the parent bridge device, which is something that the generic pci_mmap_resource_range() code apparently does not. Link: https://lore.kernel.org/lkml/1519887203.622.3.camel@infradead.org/t/ Link: https://lore.kernel.org/lkml/20220714214657.2402250-3-shorne@gmail.com/ Link: https://lore.kernel.org/r/20220715153617.3393420-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Bjorn Helgaas Cc: David Woodhouse Cc: David S. Miller Cc: Stafford Horne --- include/linux/pci.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 81a57b498f22..060af91bafcd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1909,24 +1909,14 @@ pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, #include -/* These two functions provide almost identical functionality. Depending - * on the architecture, one will be implemented as a wrapper around the - * other (in drivers/pci/mmap.c). - * +/* * pci_mmap_resource_range() maps a specific BAR, and vm->vm_pgoff * is expected to be an offset within that region. * - * pci_mmap_page_range() is the legacy architecture-specific interface, - * which accepts a "user visible" resource address converted by - * pci_resource_to_user(), as used in the legacy mmap() interface in - * /proc/bus/pci/. */ int pci_mmap_resource_range(struct pci_dev *dev, int bar, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); -int pci_mmap_page_range(struct pci_dev *pdev, int bar, - struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine); #ifndef arch_can_pci_mmap_wc #define arch_can_pci_mmap_wc() 0 -- cgit v1.2.3 From 909fcb195201f7c3aa81523cc182a4c9b52210e5 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Thu, 30 Jun 2022 00:53:21 +0200 Subject: clk: divider: Introduce devm_clk_hw_register_divider_parent_hw() Add the devres variant of clk_hw_register_divider_parent_hw() for registering a divider clock with clk_hw parent pointer instead of parent name. Signed-off-by: Marijn Suijten Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220629225331.357308-2-marijn.suijten@somainline.org Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index c10dc4c659e2..4e07621849e6 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -831,6 +831,25 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name, __devm_clk_hw_register_divider((dev), NULL, (name), (parent_name), NULL, \ NULL, (flags), (reg), (shift), (width), \ (clk_divider_flags), NULL, (lock)) +/** + * devm_clk_hw_register_divider_parent_hw - register a divider clock with the clock framework + * @dev: device registering this clock + * @name: name of this clock + * @parent_hw: pointer to parent clk + * @flags: framework-specific flags + * @reg: register address to adjust divider + * @shift: number of bits to shift the bitfield + * @width: width of the bitfield + * @clk_divider_flags: divider-specific flags for this clock + * @lock: shared register lock for this clock + */ +#define devm_clk_hw_register_divider_parent_hw(dev, name, parent_hw, flags, \ + reg, shift, width, \ + clk_divider_flags, lock) \ + __devm_clk_hw_register_divider((dev), NULL, (name), NULL, \ + (parent_hw), NULL, (flags), (reg), \ + (shift), (width), (clk_divider_flags), \ + NULL, (lock)) /** * devm_clk_hw_register_divider_table - register a table based divider clock * with the clock framework (devres variant) -- cgit v1.2.3 From df63af17f3375239e0be124844f304b4a4b60665 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Thu, 30 Jun 2022 00:53:22 +0200 Subject: clk: mux: Introduce devm_clk_hw_register_mux_parent_hws() Add the devres variant of clk_hw_register_mux_hws() for registering a mux clock with clk_hw parent pointers instead of parent names. Signed-off-by: Marijn Suijten Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220629225331.357308-3-marijn.suijten@somainline.org Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 4e07621849e6..316c7e082934 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -980,6 +980,13 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name, (parent_names), NULL, NULL, (flags), (reg), \ (shift), BIT((width)) - 1, (clk_mux_flags), \ NULL, (lock)) +#define devm_clk_hw_register_mux_parent_hws(dev, name, parent_hws, \ + num_parents, flags, reg, shift, \ + width, clk_mux_flags, lock) \ + __devm_clk_hw_register_mux((dev), NULL, (name), (num_parents), NULL, \ + (parent_hws), NULL, (flags), (reg), \ + (shift), BIT((width)) - 1, \ + (clk_mux_flags), NULL, (lock)) int clk_mux_val_to_index(struct clk_hw *hw, const u32 *table, unsigned int flags, unsigned int val); -- cgit v1.2.3 From 6ebd5247ad2aa210b3ff4481c6ed8aa32a032b12 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Thu, 30 Jun 2022 00:53:23 +0200 Subject: clk: fixed-factor: Introduce *clk_hw_register_fixed_factor_parent_hw() Add the devres and non-devres variant of clk_hw_register_fixed_factor_parent_hw() for registering a fixed factor clock with clk_hw parent pointer instead of parent name. Signed-off-by: Marijn Suijten Link: https://lore.kernel.org/r/20220629225331.357308-4-marijn.suijten@somainline.org Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 316c7e082934..94458cb669f0 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1032,6 +1032,14 @@ struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev, struct clk_hw *devm_clk_hw_register_fixed_factor_index(struct device *dev, const char *name, unsigned int index, unsigned long flags, unsigned int mult, unsigned int div); + +struct clk_hw *devm_clk_hw_register_fixed_factor_parent_hw(struct device *dev, + const char *name, const struct clk_hw *parent_hw, + unsigned long flags, unsigned int mult, unsigned int div); + +struct clk_hw *clk_hw_register_fixed_factor_parent_hw(struct device *dev, + const char *name, const struct clk_hw *parent_hw, + unsigned long flags, unsigned int mult, unsigned int div); /** * struct clk_fractional_divider - adjustable fractional divider clock * -- cgit v1.2.3 From c770f31d8f580ed4b965c64f924ec1cc50e41734 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 19 Jul 2022 09:18:35 -0400 Subject: SUNRPC: Fix xdr_encode_bool() I discovered that xdr_encode_bool() was returning the same address that was passed in the @p parameter. The documenting comment states that the intent is to return the address of the next buffer location, just like the other "xdr_encode_*" helpers. The result was the encoded results of NFSv3 PATHCONF operations were not formed correctly. Fixes: ded04a587f6c ("NFSD: Update the NFSv3 PATHCONF3res encoder to use struct xdr_stream") Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton --- include/linux/sunrpc/xdr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5860f32e3958..986c8a17ca5e 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -419,8 +419,8 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) */ static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) { - *p = n ? xdr_one : xdr_zero; - return p++; + *p++ = n ? xdr_one : xdr_zero; + return p; } /** -- cgit v1.2.3 From 184cefbe62627730c30282df12bcff9aae4816ea Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Mon, 13 Jun 2022 09:40:06 -0400 Subject: NLM: Defend against file_lock changes after vfs_test_lock() Instead of trusting that struct file_lock returns completely unchanged after vfs_test_lock() when there's no conflicting lock, stash away our nlm_lockowner reference so we can properly release it for all cases. This defends against another file_lock implementation overwriting fl_owner when the return type is F_UNLCK. Reported-by: Roberto Bergantinos Corpas Tested-by: Roberto Bergantinos Corpas Signed-off-by: Benjamin Coddington Signed-off-by: Chuck Lever --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index fcef192e5e45..70ce419e2709 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -292,6 +292,7 @@ void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t); __be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, struct nlm_lock *); void nlm_release_file(struct nlm_file *); +void nlmsvc_put_lockowner(struct nlm_lockowner *); void nlmsvc_release_lockowner(struct nlm_lock *); void nlmsvc_mark_resources(struct net *); void nlmsvc_free_host_resources(struct nlm_host *); -- cgit v1.2.3 From 5304877936c0a67e1a01464d113bae4c81eacdb6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:40:03 -0400 Subject: NFSD: Fix strncpy() fortify warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In function ‘strncpy’, inlined from ‘nfsd4_ssc_setup_dul’ at /home/cel/src/linux/manet/fs/nfsd/nfs4proc.c:1392:3, inlined from ‘nfsd4_interssc_connect’ at /home/cel/src/linux/manet/fs/nfsd/nfs4proc.c:1489:11: /home/cel/src/linux/manet/include/linux/fortify-string.h:52:33: warning: ‘__builtin_strncpy’ specified bound 63 equals destination size [-Wstringop-truncation] 52 | #define __underlying_strncpy __builtin_strncpy | ^ /home/cel/src/linux/manet/include/linux/fortify-string.h:89:16: note: in expansion of macro ‘__underlying_strncpy’ 89 | return __underlying_strncpy(p, q, size); | ^~~~~~~~~~~~~~~~~~~~ Signed-off-by: Chuck Lever --- include/linux/nfs_ssc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h index 222ae8883e85..75843c00f326 100644 --- a/include/linux/nfs_ssc.h +++ b/include/linux/nfs_ssc.h @@ -64,7 +64,7 @@ struct nfsd4_ssc_umount_item { refcount_t nsui_refcnt; unsigned long nsui_expire; struct vfsmount *nsui_vfsmount; - char nsui_ipaddr[RPC_MAX_ADDRBUFLEN]; + char nsui_ipaddr[RPC_MAX_ADDRBUFLEN + 1]; }; #endif -- cgit v1.2.3 From fef3e9066d19230f661048ca86937d954c12cd50 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Thu, 14 Jul 2022 16:41:47 +0800 Subject: writeback: remove inode_to_wb_is_valid() inode_to_wb_is_valid() is no longer used since commit fe55d563d417 ("remove inode_congested()"), remove it. Link: https://lkml.kernel.org/r/20220714084147.140324-1-xiujianfeng@huawei.com Signed-off-by: Xiu Jianfeng Reviewed-by: Johannes Thumshirn Reviewed-by: Jan Kara Signed-off-by: Andrew Morton --- include/linux/backing-dev.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e84b745a6811..439815cc1ab9 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -229,18 +229,6 @@ wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) return wb; } -/** - * inode_to_wb_is_valid - test whether an inode has a wb associated - * @inode: inode of interest - * - * Returns %true if @inode has a wb associated. May be called without any - * locking. - */ -static inline bool inode_to_wb_is_valid(struct inode *inode) -{ - return inode->i_wb; -} - /** * inode_to_wb - determine the wb of an inode * @inode: inode of interest @@ -339,11 +327,6 @@ wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) return &bdi->wb; } -static inline bool inode_to_wb_is_valid(struct inode *inode) -{ - return true; -} - static inline struct bdi_writeback *inode_to_wb(struct inode *inode) { return &inode_to_bdi(inode)->wb; -- cgit v1.2.3 From 73b73bac90d97400e29e585c678c4d0ebfd2680d Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Thu, 14 Jul 2022 06:49:18 +0000 Subject: mm: vmpressure: don't count proactive reclaim in vmpressure memory.reclaim is a cgroup v2 interface that allows users to proactively reclaim memory from a memcg, without real memory pressure. Reclaim operations invoke vmpressure, which is used: (a) To notify userspace of reclaim efficiency in cgroup v1, and (b) As a signal for a memcg being under memory pressure for networking (see mem_cgroup_under_socket_pressure()). For (a), vmpressure notifications in v1 are not affected by this change since memory.reclaim is a v2 feature. For (b), the effects of the vmpressure signal (according to Shakeel [1]) are as follows: 1. Reducing send and receive buffers of the current socket. 2. May drop packets on the rx path. 3. May throttle current thread on the tx path. Since proactive reclaim is invoked directly by userspace, not by memory pressure, it makes sense not to throttle networking. Hence, this change makes sure that proactive reclaim caused by memory.reclaim does not trigger vmpressure. [1] https://lore.kernel.org/lkml/CALvZod68WdrXEmBpOkadhB5GPYmCXaDZzXH=yyGOCAjFRn4NDQ@mail.gmail.com/ [yosryahmed@google.com: update documentation] Link: https://lkml.kernel.org/r/20220721173015.2643248-1-yosryahmed@google.com Link: https://lkml.kernel.org/r/20220714064918.2576464-1-yosryahmed@google.com Signed-off-by: Yosry Ahmed Acked-by: Shakeel Butt Acked-by: Michal Hocko Acked-by: David Rientjes Cc: Johannes Weiner Cc: Roman Gushchin Cc: Muchun Song Cc: Matthew Wilcox Cc: Vlastimil Babka Cc: David Hildenbrand Cc: Miaohe Lin Cc: NeilBrown Cc: Alistair Popple Cc: Suren Baghdasaryan Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/swap.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 6d11c51b2b62..ea895b40e6ff 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -411,10 +411,13 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page, extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); + +#define MEMCG_RECLAIM_MAY_SWAP (1 << 1) +#define MEMCG_RECLAIM_PROACTIVE (1 << 2) extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, - bool may_swap); + unsigned int reclaim_options); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, -- cgit v1.2.3 From e408e695f5f1f60d784913afc45ff2c387a5aeb8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 14 Jul 2022 21:59:12 -0400 Subject: mm/shmem: support FS_IOC_[SG]ETFLAGS in tmpfs This allows userspace to set flags like FS_APPEND_FL, FS_IMMUTABLE_FL, FS_NODUMP_FL, etc., like all other standard Linux file systems. [akpm@linux-foundation.org: fix CONFIG_TMPFS_XATTR=n warnings] Link: https://lkml.kernel.org/r/20220715015912.2560575-1-tytso@mit.edu Signed-off-by: Theodore Ts'o Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index a68f982f22d1..1b6c4013f691 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -25,9 +25,20 @@ struct shmem_inode_info { struct simple_xattrs xattrs; /* list of xattrs */ atomic_t stop_eviction; /* hold when working on inode */ struct timespec64 i_crtime; /* file creation time */ + unsigned int fsflags; /* flags for FS_IOC_[SG]ETFLAGS */ struct inode vfs_inode; }; +#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE +#define SHMEM_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE +#define SHMEM_FL_INHERITED FS_FL_USER_MODIFIABLE + +/* Flags that are appropriate for regular files (all but dir-specific ones). */ +#define SHMEM_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) + +/* Flags that are appropriate for non-directories/regular files. */ +#define SHMEM_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) + struct shmem_sb_info { unsigned long max_blocks; /* How many blocks are allowed */ struct percpu_counter used_blocks; /* How many are allocated */ -- cgit v1.2.3 From bb077c3ffd5362a6d9e60574e1bcc83fe8e3fb27 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 26 Jul 2022 21:18:16 +0800 Subject: mm: cleanup is_highmem() It is unnecessary to add CONFIG_HIGHMEM check in is_highmem(), which has been done in is_highmem_idx(), and move is_highmem() close to is_highmem_idx(). This has no functional impact. Link: https://lkml.kernel.org/r/20220726131816.149075-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 578247a341b2..e24b40c52468 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1137,15 +1137,6 @@ static inline int is_highmem_idx(enum zone_type idx) #endif } -#ifdef CONFIG_ZONE_DMA -bool has_managed_dma(void); -#else -static inline bool has_managed_dma(void) -{ - return false; -} -#endif - /** * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. This is an attempt to keep references @@ -1155,12 +1146,17 @@ static inline bool has_managed_dma(void) */ static inline int is_highmem(struct zone *zone) { -#ifdef CONFIG_HIGHMEM return is_highmem_idx(zone_idx(zone)); +} + +#ifdef CONFIG_ZONE_DMA +bool has_managed_dma(void); #else - return 0; -#endif +static inline bool has_managed_dma(void) +{ + return false; } +#endif /* These two functions are used to setup the per zone pages min values */ struct ctl_table; -- cgit v1.2.3 From fa7d574ba4f4f3f4f78d432c8545d9045daa89b1 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Tue, 19 Jul 2022 16:33:49 +0800 Subject: bdi: remove enum wb_congested_state enum wb_congested_state and the member 'congested' in bdi_writeback are useless since commit a88f2096d5a2 ("remove congestion tracking framework"), so remove it. Link: https://lkml.kernel.org/r/20220719083349.87547-1-xiujianfeng@huawei.com Signed-off-by: Xiu Jianfeng Reviewed-by: Jan Kara Cc: NeilBrown Signed-off-by: Andrew Morton --- include/linux/backing-dev-defs.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index e863c88df95f..ae12696ec492 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -28,11 +28,6 @@ enum wb_state { WB_start_all, /* nr_pages == 0 (all) work pending */ }; -enum wb_congested_state { - WB_async_congested, /* The async (write) queue is getting full */ - WB_sync_congested, /* The sync queue is getting full */ -}; - enum wb_stat_item { WB_RECLAIMABLE, WB_WRITEBACK, @@ -122,8 +117,6 @@ struct bdi_writeback { atomic_t writeback_inodes; /* number of inodes under writeback */ struct percpu_counter stat[NR_WB_STAT_ITEMS]; - unsigned long congested; /* WB_[a]sync_congested flags */ - unsigned long bw_time_stamp; /* last time write bw is updated */ unsigned long dirtied_stamp; unsigned long written_stamp; /* pages written at bw_time_stamp */ -- cgit v1.2.3 From 45f78b0a2743c4fd71b73400bd5d5339628bf538 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 27 Jul 2022 13:49:03 +0200 Subject: fs/dcache: Move the wakeup from __d_lookup_done() to the caller. __d_lookup_done() wakes waiters on dentry->d_wait. On PREEMPT_RT we are not allowed to do that with preemption disabled, since the wakeup acquired wait_queue_head::lock, which is a "sleeping" spinlock on RT. Calling it under dentry->d_lock is not a problem, since that is also a "sleeping" spinlock on the same configs. Unfortunately, two of its callers (__d_add() and __d_move()) are holding more than just ->d_lock and that needs to be dealt with. The key observation is that wakeup can be moved to any point before dropping ->d_lock. As a first step to solve this, move the wake up outside of the hlist_bl_lock() held section. This is safe because: Waiters get inserted into ->d_wait only after they'd taken ->d_lock and observed DCACHE_PAR_LOOKUP in flags. As long as they are woken up (and evicted from the queue) between the moment __d_lookup_done() has removed DCACHE_PAR_LOOKUP and dropping ->d_lock, we are safe, since the waitqueue ->d_wait points to won't get destroyed without having __d_lookup_done(dentry) called (under ->d_lock). ->d_wait is set only by d_alloc_parallel() and only in case when it returns a freshly allocated in-lookup dentry. Whenever that happens, we are guaranteed that __d_lookup_done() will be called for resulting dentry (under ->d_lock) before the wq in question gets destroyed. With two exceptions wq lives in call frame of the caller of d_alloc_parallel() and we have an explicit d_lookup_done() on the resulting in-lookup dentry before we leave that frame. One of those exceptions is nfs_call_unlink(), where wq is embedded into (dynamically allocated) struct nfs_unlinkdata. It is destroyed in nfs_async_unlink_release() after an explicit d_lookup_done() on the dentry wq went into. Remaining exception is d_add_ci(). There wq is what we'd found in ->d_wait of d_add_ci() argument. Callers of d_add_ci() are two instances of ->d_lookup() and they must have been given an in-lookup dentry. Which means that they'd been called by __lookup_slow() or lookup_open(), with wq in the call frame of one of those. Result of d_alloc_parallel() in d_add_ci() is fed to d_splice_alias(), which either returns non-NULL (and d_add_ci() does d_lookup_done()) or feeds dentry to __d_add() that will do __d_lookup_done() under ->d_lock. That concludes the analysis. Let __d_lookup_unhash(): 1) Lock the lookup hash and clear DCACHE_PAR_LOOKUP 2) Unhash the dentry 3) Retrieve and clear dentry::d_wait 4) Unlock the hash and return the retrieved waitqueue head pointer 5) Let the caller handle the wake up. 6) Rename __d_lookup_done() to __d_lookup_unhash_wake() to enforce build failures for OOT code that used __d_lookup_done() and is not aware of the new return value. This does not yet solve the PREEMPT_RT problem completely because preemption is still disabled due to i_dir_seq being held for write. This will be addressed in subsequent steps. An alternative solution would be to switch the waitqueue to a simple waitqueue, but aside of Linus not being a fan of them, moving the wake up closer to the place where dentry::lock is unlocked reduces lock contention time for the woken up waiter. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Link: https://lkml.kernel.org/r/20220613140712.77932-3-bigeasy@linutronix.de Signed-off-by: Al Viro --- include/linux/dcache.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f5bba51480b2..c73e5e327e76 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -349,7 +349,7 @@ static inline void dont_mount(struct dentry *dentry) spin_unlock(&dentry->d_lock); } -extern void __d_lookup_done(struct dentry *); +extern void __d_lookup_unhash_wake(struct dentry *dentry); static inline int d_in_lookup(const struct dentry *dentry) { @@ -358,11 +358,8 @@ static inline int d_in_lookup(const struct dentry *dentry) static inline void d_lookup_done(struct dentry *dentry) { - if (unlikely(d_in_lookup(dentry))) { - spin_lock(&dentry->d_lock); - __d_lookup_done(dentry); - spin_unlock(&dentry->d_lock); - } + if (unlikely(d_in_lookup(dentry))) + __d_lookup_unhash_wake(dentry); } extern void dput(struct dentry *); -- cgit v1.2.3 From 102227b970a15256f5ffd12a6a276ddf978e6caf Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:40 +0200 Subject: rv: Add Runtime Verification (RV) interface RV is a lightweight (yet rigorous) method that complements classical exhaustive verification techniques (such as model checking and theorem proving) with a more practical approach to complex systems. RV works by analyzing the trace of the system's actual execution, comparing it against a formal specification of the system behavior. RV can give precise information on the runtime behavior of the monitored system while enabling the reaction for unexpected events, avoiding, for example, the propagation of a failure on safety-critical systems. The development of this interface roots in the development of the paper: De Oliveira, Daniel Bristot; Cucinotta, Tommaso; De Oliveira, Romulo Silva. Efficient formal verification for the Linux kernel. In: International Conference on Software Engineering and Formal Methods. Springer, Cham, 2019. p. 315-332. And: De Oliveira, Daniel Bristot. Automata-based formal analysis and verification of the real-time Linux kernel. PhD Thesis, 2020. The RV interface resembles the tracing/ interface on purpose. The current path for the RV interface is /sys/kernel/tracing/rv/. It presents these files: "available_monitors" - List the available monitors, one per line. For example: # cat available_monitors wip wwnr "enabled_monitors" - Lists the enabled monitors, one per line; - Writing to it enables a given monitor; - Writing a monitor name with a '!' prefix disables it; - Truncating the file disables all enabled monitors. For example: # cat enabled_monitors # echo wip > enabled_monitors # echo wwnr >> enabled_monitors # cat enabled_monitors wip wwnr # echo '!wip' >> enabled_monitors # cat enabled_monitors wwnr # echo > enabled_monitors # cat enabled_monitors # Note that more than one monitor can be enabled concurrently. "monitoring_on" - It is an on/off general switcher for monitoring. Note that it does not disable enabled monitors or detach events, but stop the per-entity monitors of monitoring the events received from the system. It resembles the "tracing_on" switcher. "monitors/" Each monitor will have its one directory inside "monitors/". There the monitor specific files will be presented. The "monitors/" directory resembles the "events" directory on tracefs. For example: # cd monitors/wip/ # ls desc enable # cat desc wakeup in preemptive per-cpu testing monitor. # cat enable 0 For further information, see the comments in the header of kernel/trace/rv/rv.c from this patch. Link: https://lkml.kernel.org/r/a4bfe038f50cb047bfb343ad0e12b0e646ab308b.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 11 +++++++++++ 2 files changed, 54 insertions(+) create mode 100644 include/linux/rv.h (limited to 'include/linux') diff --git a/include/linux/rv.h b/include/linux/rv.h new file mode 100644 index 000000000000..d8fa9e8be94a --- /dev/null +++ b/include/linux/rv.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Runtime Verification. + * + * For futher information, see: kernel/trace/rv/rv.c. + */ +#ifndef _LINUX_RV_H +#define _LINUX_RV_H + +#ifdef CONFIG_RV + +/* + * Per-task RV monitors count. Nowadays fixed in RV_PER_TASK_MONITORS. + * If we find justification for more monitors, we can think about + * adding more or developing a dynamic method. So far, none of + * these are justified. + */ +#define RV_PER_TASK_MONITORS 1 +#define RV_PER_TASK_MONITOR_INIT (RV_PER_TASK_MONITORS) + +/* + * Futher monitor types are expected, so make this a union. + */ +union rv_task_monitor { +}; + +struct rv_monitor { + const char *name; + const char *description; + bool enabled; + int (*enable)(void); + void (*disable)(void); + void (*reset)(void); +}; + +bool rv_monitoring_on(void); +int rv_unregister_monitor(struct rv_monitor *monitor); +int rv_register_monitor(struct rv_monitor *monitor); +int rv_get_task_monitor_slot(void); +void rv_put_task_monitor_slot(int slot); + +#endif /* CONFIG_RV */ +#endif /* _LINUX_RV_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index c46f3a63b758..b5da3e7c3a04 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,6 +34,7 @@ #include #include #include +#include #include /* task_struct member predeclarations (sorted alphabetically): */ @@ -1500,6 +1501,16 @@ struct task_struct { struct callback_head l1d_flush_kill; #endif +#ifdef CONFIG_RV + /* + * Per-task RV monitor. Nowadays fixed in RV_PER_TASK_MONITORS. + * If we find justification for more monitors, we can think + * about adding more or developing a dynamic method. So far, + * none of these are justified. + */ + union rv_task_monitor rv[RV_PER_TASK_MONITORS]; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. -- cgit v1.2.3 From 04acadcb4453cf8011dd3d4ce8d97fecac42d325 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:41 +0200 Subject: rv: Add runtime reactors interface A runtime monitor can cause a reaction to the detection of an exception on the model's execution. By default, the monitors have tracing reactions, printing the monitor output via tracepoints. But other reactions can be added (on-demand) via this interface. The user interface resembles the kernel tracing interface and presents these files: "available_reactors" - Reading shows the available reactors, one per line. For example: # cat available_reactors nop panic printk "reacting_on" - It is an on/off general switch for reactors, disabling all reactions. "monitors/MONITOR/reactors" - List available reactors, with the select reaction for the given MONITOR inside []. The default one is the nop (no operation) reactor. - Writing the name of a reactor enables it to the given MONITOR. For example: # cat monitors/wip/reactors [nop] panic printk # echo panic > monitors/wip/reactors # cat monitors/wip/reactors nop [panic] printk Link: https://lkml.kernel.org/r/1794eb994637457bdeaa6bad0b8263d2f7eece0c.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rv.h b/include/linux/rv.h index d8fa9e8be94a..dcce56987cf7 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -24,6 +24,14 @@ union rv_task_monitor { }; +#ifdef CONFIG_RV_REACTORS +struct rv_reactor { + const char *name; + const char *description; + void (*react)(char *msg); +}; +#endif + struct rv_monitor { const char *name; const char *description; @@ -31,6 +39,9 @@ struct rv_monitor { int (*enable)(void); void (*disable)(void); void (*reset)(void); +#ifdef CONFIG_RV_REACTORS + void (*react)(char *msg); +#endif }; bool rv_monitoring_on(void); @@ -39,5 +50,11 @@ int rv_register_monitor(struct rv_monitor *monitor); int rv_get_task_monitor_slot(void); void rv_put_task_monitor_slot(int slot); +#ifdef CONFIG_RV_REACTORS +bool rv_reacting_on(void); +int rv_unregister_reactor(struct rv_reactor *reactor); +int rv_register_reactor(struct rv_reactor *reactor); +#endif /* CONFIG_RV_REACTORS */ + #endif /* CONFIG_RV */ #endif /* _LINUX_RV_H */ -- cgit v1.2.3 From 792575348ff70e05c6040d02fce38e949ef92c37 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:43 +0200 Subject: rv/include: Add deterministic automata monitor definition via C macros In Linux terms, the runtime verification monitors are encapsulated inside the "RV monitor" abstraction. The "RV monitor" includes a set of instances of the monitor (per-cpu monitor, per-task monitor, and so on), the helper functions that glue the monitor to the system reference model, and the trace output as a reaction for event parsing and exceptions, as depicted below: Linux +----- RV Monitor ----------------------------------+ Formal Realm | | Realm +-------------------+ +----------------+ +-----------------+ | Linux kernel | | Monitor | | Reference | | Tracing | -> | Instance(s) | <- | Model | | (instrumentation) | | (verification) | | (specification) | +-------------------+ +----------------+ +-----------------+ | | | | V | | +----------+ | | | Reaction | | | +--+--+--+-+ | | | | | | | | | +-> trace output ? | +------------------------|--|----------------------+ | +----> panic ? +-------> Add the rv/da_monitor.h, enabling automatic code generation for the *Monitor Instance(s)* using C macros, and code to support it. The benefits of the usage of macro for monitor synthesis are 3-fold as it: - Reduces the code duplication; - Facilitates the bug fix/improvement; - Avoids the case of developers changing the core of the monitor code to manipulate the model in a (let's say) non-standard way. This initial implementation presents three different types of monitor instances: - DECLARE_DA_MON_GLOBAL(name, type) - DECLARE_DA_MON_PER_CPU(name, type) - DECLARE_DA_MON_PER_TASK(name, type) The first declares the functions for a global deterministic automata monitor, the second for monitors with per-cpu instances, and the third with per-task instances. Link: https://lkml.kernel.org/r/51b0bf425a281e226dfeba7401d2115d6091f84e.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rv.h b/include/linux/rv.h index dcce56987cf7..8883b41d88ec 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -7,7 +7,16 @@ #ifndef _LINUX_RV_H #define _LINUX_RV_H +#define MAX_DA_NAME_LEN 24 + #ifdef CONFIG_RV +/* + * Deterministic automaton per-object variables. + */ +struct da_monitor { + bool monitoring; + unsigned int curr_state; +}; /* * Per-task RV monitors count. Nowadays fixed in RV_PER_TASK_MONITORS. @@ -22,6 +31,7 @@ * Futher monitor types are expected, so make this a union. */ union rv_task_monitor { + struct da_monitor da_mon; }; #ifdef CONFIG_RV_REACTORS -- cgit v1.2.3 From a603002eea8213eec5211be5a85db8340aea06d0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 22 Jun 2022 08:38:36 +0200 Subject: virtio: replace restricted mem access flag with callback Instead of having a global flag to require restricted memory access for all virtio devices, introduce a callback which can select that requirement on a per-device basis. For convenience add a common function returning always true, which can be used for use cases like SEV. Per default use a callback always returning false. As the callback needs to be set in early init code already, add a virtio anchor which is builtin in case virtio is enabled. Signed-off-by: Juergen Gross Tested-by: Oleksandr Tyshchenko # Arm64 guest using Xen Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/20220622063838.8854-2-jgross@suse.com Signed-off-by: Juergen Gross --- include/linux/platform-feature.h | 6 +----- include/linux/virtio_anchor.h | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 include/linux/virtio_anchor.h (limited to 'include/linux') diff --git a/include/linux/platform-feature.h b/include/linux/platform-feature.h index b2f48be999fa..6ed859928b97 100644 --- a/include/linux/platform-feature.h +++ b/include/linux/platform-feature.h @@ -6,11 +6,7 @@ #include /* The platform features are starting with the architecture specific ones. */ - -/* Used to enable platform specific DMA handling for virtio devices. */ -#define PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS (0 + PLATFORM_ARCH_FEAT_N) - -#define PLATFORM_FEAT_N (1 + PLATFORM_ARCH_FEAT_N) +#define PLATFORM_FEAT_N (0 + PLATFORM_ARCH_FEAT_N) void platform_set(unsigned int feature); void platform_clear(unsigned int feature); diff --git a/include/linux/virtio_anchor.h b/include/linux/virtio_anchor.h new file mode 100644 index 000000000000..432e6c00b3ca --- /dev/null +++ b/include/linux/virtio_anchor.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VIRTIO_ANCHOR_H +#define _LINUX_VIRTIO_ANCHOR_H + +#ifdef CONFIG_VIRTIO_ANCHOR +struct virtio_device; + +bool virtio_require_restricted_mem_acc(struct virtio_device *dev); +extern bool (*virtio_check_mem_acc_cb)(struct virtio_device *dev); + +static inline void virtio_set_mem_acc_cb(bool (*func)(struct virtio_device *)) +{ + virtio_check_mem_acc_cb = func; +} +#else +#define virtio_set_mem_acc_cb(func) do { } while (0) +#endif + +#endif /* _LINUX_VIRTIO_ANCHOR_H */ -- cgit v1.2.3 From a870544ca9d215449e91ebc01e35d80b23151c78 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 22 Jun 2022 08:38:37 +0200 Subject: kernel: remove platform_has() infrastructure The only use case of the platform_has() infrastructure has been removed again, so remove the whole feature. Signed-off-by: Juergen Gross Tested-by: Oleksandr Tyshchenko # Arm64 guest using Xen Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/20220622063838.8854-3-jgross@suse.com Signed-off-by: Juergen Gross --- include/linux/platform-feature.h | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 include/linux/platform-feature.h (limited to 'include/linux') diff --git a/include/linux/platform-feature.h b/include/linux/platform-feature.h deleted file mode 100644 index 6ed859928b97..000000000000 --- a/include/linux/platform-feature.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PLATFORM_FEATURE_H -#define _PLATFORM_FEATURE_H - -#include -#include - -/* The platform features are starting with the architecture specific ones. */ -#define PLATFORM_FEAT_N (0 + PLATFORM_ARCH_FEAT_N) - -void platform_set(unsigned int feature); -void platform_clear(unsigned int feature); -bool platform_has(unsigned int feature); - -#endif /* _PLATFORM_FEATURE_H */ -- cgit v1.2.3 From 36d4b36b69590fed99356a4426c940a253a93800 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 25 Jul 2022 09:39:17 -0700 Subject: lib/nodemask: inline next_node_in() and node_random() The functions are pretty thin wrappers around find_bit engine, and keeping them in c-file prevents compiler from small_const_nbits() optimization, which must take place for all systems with MAX_NUMNODES less than BITS_PER_LONG (default is 16 for me). Moving them to header file doesn't blow up the kernel size: add/remove: 1/2 grow/shrink: 9/5 up/down: 968/-88 (880) CC: Andy Shevchenko CC: Benjamin Herrenschmidt CC: Michael Ellerman CC: Paul Mackerras CC: Rasmus Villemoes CC: Stephen Rothwell CC: linuxppc-dev@lists.ozlabs.org Signed-off-by: Yury Norov --- include/linux/nodemask.h | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 0f233b76c9ce..4b71a96190a8 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -94,6 +94,7 @@ #include #include #include +#include typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; extern nodemask_t _unused_nodemask_arg_; @@ -276,7 +277,14 @@ static inline unsigned int __next_node(int n, const nodemask_t *srcp) * the first node in src if needed. Returns MAX_NUMNODES if src is empty. */ #define next_node_in(n, src) __next_node_in((n), &(src)) -unsigned int __next_node_in(int node, const nodemask_t *srcp); +static inline unsigned int __next_node_in(int node, const nodemask_t *srcp) +{ + unsigned int ret = __next_node(node, srcp); + + if (ret == MAX_NUMNODES) + ret = __first_node(srcp); + return ret; +} static inline void init_nodemask_of_node(nodemask_t *mask, int node) { @@ -493,14 +501,20 @@ static inline int num_node_state(enum node_states state) #endif +static inline int node_random(const nodemask_t *maskp) +{ #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) -extern int node_random(const nodemask_t *maskp); + int w, bit = NUMA_NO_NODE; + + w = nodes_weight(*maskp); + if (w) + bit = bitmap_ord_to_pos(maskp->bits, + get_random_int() % w, MAX_NUMNODES); + return bit; #else -static inline int node_random(const nodemask_t *mask) -{ return 0; -} #endif +} #define node_online_map node_states[N_ONLINE] #define node_possible_map node_states[N_POSSIBLE] -- cgit v1.2.3 From 68f2736a858324c3ec852f6c2cddd9d1c777357d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Jun 2022 15:38:48 -0400 Subject: mm: Convert all PageMovable users to movable_operations These drivers are rather uncomfortably hammered into the address_space_operations hole. They aren't filesystems and don't behave like filesystems. They just need their own movable_operations structure, which we can point to directly from page->mapping. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/balloon_compaction.h | 6 ++-- include/linux/fs.h | 2 -- include/linux/migrate.h | 56 ++++++++++++++++++++++++++++++++++---- include/linux/page-flags.h | 2 +- 4 files changed, 54 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index edb7f6d41faa..5ca2d5699620 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -57,7 +57,6 @@ struct balloon_dev_info { struct list_head pages; /* Pages enqueued & handled to Host */ int (*migratepage)(struct balloon_dev_info *, struct page *newpage, struct page *page, enum migrate_mode mode); - struct inode *inode; }; extern struct page *balloon_page_alloc(void); @@ -75,11 +74,10 @@ static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) spin_lock_init(&balloon->pages_lock); INIT_LIST_HEAD(&balloon->pages); balloon->migratepage = NULL; - balloon->inode = NULL; } #ifdef CONFIG_BALLOON_COMPACTION -extern const struct address_space_operations balloon_aops; +extern const struct movable_operations balloon_mops; /* * balloon_page_insert - insert a page into the balloon's page list and make @@ -94,7 +92,7 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon, struct page *page) { __SetPageOffline(page); - __SetPageMovable(page, balloon->inode->i_mapping); + __SetPageMovable(page, &balloon_mops); set_page_private(page, (unsigned long)balloon); list_add(&page->lru, &balloon->pages); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae..5d8ee3155ca2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -367,8 +367,6 @@ struct address_space_operations { */ int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); - bool (*isolate_page)(struct page *, isolate_mode_t); - void (*putback_page)(struct page *); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 069a89e847f3..82c735ba6109 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -19,6 +19,43 @@ struct migration_target_control; */ #define MIGRATEPAGE_SUCCESS 0 +/** + * struct movable_operations - Driver page migration + * @isolate_page: + * The VM calls this function to prepare the page to be moved. The page + * is locked and the driver should not unlock it. The driver should + * return ``true`` if the page is movable and ``false`` if it is not + * currently movable. After this function returns, the VM uses the + * page->lru field, so the driver must preserve any information which + * is usually stored here. + * + * @migrate_page: + * After isolation, the VM calls this function with the isolated + * @src page. The driver should copy the contents of the + * @src page to the @dst page and set up the fields of @dst page. + * Both pages are locked. + * If page migration is successful, the driver should call + * __ClearPageMovable(@src) and return MIGRATEPAGE_SUCCESS. + * If the driver cannot migrate the page at the moment, it can return + * -EAGAIN. The VM interprets this as a temporary migration failure and + * will retry it later. Any other error value is a permanent migration + * failure and migration will not be retried. + * The driver shouldn't touch the @src->lru field while in the + * migrate_page() function. It may write to @dst->lru. + * + * @putback_page: + * If migration fails on the isolated page, the VM informs the driver + * that the page is no longer a candidate for migration by calling + * this function. The driver should put the isolated page back into + * its own data structure. + */ +struct movable_operations { + bool (*isolate_page)(struct page *, isolate_mode_t); + int (*migrate_page)(struct page *dst, struct page *src, + enum migrate_mode); + void (*putback_page)(struct page *); +}; + /* Defined in mm/debug.c: */ extern const char *migrate_reason_names[MR_TYPES]; @@ -91,13 +128,13 @@ static inline int next_demotion_node(int node) #endif #ifdef CONFIG_COMPACTION -extern int PageMovable(struct page *page); -extern void __SetPageMovable(struct page *page, struct address_space *mapping); -extern void __ClearPageMovable(struct page *page); +bool PageMovable(struct page *page); +void __SetPageMovable(struct page *page, const struct movable_operations *ops); +void __ClearPageMovable(struct page *page); #else -static inline int PageMovable(struct page *page) { return 0; } +static inline bool PageMovable(struct page *page) { return false; } static inline void __SetPageMovable(struct page *page, - struct address_space *mapping) + const struct movable_operations *ops) { } static inline void __ClearPageMovable(struct page *page) @@ -110,6 +147,15 @@ static inline bool folio_test_movable(struct folio *folio) return PageMovable(&folio->page); } +static inline +const struct movable_operations *page_movable_ops(struct page *page) +{ + VM_BUG_ON(!__PageMovable(page)); + + return (const struct movable_operations *) + ((unsigned long)page->mapping - PAGE_MAPPING_MOVABLE); +} + #ifdef CONFIG_NUMA_BALANCING extern int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int node); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e66f7aa3191d..3f5490f6f038 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -639,7 +639,7 @@ __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) * structure which KSM associates with that merged page. See ksm.h. * * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable - * page and then page->mapping points a struct address_space. + * page and then page->mapping points to a struct movable_operations. * * Please note that, confusingly, "page_mapping" refers to the inode * address_space which maps the page from disk; whereas "page_mapped" -- cgit v1.2.3 From 5490da4f06d182ba944706875029e98fe7f6b821 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 09:00:16 -0400 Subject: fs: Add aops->migrate_folio Provide a folio-based replacement for aops->migratepage. Update the documentation to document migrate_folio instead of migratepage. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5d8ee3155ca2..47431cf8fbb3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -362,9 +362,11 @@ struct address_space_operations { void (*free_folio)(struct folio *folio); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); /* - * migrate the contents of a page to the specified target. If + * migrate the contents of a folio to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. */ + int (*migrate_folio)(struct address_space *, struct folio *dst, + struct folio *src, enum migrate_mode); int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); int (*launder_folio)(struct folio *); -- cgit v1.2.3 From 67235182a41c1bd6b32806a1556a1d299b84212b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:20:31 -0400 Subject: mm/migrate: Convert buffer_migrate_page() to buffer_migrate_folio() Use a folio throughout __buffer_migrate_folio(), add kernel-doc for buffer_migrate_folio() and buffer_migrate_folio_norefs(), move their declarations to buffer.h and switch all filesystems that have wired them up. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/buffer_head.h | 10 ++++++++++ include/linux/fs.h | 12 ------------ 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c9d1463bb20f..b0366c89d6a4 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -267,6 +267,16 @@ int nobh_truncate_page(struct address_space *, loff_t, get_block_t *); int nobh_writepage(struct page *page, get_block_t *get_block, struct writeback_control *wbc); +#ifdef CONFIG_MIGRATION +extern int buffer_migrate_folio(struct address_space *, + struct folio *dst, struct folio *src, enum migrate_mode); +extern int buffer_migrate_folio_norefs(struct address_space *, + struct folio *dst, struct folio *src, enum migrate_mode); +#else +#define buffer_migrate_folio NULL +#define buffer_migrate_folio_norefs NULL +#endif + void buffer_init(void); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 47431cf8fbb3..9e6b17da4e11 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3215,18 +3215,6 @@ extern int generic_check_addressable(unsigned, u64); extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); -#ifdef CONFIG_MIGRATION -extern int buffer_migrate_page(struct address_space *, - struct page *, struct page *, - enum migrate_mode); -extern int buffer_migrate_page_norefs(struct address_space *, - struct page *, struct page *, - enum migrate_mode); -#else -#define buffer_migrate_page NULL -#define buffer_migrate_page_norefs NULL -#endif - int may_setattr(struct user_namespace *mnt_userns, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *); -- cgit v1.2.3 From 541846502f4fe826cd7c16e4784695ac90736585 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:27:41 -0400 Subject: mm/migrate: Convert migrate_page() to migrate_folio() Convert all callers to pass a folio. Most have the folio already available. Switch all users from aops->migratepage to aops->migrate_folio. Also turn the documentation into kerneldoc. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: David Sterba --- include/linux/migrate.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 82c735ba6109..c9986d5da335 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -62,9 +62,8 @@ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); -extern int migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, - enum migrate_mode mode); +int migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded); -- cgit v1.2.3 From 2ec810d59602f0e08847f986ef8e16469722496f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 12:55:08 -0400 Subject: mm/migrate: Add filemap_migrate_folio() There is nothing iomap-specific about iomap_migratepage(), and it fits a pattern used by several other filesystems, so move it to mm/migrate.c, convert it to be filemap_migrate_folio() and convert the iomap filesystems to use it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- include/linux/iomap.h | 6 ------ include/linux/pagemap.h | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e552097c67e0..758a1125e72f 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -231,12 +231,6 @@ void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); -#ifdef CONFIG_MIGRATION -int iomap_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode); -#else -#define iomap_migrate_page NULL -#endif int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 87d4ea571240..cc9adbaddb59 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1078,6 +1078,12 @@ static inline int __must_check write_one_page(struct page *page) int __set_page_dirty_nobuffers(struct page *page); bool noop_dirty_folio(struct address_space *mapping, struct folio *folio); +#ifdef CONFIG_MIGRATION +int filemap_migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode); +#else +#define filemap_migrate_folio NULL +#endif void page_endio(struct page *page, bool is_write, int err); void folio_end_private_2(struct folio *folio); -- cgit v1.2.3 From b890ec2a2c2d962f71ba31ae291f8fd252b46258 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:47:21 -0400 Subject: hugetlb: Convert to migrate_folio This involves converting migrate_huge_page_move_mapping(). We also need a folio variant of hugetlb_set_page_subpool(), but that's for a later patch. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Muchun Song Reviewed-by: Mike Kravetz --- include/linux/migrate.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index c9986d5da335..13f793309b75 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -72,8 +72,8 @@ extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); -extern int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page); +int migrate_huge_page_move_mapping(struct address_space *mapping, + struct folio *dst, struct folio *src); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep, @@ -104,7 +104,7 @@ static inline void migrate_page_copy(struct page *newpage, struct page *page) {} static inline int migrate_huge_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page) + struct folio *dst, struct folio *src) { return -ENOSYS; } -- cgit v1.2.3 From 9d0ddc0cb575fd41ff16131b06e08e1feac43b81 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 11:53:31 -0400 Subject: fs: Remove aops->migratepage() With all users converted to migrate_folio(), remove this operation. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9e6b17da4e11..7e06919b8f60 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -367,8 +367,6 @@ struct address_space_operations { */ int (*migrate_folio)(struct address_space *, struct folio *dst, struct folio *src, enum migrate_mode); - int (*migratepage) (struct address_space *, - struct page *, struct page *, enum migrate_mode); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); -- cgit v1.2.3 From 9800562f2ab41656b0bdc2a41c77ab3f6dfdd6fc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 13:29:10 -0400 Subject: mm/folio-compat: Remove migration compatibility functions migrate_page_move_mapping(), migrate_page_copy() and migrate_page_states() are all now unused after converting all the filesystems from aops->migratepage() to aops->migrate_folio(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/migrate.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 13f793309b75..ae5bb67a9ba1 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -70,12 +70,8 @@ extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, extern struct page *alloc_migration_target(struct page *page, unsigned long private); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); -extern void migrate_page_states(struct page *newpage, struct page *page); -extern void migrate_page_copy(struct page *newpage, struct page *page); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); -extern int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, int extra_count); void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep, spinlock_t *ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); @@ -96,13 +92,6 @@ static inline struct page *alloc_migration_target(struct page *page, static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } -static inline void migrate_page_states(struct page *newpage, struct page *page) -{ -} - -static inline void migrate_page_copy(struct page *newpage, - struct page *page) {} - static inline int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src) { -- cgit v1.2.3 From cc9cf350d100f4c336edb228cbd078003430cfe7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Jun 2022 07:37:13 +0200 Subject: fs: remove the nobh helpers All callers are gone, so remove the now dead code. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/buffer_head.h | 8 -------- include/linux/mpage.h | 2 -- 2 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index b0366c89d6a4..61afb81cfdae 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -258,14 +258,6 @@ static inline vm_fault_t block_page_mkwrite_return(int err) } sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); -int nobh_write_begin(struct address_space *, loff_t, unsigned len, - struct page **, void **, get_block_t*); -int nobh_write_end(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page *, void *); -int nobh_truncate_page(struct address_space *, loff_t, get_block_t *); -int nobh_writepage(struct page *page, get_block_t *get_block, - struct writeback_control *wbc); #ifdef CONFIG_MIGRATION extern int buffer_migrate_folio(struct address_space *, diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 43986f7ec4dd..1bdc39daac0a 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -19,7 +19,5 @@ void mpage_readahead(struct readahead_control *, get_block_t get_block); int mpage_read_folio(struct folio *folio, get_block_t get_block); int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block); -int mpage_writepage(struct page *page, get_block_t *get_block, - struct writeback_control *wbc); #endif -- cgit v1.2.3 From 170ab26b01d7f445c46261eff69341dab7e50a24 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Thu, 21 Jul 2022 16:19:04 +0800 Subject: tracepoints: It is CONFIG_TRACEPOINTS not CONFIG_TRACEPOINT When reading this note, CONFIG_TRACEPOINT searches my configuration file, and the result is CONFIG_TRACEPOINTS, the search results are consistent with the following macro definitions. I think it should be repaired. Link: https://lkml.kernel.org/r/20220721081904.3798-1-zeming@nfschina.com Signed-off-by: Li zeming Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 28031b15f878..2908cc5ed70e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -151,7 +151,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) /* * Individual subsystem my have a separate configuration to * enable their tracepoints. By default, this file will create - * the tracepoints if CONFIG_TRACEPOINT is defined. If a subsystem + * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem * wants to be able to disable its tracepoints from being created * it can define NOTRACE before including the tracepoint headers. */ -- cgit v1.2.3 From d9c26e0a58b0039d1ad4340d826fe8db866e9455 Mon Sep 17 00:00:00 2001 From: Chun-Kuang Hu Date: Wed, 8 Jun 2022 22:40:55 +0800 Subject: mailbox: mtk-cmdq: Remove proprietary cmdq_task_cb rx_callback is a standard mailbox callback mechanism and could cover the function of proprietary cmdq_task_cb, so use the standard one instead of the proprietary one. Client driver has changed to use standard rx_callback, so remove proprietary cmdq_task_cb. Signed-off-by: Chun-Kuang Hu Reviewed-by: Matthias Brugger Signed-off-by: Jassi Brar --- include/linux/mailbox/mtk-cmdq-mailbox.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index 44365aab043c..a8f0070c7aa9 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -67,24 +67,14 @@ enum cmdq_code { struct cmdq_cb_data { int sta; - void *data; struct cmdq_pkt *pkt; }; -typedef void (*cmdq_async_flush_cb)(struct cmdq_cb_data data); - -struct cmdq_task_cb { - cmdq_async_flush_cb cb; - void *data; -}; - struct cmdq_pkt { void *va_base; dma_addr_t pa_base; size_t cmd_buf_size; /* command occupied size */ size_t buf_size; /* real buffer size */ - struct cmdq_task_cb cb; - struct cmdq_task_cb async_cb; void *cl; }; -- cgit v1.2.3 From d3e94fdc4ef476ca1edd468cc11badf2dbbb3c00 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 8 Jan 2021 15:34:38 -0500 Subject: fscrypt: export fscrypt_fname_encrypt and fscrypt_fname_encrypted_size For ceph, we want to use our own scheme for handling filenames that are are longer than NAME_MAX after encryption and Base64 encoding. This allows us to have a consistent view of the encrypted filenames for clients that don't support fscrypt and clients that do but that don't have the key. Currently, fs/crypto only supports encrypting filenames using fscrypt_setup_filename, but that also handles encoding nokey names. Ceph can't use that because it handles nokey names in a different way. Export fscrypt_fname_encrypt. Rename fscrypt_fname_encrypted_size to __fscrypt_fname_encrypted_size and add a new wrapper called fscrypt_fname_encrypted_size that takes an inode argument rather than a pointer to a fscrypt_policy union. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Acked-by: Eric Biggers Signed-off-by: Ilya Dryomov --- include/linux/fscrypt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index e60d57c99cb6..5926a4081c6d 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -327,6 +327,10 @@ void fscrypt_free_inode(struct inode *inode); int fscrypt_drop_inode(struct inode *inode); /* fname.c */ +int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen); +bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, + u32 max_len, u32 *encrypted_len_ret); int fscrypt_setup_filename(struct inode *inode, const struct qstr *iname, int lookup, struct fscrypt_name *fname); -- cgit v1.2.3 From 637fa738b590ec0e3414931d1e07c4f195eb5215 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 1 Sep 2020 12:56:42 -0400 Subject: fscrypt: add fscrypt_context_for_new_inode Most filesystems just call fscrypt_set_context on new inodes, which usually causes a setxattr. That's a bit late for ceph, which can send along a full set of attributes with the create request. Doing so allows it to avoid race windows that where the new inode could be seen by other clients without the crypto context attached. It also avoids the separate round trip to the server. Refactor the fscrypt code a bit to allow us to create a new crypto context, attach it to the inode, and write it to the buffer, but without calling set_context on it. ceph can later use this to marshal the context into the attributes we send along with the create request. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Acked-by: Eric Biggers Signed-off-by: Ilya Dryomov --- include/linux/fscrypt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 5926a4081c6d..7d2f1e0f23b1 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -284,6 +284,7 @@ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg); int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *arg); int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg); int fscrypt_has_permitted_context(struct inode *parent, struct inode *child); +int fscrypt_context_for_new_inode(void *ctx, struct inode *inode); int fscrypt_set_context(struct inode *inode, void *fs_data); struct fscrypt_dummy_policy { -- cgit v1.2.3 From 4f48d5da81ee7004a789c8aac2d0dfb2514c37f1 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 16 May 2022 11:23:19 +0800 Subject: fs/dcache: export d_same_name() helper Compare dentry name with case-exact name, return true if names are same, or false. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Reviewed-by: Luis Chamberlain Signed-off-by: Ilya Dryomov --- include/linux/dcache.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f5bba51480b2..bb72361834de 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -233,6 +233,8 @@ extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, wait_queue_head_t *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); +extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent, + const struct qstr *name); extern struct dentry * d_exact_alias(struct dentry *, struct inode *); extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); -- cgit v1.2.3 From d93231a6bc8a452323d5fef16cca7107ce483a27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= Date: Fri, 3 Jun 2022 14:29:09 +0100 Subject: ceph: prevent a client from exceeding the MDS maximum xattr size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MDS tries to enforce a limit on the total key/values in extended attributes. However, this limit is enforced only if doing a synchronous operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS doesn't have a chance to enforce these limits. This patch adds support for decoding the xattrs maximum size setting that is distributed in the mdsmap. Then, when setting an xattr, the kernel client will revert to do a synchronous operation if that maximum size is exceeded. While there, fix a dout() that would trigger a printk warning: [ 98.718078] ------------[ cut here ]------------ [ 98.719012] precision 65536 too large [ 98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600 ... Link: https://tracker.ceph.com/issues/55725 Signed-off-by: Luís Henriques Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- include/linux/ceph/mdsmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 523fd0452856..4c3e0648dc27 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -25,6 +25,7 @@ struct ceph_mdsmap { u32 m_session_timeout; /* seconds */ u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; + u64 m_max_xattr_size; /* maximum size for xattrs blob */ u32 m_max_mds; /* expected up:active mds number */ u32 m_num_active_mds; /* actual up:active mds number */ u32 possible_max_rank; /* possible max rank index */ -- cgit v1.2.3 From 1b7587d69ea7b8c350195392bfd30a0f31374ae4 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 6 Jun 2022 20:15:33 +0800 Subject: ceph: fix the incorrect comment for the ceph_mds_caps struct The incorrect comment is misleading. Acutally the last members in ceph_mds_caps strcut is a union for none export and export bodies. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 86bf82dbd8b8..40740e234ce1 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -768,7 +768,7 @@ struct ceph_mds_caps { __le32 xattr_len; __le64 xattr_version; - /* filelock */ + /* a union of non-export and export bodies. */ __le64 size, max_size, truncate_size; __le32 truncate_seq; struct ceph_timespec mtime, atime, ctime; -- cgit v1.2.3 From 020bc44a9fbf6946f42db503d11c9811f26dd9fd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 10 Jun 2022 11:40:13 -0400 Subject: ceph: switch back to testing for NULL folio->private in ceph_dirty_folio Willy requested that we change this back to warning on folio->private being non-NULl. He's trying to kill off the PG_private flag, and so we'd like to catch where it's non-NULL. Add a VM_WARN_ON_FOLIO (since it doesn't exist yet) and change over to using that instead of VM_BUG_ON_FOLIO along with testing the ->private pointer. [ xiubli: define VM_WARN_ON_FOLIO macro in case DEBUG_VM is disabled reported by kernel test robot ] Cc: Matthew Wilcox Signed-off-by: Jeff Layton Signed-off-by: Xiubo Li Signed-off-by: Ilya Dryomov --- include/linux/mmdebug.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index d7285f8148a3..15ae78cd2853 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -54,6 +54,15 @@ void dump_mm(const struct mm_struct *mm); } \ unlikely(__ret_warn_once); \ }) +#define VM_WARN_ON_FOLIO(cond, folio) ({ \ + int __ret_warn = !!(cond); \ + \ + if (unlikely(__ret_warn)) { \ + dump_page(&folio->page, "VM_WARN_ON_FOLIO(" __stringify(cond)")");\ + WARN_ON(1); \ + } \ + unlikely(__ret_warn); \ +}) #define VM_WARN_ON_ONCE_FOLIO(cond, folio) ({ \ static bool __section(".data.once") __warned; \ int __ret_warn_once = !!(cond); \ @@ -79,6 +88,7 @@ void dump_mm(const struct mm_struct *mm); #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) -- cgit v1.2.3 From b53aca4b460ae2ece453963ef01667ee8ee78f52 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 9 Jun 2022 15:21:37 +0800 Subject: ceph: fix incorrect old_size length in ceph_mds_request_args The 'old_size' is a __le64 type since birth, not sure why the kclient incorrectly switched it to __le32. This change is okay won't break anything because union will always allocate more memory than the 'open' member needed. Rename 'file_replication' to 'pool' as ceph did. Though this 'open' struct may never be used in kclient in future, it's confusing when going through the ceph code. Signed-off-by: Xiubo Li Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 40740e234ce1..49586ff26152 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -433,9 +433,9 @@ union ceph_mds_request_args { __le32 stripe_unit; /* layout for newly created file */ __le32 stripe_count; /* ... */ __le32 object_size; - __le32 file_replication; - __le32 mask; /* CEPH_CAP_* */ - __le32 old_size; + __le32 pool; + __le32 mask; /* CEPH_CAP_* */ + __le64 old_size; } __attribute__ ((packed)) open; struct { __le32 flags; -- cgit v1.2.3 From 2c61c97fb12b806e1c8eb15f04c277ad097ec95e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 8 Jun 2022 11:52:21 -0700 Subject: nvme: handle the persistent internal error AER In the NVM Express Revision 1.4 spec, Figure 145 describes possible values for an AER with event type "Error" (value 000b). For a Persistent Internal Error (value 03h), the host should perform a controller reset. Add support for this error using code that already exists for doing a controller reset. As part of this support, introduce two utility functions for parsing the AER type and subtype. This new support was tested in a lab environment where we can generate the persistent internal error on demand, and observe both the Linux side and NVMe controller side to see that the controller reset has been done. Signed-off-by: Michael Kelley Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 07cfc922f8e4..3b8fc6c69529 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -711,6 +711,10 @@ enum { NVME_AER_VS = 7, }; +enum { + NVME_AER_ERROR_PERSIST_INT_ERR = 0x03, +}; + enum { NVME_AER_NOTICE_NS_CHANGED = 0x00, NVME_AER_NOTICE_FW_ACT_STARTING = 0x01, -- cgit v1.2.3 From a116e1cdc64a743c36c40e6fa639dec991c157a3 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 27 Jun 2022 11:51:59 +0200 Subject: lib/base64: RFC4648-compliant base64 encoding Add RFC4648-compliant base64 encoding and decoding routines, based on the base64url encoding in fs/crypto/fname.c. Signed-off-by: Hannes Reinecke Reviewed-by: Himanshu Madhani Reviewed-by: Sagi Grimberg Cc: Eric Biggers Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/base64.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/base64.h (limited to 'include/linux') diff --git a/include/linux/base64.h b/include/linux/base64.h new file mode 100644 index 000000000000..660d4cb1ef31 --- /dev/null +++ b/include/linux/base64.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * base64 encoding, lifted from fs/crypto/fname.c. + */ + +#ifndef _LINUX_BASE64_H +#define _LINUX_BASE64_H + +#include + +#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3) + +int base64_encode(const u8 *src, int len, char *dst); +int base64_decode(const char *src, int len, u8 *dst); + +#endif /* _LINUX_BASE64_H */ -- cgit v1.2.3 From 88b140fec07307f825170a45562013a80842cc93 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 27 Jun 2022 11:52:00 +0200 Subject: nvme: add definitions for NVMe In-Band authentication Add new definitions for NVMe In-band authentication as defined in the NVMe Base Specification v2.0. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Himanshu Madhani Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 209 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 208 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3b8fc6c69529..ae53d74f3696 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -19,6 +19,7 @@ #define NVMF_TRSVCID_SIZE 32 #define NVMF_TRADDR_SIZE 256 #define NVMF_TSAS_SIZE 256 +#define NVMF_AUTH_HASH_LEN 64 #define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery" @@ -1373,6 +1374,8 @@ enum nvmf_capsule_command { nvme_fabrics_type_property_set = 0x00, nvme_fabrics_type_connect = 0x01, nvme_fabrics_type_property_get = 0x04, + nvme_fabrics_type_auth_send = 0x05, + nvme_fabrics_type_auth_receive = 0x06, }; #define nvme_fabrics_type_name(type) { type, #type } @@ -1380,7 +1383,9 @@ enum nvmf_capsule_command { __print_symbolic(type, \ nvme_fabrics_type_name(nvme_fabrics_type_property_set), \ nvme_fabrics_type_name(nvme_fabrics_type_connect), \ - nvme_fabrics_type_name(nvme_fabrics_type_property_get)) + nvme_fabrics_type_name(nvme_fabrics_type_property_get), \ + nvme_fabrics_type_name(nvme_fabrics_type_auth_send), \ + nvme_fabrics_type_name(nvme_fabrics_type_auth_receive)) /* * If not fabrics command, fctype will be ignored. @@ -1476,6 +1481,11 @@ struct nvmf_connect_command { __u8 resv4[12]; }; +enum { + NVME_CONNECT_AUTHREQ_ASCR = (1 << 2), + NVME_CONNECT_AUTHREQ_ATR = (1 << 1), +}; + struct nvmf_connect_data { uuid_t hostid; __le16 cntlid; @@ -1510,6 +1520,200 @@ struct nvmf_property_get_command { __u8 resv4[16]; }; +struct nvmf_auth_common_command { + __u8 opcode; + __u8 resv1; + __u16 command_id; + __u8 fctype; + __u8 resv2[19]; + union nvme_data_ptr dptr; + __u8 resv3; + __u8 spsp0; + __u8 spsp1; + __u8 secp; + __le32 al_tl; + __u8 resv4[16]; +}; + +struct nvmf_auth_send_command { + __u8 opcode; + __u8 resv1; + __u16 command_id; + __u8 fctype; + __u8 resv2[19]; + union nvme_data_ptr dptr; + __u8 resv3; + __u8 spsp0; + __u8 spsp1; + __u8 secp; + __le32 tl; + __u8 resv4[16]; +}; + +struct nvmf_auth_receive_command { + __u8 opcode; + __u8 resv1; + __u16 command_id; + __u8 fctype; + __u8 resv2[19]; + union nvme_data_ptr dptr; + __u8 resv3; + __u8 spsp0; + __u8 spsp1; + __u8 secp; + __le32 al; + __u8 resv4[16]; +}; + +/* Value for secp */ +enum { + NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER = 0xe9, +}; + +/* Defined value for auth_type */ +enum { + NVME_AUTH_COMMON_MESSAGES = 0x00, + NVME_AUTH_DHCHAP_MESSAGES = 0x01, +}; + +/* Defined messages for auth_id */ +enum { + NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE = 0x00, + NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE = 0x01, + NVME_AUTH_DHCHAP_MESSAGE_REPLY = 0x02, + NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1 = 0x03, + NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2 = 0x04, + NVME_AUTH_DHCHAP_MESSAGE_FAILURE2 = 0xf0, + NVME_AUTH_DHCHAP_MESSAGE_FAILURE1 = 0xf1, +}; + +struct nvmf_auth_dhchap_protocol_descriptor { + __u8 authid; + __u8 rsvd; + __u8 halen; + __u8 dhlen; + __u8 idlist[60]; +}; + +enum { + NVME_AUTH_DHCHAP_AUTH_ID = 0x01, +}; + +/* Defined hash functions for DH-HMAC-CHAP authentication */ +enum { + NVME_AUTH_HASH_SHA256 = 0x01, + NVME_AUTH_HASH_SHA384 = 0x02, + NVME_AUTH_HASH_SHA512 = 0x03, + NVME_AUTH_HASH_INVALID = 0xff, +}; + +/* Defined Diffie-Hellman group identifiers for DH-HMAC-CHAP authentication */ +enum { + NVME_AUTH_DHGROUP_NULL = 0x00, + NVME_AUTH_DHGROUP_2048 = 0x01, + NVME_AUTH_DHGROUP_3072 = 0x02, + NVME_AUTH_DHGROUP_4096 = 0x03, + NVME_AUTH_DHGROUP_6144 = 0x04, + NVME_AUTH_DHGROUP_8192 = 0x05, + NVME_AUTH_DHGROUP_INVALID = 0xff, +}; + +union nvmf_auth_protocol { + struct nvmf_auth_dhchap_protocol_descriptor dhchap; +}; + +struct nvmf_auth_dhchap_negotiate_data { + __u8 auth_type; + __u8 auth_id; + __le16 rsvd; + __le16 t_id; + __u8 sc_c; + __u8 napd; + union nvmf_auth_protocol auth_protocol[]; +}; + +struct nvmf_auth_dhchap_challenge_data { + __u8 auth_type; + __u8 auth_id; + __u16 rsvd1; + __le16 t_id; + __u8 hl; + __u8 rsvd2; + __u8 hashid; + __u8 dhgid; + __le16 dhvlen; + __le32 seqnum; + /* 'hl' bytes of challenge value */ + __u8 cval[]; + /* followed by 'dhvlen' bytes of DH value */ +}; + +struct nvmf_auth_dhchap_reply_data { + __u8 auth_type; + __u8 auth_id; + __le16 rsvd1; + __le16 t_id; + __u8 hl; + __u8 rsvd2; + __u8 cvalid; + __u8 rsvd3; + __le16 dhvlen; + __le32 seqnum; + /* 'hl' bytes of response data */ + __u8 rval[]; + /* followed by 'hl' bytes of Challenge value */ + /* followed by 'dhvlen' bytes of DH value */ +}; + +enum { + NVME_AUTH_DHCHAP_RESPONSE_VALID = (1 << 0), +}; + +struct nvmf_auth_dhchap_success1_data { + __u8 auth_type; + __u8 auth_id; + __le16 rsvd1; + __le16 t_id; + __u8 hl; + __u8 rsvd2; + __u8 rvalid; + __u8 rsvd3[7]; + /* 'hl' bytes of response value if 'rvalid' is set */ + __u8 rval[]; +}; + +struct nvmf_auth_dhchap_success2_data { + __u8 auth_type; + __u8 auth_id; + __le16 rsvd1; + __le16 t_id; + __u8 rsvd2[10]; +}; + +struct nvmf_auth_dhchap_failure_data { + __u8 auth_type; + __u8 auth_id; + __le16 rsvd1; + __le16 t_id; + __u8 rescode; + __u8 rescode_exp; +}; + +enum { + NVME_AUTH_DHCHAP_FAILURE_REASON_FAILED = 0x01, +}; + +enum { + NVME_AUTH_DHCHAP_FAILURE_FAILED = 0x01, + NVME_AUTH_DHCHAP_FAILURE_NOT_USABLE = 0x02, + NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH = 0x03, + NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE = 0x04, + NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE = 0x05, + NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD = 0x06, + NVME_AUTH_DHCHAP_FAILURE_INCORRECT_MESSAGE = 0x07, +}; + + struct nvme_dbbuf { __u8 opcode; __u8 flags; @@ -1553,6 +1757,9 @@ struct nvme_command { struct nvmf_connect_command connect; struct nvmf_property_set_command prop_set; struct nvmf_property_get_command prop_get; + struct nvmf_auth_common_command auth_common; + struct nvmf_auth_send_command auth_send; + struct nvmf_auth_receive_command auth_receive; struct nvme_dbbuf dbbuf; struct nvme_directive_cmd directive; }; -- cgit v1.2.3 From f50fff73d620cd6e8f48bc58d4f1c944615a3fea Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 27 Jun 2022 11:52:02 +0200 Subject: nvme: implement In-Band authentication Implement NVMe-oF In-Band authentication according to NVMe TPAR 8006. This patch adds two new fabric options 'dhchap_secret' to specify the pre-shared key (in ASCII respresentation according to NVMe 2.0 section 8.13.5.8 'Secret representation') and 'dhchap_ctrl_secret' to specify the pre-shared controller key for bi-directional authentication of both the host and the controller. Re-authentication can be triggered by writing the PSK into the new controller sysfs attribute 'dhchap_secret' or 'dhchap_ctrl_secret'. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig [axboe: fold in clang build fix] Signed-off-by: Jens Axboe --- include/linux/nvme-auth.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 include/linux/nvme-auth.h (limited to 'include/linux') diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h new file mode 100644 index 000000000000..354456826221 --- /dev/null +++ b/include/linux/nvme-auth.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2021 Hannes Reinecke, SUSE Software Solutions + */ + +#ifndef _NVME_AUTH_H +#define _NVME_AUTH_H + +#include + +struct nvme_dhchap_key { + u8 *key; + size_t len; + u8 hash; +}; + +u32 nvme_auth_get_seqnum(void); +const char *nvme_auth_dhgroup_name(u8 dhgroup_id); +const char *nvme_auth_dhgroup_kpp(u8 dhgroup_id); +u8 nvme_auth_dhgroup_id(const char *dhgroup_name); + +const char *nvme_auth_hmac_name(u8 hmac_id); +const char *nvme_auth_digest_name(u8 hmac_id); +size_t nvme_auth_hmac_hash_len(u8 hmac_id); +u8 nvme_auth_hmac_id(const char *hmac_name); + +struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret, + u8 key_hash); +void nvme_auth_free_key(struct nvme_dhchap_key *key); +u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn); +int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key); + +#endif /* _NVME_AUTH_H */ -- cgit v1.2.3 From b61775d185a395f26fecdc7898e39de677a6c3dd Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 27 Jun 2022 11:52:03 +0200 Subject: nvme-auth: Diffie-Hellman key exchange support Implement Diffie-Hellman key exchange using FFDHE groups for NVMe In-Band Authentication. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme-auth.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index 354456826221..dcb8030062dd 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -29,5 +29,13 @@ struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret, void nvme_auth_free_key(struct nvme_dhchap_key *key); u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn); int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key); +int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, + u8 *challenge, u8 *aug, size_t hlen); +int nvme_auth_gen_privkey(struct crypto_kpp *dh_tfm, u8 dh_gid); +int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm, + u8 *host_key, size_t host_key_len); +int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm, + u8 *ctrl_key, size_t ctrl_key_len, + u8 *sess_key, size_t sess_key_len); #endif /* _NVME_AUTH_H */ -- cgit v1.2.3 From 5a97806f7dc069d9561d9930a2ae108700e222ab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 27 Jul 2022 12:22:55 -0400 Subject: block: change the blk_queue_split calling convention The double indirect bio leads to somewhat suboptimal code generation. Instead return the (original or split) bio, and make sure the request_queue arguments to the lower level helpers is passed after the bio to avoid constant reshuffling of the argument passing registers. Also give it and the helpers used to implement it more descriptive names. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220727162300.3089193-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d04bdf549efa..5eef8d2eddc1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -864,9 +864,9 @@ void blk_request_module(dev_t devt); extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); void submit_bio_noacct(struct bio *bio); +struct bio *bio_split_to_limits(struct bio *bio); extern int blk_lld_busy(struct request_queue *q); -extern void blk_queue_split(struct bio **); extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -- cgit v1.2.3 From 46754bd056053785d7079f1d48f7f571728dcb47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 27 Jul 2022 12:22:57 -0400 Subject: block: move ->bio_split to the gendisk Only non-passthrough requests are split by the block layer and use the ->bio_split bio_set. Move it from the request_queue to the gendisk. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220727162300.3089193-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5eef8d2eddc1..49dcd31e283e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -140,6 +140,8 @@ struct gendisk { struct request_queue *queue; void *private_data; + struct bio_set bio_split; + int flags; unsigned long state; #define GD_NEED_PART_SCAN 0 @@ -531,7 +533,6 @@ struct request_queue { struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; - struct bio_set bio_split; struct dentry *debugfs_dir; struct dentry *sched_debugfs_dir; -- cgit v1.2.3 From cb3b3bf22cf33707d684e74207908ba0ef3b6467 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Jun 2022 13:23:48 +0200 Subject: jbd2: rename jbd_debug() to jbd2_debug() The name of jbd_debug() is confusing as all functions inside jbd2 have jbd2_ prefix. Rename jbd_debug() to jbd2_debug(). No functional changes. Signed-off-by: Jan Kara Reviewed-by: Lukas Czerner Link: https://lore.kernel.org/r/20220608112355.4397-2-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e79d6e0b14e8..d4d59e43769f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -58,10 +58,10 @@ extern ushort jbd2_journal_enable_debug; void __jbd2_debug(int level, const char *file, const char *func, unsigned int line, const char *fmt, ...); -#define jbd_debug(n, fmt, a...) \ +#define jbd2_debug(n, fmt, a...) \ __jbd2_debug((n), __FILE__, __func__, __LINE__, (fmt), ##a) #else -#define jbd_debug(n, fmt, a...) no_printk(fmt, ##a) +#define jbd2_debug(n, fmt, a...) no_printk(fmt, ##a) #endif extern void *jbd2_alloc(size_t size, gfp_t flags); -- cgit v1.2.3 From 68af74e92a86984ca6d5f7b19ee8f8a30a550873 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Jun 2022 13:23:49 +0200 Subject: jbd2: remove unused exports for jbd2 debugging Jbd2 exports jbd2_journal_enable_debug and __jbd2_debug() depite the first is used only in fs/jbd2/journal.c and the second only within jbd2 code. Remove the pointless exports make jbd2_journal_enable_debug static. Signed-off-by: Jan Kara Reviewed-by: Lukas Czerner Link: https://lore.kernel.org/r/20220608112355.4397-3-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d4d59e43769f..6c2aa61e0f73 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -54,7 +54,6 @@ * CONFIG_JBD2_DEBUG is on. */ #define JBD2_EXPENSIVE_CHECKING -extern ushort jbd2_journal_enable_debug; void __jbd2_debug(int level, const char *file, const char *func, unsigned int line, const char *fmt, ...); -- cgit v1.2.3 From d1324958567da957385d8d555a8b840b3bf8e6e3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Jun 2022 13:23:50 +0200 Subject: jbd2: unexport jbd2_log_start_commit() jbd2_log_start_commit() is not used outside of jbd2 so unexport it. Also make __jbd2_log_start_commit() static when we are at it. Signed-off-by: Jan Kara Reviewed-by: Lukas Czerner Link: https://lore.kernel.org/r/20220608112355.4397-4-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 6c2aa61e0f73..164ddf1211c0 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1646,7 +1646,6 @@ extern void jbd2_clear_buffer_revoked_flags(journal_t *journal); */ int jbd2_log_start_commit(journal_t *journal, tid_t tid); -int __jbd2_log_start_commit(journal_t *journal, tid_t tid); int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); int jbd2_log_wait_commit(journal_t *journal, tid_t tid); int jbd2_transaction_committed(journal_t *journal, tid_t tid); -- cgit v1.2.3 From 3dc96bba65f53daa217f0a8f43edad145286a8f5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jul 2022 12:54:21 +0200 Subject: mbcache: add functions to delete entry if unused Add function mb_cache_entry_delete_or_get() to delete mbcache entry if it is unused and also add a function to wait for entry to become unused - mb_cache_entry_wait_unused(). We do not share code between the two deleting function as one of them will go away soon. CC: stable@vger.kernel.org Fixes: 82939d7999df ("ext4: convert to mbcache2") Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20220712105436.32204-2-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/mbcache.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h index 20f1e3ff6013..8eca7f25c432 100644 --- a/include/linux/mbcache.h +++ b/include/linux/mbcache.h @@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache); int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, u64 value, bool reusable); void __mb_cache_entry_free(struct mb_cache_entry *entry); +void mb_cache_entry_wait_unused(struct mb_cache_entry *entry); static inline int mb_cache_entry_put(struct mb_cache *cache, struct mb_cache_entry *entry) { - if (!atomic_dec_and_test(&entry->e_refcnt)) + unsigned int cnt = atomic_dec_return(&entry->e_refcnt); + + if (cnt > 0) { + if (cnt <= 3) + wake_up_var(&entry->e_refcnt); return 0; + } __mb_cache_entry_free(entry); return 1; } +struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, + u32 key, u64 value); void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value); struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, u64 value); -- cgit v1.2.3 From 75896339e43176af078509c1fce94ee6df9ca1a7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jul 2022 12:54:28 +0200 Subject: mbcache: Remove mb_cache_entry_delete() Nobody uses mb_cache_entry_delete() anymore. Remove it. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20220712105436.32204-9-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/mbcache.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h index 8eca7f25c432..452b579856d4 100644 --- a/include/linux/mbcache.h +++ b/include/linux/mbcache.h @@ -47,7 +47,6 @@ static inline int mb_cache_entry_put(struct mb_cache *cache, struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, u32 key, u64 value); -void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value); struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, u64 value); struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, -- cgit v1.2.3 From 307af6c879377c1c63e71cbdd978201f9c7ee8df Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jul 2022 12:54:29 +0200 Subject: mbcache: automatically delete entries from cache on freeing Use the fact that entries with elevated refcount are not removed from the hash and just move removal of the entry from the hash to the entry freeing time. When doing this we also change the generic code to hold one reference to the cache entry, not two of them, which makes code somewhat more obvious. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20220712105436.32204-10-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/mbcache.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h index 452b579856d4..2da63fd7b98f 100644 --- a/include/linux/mbcache.h +++ b/include/linux/mbcache.h @@ -13,8 +13,16 @@ struct mb_cache; struct mb_cache_entry { /* List of entries in cache - protected by cache->c_list_lock */ struct list_head e_list; - /* Hash table list - protected by hash chain bitlock */ + /* + * Hash table list - protected by hash chain bitlock. The entry is + * guaranteed to be hashed while e_refcnt > 0. + */ struct hlist_bl_node e_hash_list; + /* + * Entry refcount. Once it reaches zero, entry is unhashed and freed. + * While refcount > 0, the entry is guaranteed to stay in the hash and + * e.g. mb_cache_entry_try_delete() will fail. + */ atomic_t e_refcnt; /* Key in hash - stable during lifetime of the entry */ u32 e_key; @@ -29,20 +37,20 @@ void mb_cache_destroy(struct mb_cache *cache); int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, u64 value, bool reusable); -void __mb_cache_entry_free(struct mb_cache_entry *entry); +void __mb_cache_entry_free(struct mb_cache *cache, + struct mb_cache_entry *entry); void mb_cache_entry_wait_unused(struct mb_cache_entry *entry); -static inline int mb_cache_entry_put(struct mb_cache *cache, - struct mb_cache_entry *entry) +static inline void mb_cache_entry_put(struct mb_cache *cache, + struct mb_cache_entry *entry) { unsigned int cnt = atomic_dec_return(&entry->e_refcnt); if (cnt > 0) { - if (cnt <= 3) + if (cnt <= 2) wake_up_var(&entry->e_refcnt); - return 0; + return; } - __mb_cache_entry_free(entry); - return 1; + __mb_cache_entry_free(cache, entry); } struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, -- cgit v1.2.3 From b6e8d40d43ae4dec00c8fea2593eeea3114b8f44 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 2 Aug 2022 21:54:51 -0400 Subject: sched, cpuset: Fix dl_cpu_busy() panic due to empty cs->cpus_allowed With cgroup v2, the cpuset's cpus_allowed mask can be empty indicating that the cpuset will just use the effective CPUs of its parent. So cpuset_can_attach() can call task_can_attach() with an empty mask. This can lead to cpumask_any_and() returns nr_cpu_ids causing the call to dl_bw_of() to crash due to percpu value access of an out of bound CPU value. For example: [80468.182258] BUG: unable to handle page fault for address: ffffffff8b6648b0 : [80468.191019] RIP: 0010:dl_cpu_busy+0x30/0x2b0 : [80468.207946] Call Trace: [80468.208947] cpuset_can_attach+0xa0/0x140 [80468.209953] cgroup_migrate_execute+0x8c/0x490 [80468.210931] cgroup_update_dfl_csses+0x254/0x270 [80468.211898] cgroup_subtree_control_write+0x322/0x400 [80468.212854] kernfs_fop_write_iter+0x11c/0x1b0 [80468.213777] new_sync_write+0x11f/0x1b0 [80468.214689] vfs_write+0x1eb/0x280 [80468.215592] ksys_write+0x5f/0xe0 [80468.216463] do_syscall_64+0x5c/0x80 [80468.224287] entry_SYSCALL_64_after_hwframe+0x44/0xae Fix that by using effective_cpus instead. For cgroup v1, effective_cpus is the same as cpus_allowed. For v2, effective_cpus is the real cpumask to be used by tasks within the cpuset anyway. Also update task_can_attach()'s 2nd argument name to cs_effective_cpus to reflect the change. In addition, a check is added to task_can_attach() to guard against the possibility that cpumask_any_and() may return a value >= nr_cpu_ids. Fixes: 7f51412a415d ("sched/deadline: Fix bandwidth check/update when migrating tasks between exclusive cpusets") Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Acked-by: Juri Lelli Link: https://lore.kernel.org/r/20220803015451.2219567-1-longman@redhat.com --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 88b8817b827d..6a060160f0db 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1813,7 +1813,7 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags) } extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); -extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); +extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus); #ifdef CONFIG_SMP extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); -- cgit v1.2.3 From a8af0d682ae0c9cf62dd0ad6afdb1480951d6a10 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Jun 2022 16:21:50 -0400 Subject: libceph: clean up ceph_osdc_start_request prototype This function always returns 0, and ignores the nofail boolean. Drop the nofail argument, make the function void return and fix up the callers. Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index cba8a6ffc329..fb6be72104df 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -507,9 +507,8 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, extern void ceph_osdc_get_request(struct ceph_osd_request *req); extern void ceph_osdc_put_request(struct ceph_osd_request *req); -extern int ceph_osdc_start_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req, - bool nofail); +void ceph_osdc_start_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req); extern void ceph_osdc_cancel_request(struct ceph_osd_request *req); extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -- cgit v1.2.3 From bed4593645366ad7362a3aa7bc0d100d8d8236a8 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 11 Jul 2022 09:17:38 +0800 Subject: tpm: eventlog: Fix section mismatch for DEBUG_SECTION_MISMATCH If DEBUG_SECTION_MISMATCH enabled, __calc_tpm2_event_size() will not be inlined, this cause section mismatch like this: WARNING: modpost: vmlinux.o(.text.unlikely+0xe30c): Section mismatch in reference from the variable L0 to the function .init.text:early_ioremap() The function L0() references the function __init early_memremap(). This is often because L0 lacks a __init annotation or the annotation of early_ioremap is wrong. Fix it by using __always_inline instead of inline for the called-once function __calc_tpm2_event_size(). Fixes: 44038bc514a2 ("tpm: Abstract crypto agile event size calculations") Cc: stable@vger.kernel.org # v5.3 Reported-by: WANG Xuerui Signed-off-by: Huacai Chen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm_eventlog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 739ba9a03ec1..20c0ff54b7a0 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -157,7 +157,7 @@ struct tcg_algorithm_info { * Return: size of the event on success, 0 on failure */ -static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, +static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, struct tcg_pcr_event *event_header, bool do_mapping) { -- cgit v1.2.3 From 6930bcbfb6ceda63e298c6af6d733ecdf6bd4cde Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 1 Aug 2022 15:57:26 -0400 Subject: lockd: detect and reject lock arguments that overflow lockd doesn't currently vet the start and length in nlm4 requests like it should, and can end up generating lock requests with arguments that overflow when passed to the filesystem. The NLM4 protocol uses unsigned 64-bit arguments for both start and length, whereas struct file_lock tracks the start and end as loff_t values. By the time we get around to calling nlm4svc_retrieve_args, we've lost the information that would allow us to determine if there was an overflow. Start tracking the actual start and len for NLM4 requests in the nlm_lock. In nlm4svc_retrieve_args, vet these values to ensure they won't cause an overflow, and return NLM4_FBIG if they do. Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=392 Reported-by: Jan Kasiak Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Cc: # 5.14+ --- include/linux/lockd/xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 398f70093cd3..67e4a2c5500b 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -41,6 +41,8 @@ struct nlm_lock { struct nfs_fh fh; struct xdr_netobj oh; u32 svid; + u64 lock_start; + u64 lock_len; struct file_lock fl; }; -- cgit v1.2.3 From f482aa98652795846cc55da98ebe331eb74f3d0b Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Wed, 3 Aug 2022 15:23:43 -0700 Subject: audit, io_uring, io-wq: Fix memory leak in io_sq_thread() and io_wqe_worker() Currently @audit_context is allocated twice for io_uring workers: 1. copy_process() calls audit_alloc(); 2. io_sq_thread() or io_wqe_worker() calls audit_alloc_kernel() (which is effectively audit_alloc()) and overwrites @audit_context, causing: BUG: memory leak unreferenced object 0xffff888144547400 (size 1024): <...> hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] audit_alloc+0x133/0x210 [] copy_process+0xcd3/0x2340 [] create_io_thread+0x63/0x90 [] create_io_worker+0xb4/0x230 [] io_wqe_enqueue+0x248/0x3b0 [] io_queue_iowq+0xba/0x200 [] io_queue_async+0x113/0x180 [] io_req_task_submit+0x18f/0x1a0 [] io_apoll_task_func+0xdd/0x120 [] tctx_task_work+0x11f/0x570 [] task_work_run+0x7e/0xc0 [] get_signal+0xc18/0xf10 [] arch_do_signal_or_restart+0x2b/0x730 [] exit_to_user_mode_prepare+0x5e/0x180 [] syscall_exit_to_user_mode+0x12/0x20 [] do_syscall_64+0x40/0x80 Then, 3. io_sq_thread() or io_wqe_worker() frees @audit_context using audit_free(); 4. do_exit() eventually calls audit_free() again, which is okay because audit_free() does a NULL check. As suggested by Paul Moore, fix it by deleting audit_alloc_kernel() and redundant audit_free() calls. Fixes: 5bd2182d58e9 ("audit,io_uring,io-wq: add some basic audit support to io_uring") Suggested-by: Paul Moore Cc: stable@vger.kernel.org Signed-off-by: Peilin Ye Acked-by: Paul Moore Link: https://lore.kernel.org/r/20220803222343.31673-1-yepeilin.cs@gmail.com Signed-off-by: Jens Axboe --- include/linux/audit.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 00f7a80f1a3e..3608992848d3 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -285,7 +285,6 @@ static inline int audit_signal_info(int sig, struct task_struct *t) /* These are defined in auditsc.c */ /* Public API */ extern int audit_alloc(struct task_struct *task); -extern int audit_alloc_kernel(struct task_struct *task); extern void __audit_free(struct task_struct *task); extern void __audit_uring_entry(u8 op); extern void __audit_uring_exit(int success, long code); @@ -578,10 +577,6 @@ static inline int audit_alloc(struct task_struct *task) { return 0; } -static inline int audit_alloc_kernel(struct task_struct *task) -{ - return 0; -} static inline void audit_free(struct task_struct *task) { } static inline void audit_uring_entry(u8 op) -- cgit v1.2.3 From e2dcac2f58f5a95ab092d1da237ffdc0da1832cf Mon Sep 17 00:00:00 2001 From: Jinghao Jia Date: Fri, 29 Jul 2022 20:17:13 +0000 Subject: BPF: Fix potential bad pointer dereference in bpf_sys_bpf() The bpf_sys_bpf() helper function allows an eBPF program to load another eBPF program from within the kernel. In this case the argument union bpf_attr pointer (as well as the insns and license pointers inside) is a kernel address instead of a userspace address (which is the case of a usual bpf() syscall). To make the memory copying process in the syscall work in both cases, bpfptr_t was introduced to wrap around the pointer and distinguish its origin. Specifically, when copying memory contents from a bpfptr_t, a copy_from_user() is performed in case of a userspace address and a memcpy() is performed for a kernel address. This can lead to problems because the in-kernel pointer is never checked for validity. The problem happens when an eBPF syscall program tries to call bpf_sys_bpf() to load a program but provides a bad insns pointer -- say 0xdeadbeef -- in the bpf_attr union. The helper calls __sys_bpf() which would then call bpf_prog_load() to load the program. bpf_prog_load() is responsible for copying the eBPF instructions to the newly allocated memory for the program; it creates a kernel bpfptr_t for insns and invokes copy_from_bpfptr(). Internally, all bpfptr_t operations are backed by the corresponding sockptr_t operations, which performs direct memcpy() on kernel pointers for copy_from/strncpy_from operations. Therefore, the code is always happy to dereference the bad pointer to trigger a un-handle-able page fault and in turn an oops. However, this is not supposed to happen because at that point the eBPF program is already verified and should not cause a memory error. Sample KASAN trace: [ 25.685056][ T228] ================================================================== [ 25.685680][ T228] BUG: KASAN: user-memory-access in copy_from_bpfptr+0x21/0x30 [ 25.686210][ T228] Read of size 80 at addr 00000000deadbeef by task poc/228 [ 25.686732][ T228] [ 25.686893][ T228] CPU: 3 PID: 228 Comm: poc Not tainted 5.19.0-rc7 #7 [ 25.687375][ T228] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS d55cb5a 04/01/2014 [ 25.687991][ T228] Call Trace: [ 25.688223][ T228] [ 25.688429][ T228] dump_stack_lvl+0x73/0x9e [ 25.688747][ T228] print_report+0xea/0x200 [ 25.689061][ T228] ? copy_from_bpfptr+0x21/0x30 [ 25.689401][ T228] ? _printk+0x54/0x6e [ 25.689693][ T228] ? _raw_spin_lock_irqsave+0x70/0xd0 [ 25.690071][ T228] ? copy_from_bpfptr+0x21/0x30 [ 25.690412][ T228] kasan_report+0xb5/0xe0 [ 25.690716][ T228] ? copy_from_bpfptr+0x21/0x30 [ 25.691059][ T228] kasan_check_range+0x2bd/0x2e0 [ 25.691405][ T228] ? copy_from_bpfptr+0x21/0x30 [ 25.691734][ T228] memcpy+0x25/0x60 [ 25.692000][ T228] copy_from_bpfptr+0x21/0x30 [ 25.692328][ T228] bpf_prog_load+0x604/0x9e0 [ 25.692653][ T228] ? cap_capable+0xb4/0xe0 [ 25.692956][ T228] ? security_capable+0x4f/0x70 [ 25.693324][ T228] __sys_bpf+0x3af/0x580 [ 25.693635][ T228] bpf_sys_bpf+0x45/0x240 [ 25.693937][ T228] bpf_prog_f0ec79a5a3caca46_bpf_func1+0xa2/0xbd [ 25.694394][ T228] bpf_prog_run_pin_on_cpu+0x2f/0xb0 [ 25.694756][ T228] bpf_prog_test_run_syscall+0x146/0x1c0 [ 25.695144][ T228] bpf_prog_test_run+0x172/0x190 [ 25.695487][ T228] __sys_bpf+0x2c5/0x580 [ 25.695776][ T228] __x64_sys_bpf+0x3a/0x50 [ 25.696084][ T228] do_syscall_64+0x60/0x90 [ 25.696393][ T228] ? fpregs_assert_state_consistent+0x50/0x60 [ 25.696815][ T228] ? exit_to_user_mode_prepare+0x36/0xa0 [ 25.697202][ T228] ? syscall_exit_to_user_mode+0x20/0x40 [ 25.697586][ T228] ? do_syscall_64+0x6e/0x90 [ 25.697899][ T228] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 25.698312][ T228] RIP: 0033:0x7f6d543fb759 [ 25.698624][ T228] Code: 08 5b 89 e8 5d c3 66 2e 0f 1f 84 00 00 00 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 97 a6 0e 00 f7 d8 64 89 01 48 [ 25.699946][ T228] RSP: 002b:00007ffc3df78468 EFLAGS: 00000287 ORIG_RAX: 0000000000000141 [ 25.700526][ T228] RAX: ffffffffffffffda RBX: 00007ffc3df78628 RCX: 00007f6d543fb759 [ 25.701071][ T228] RDX: 0000000000000090 RSI: 00007ffc3df78478 RDI: 000000000000000a [ 25.701636][ T228] RBP: 00007ffc3df78510 R08: 0000000000000000 R09: 0000000000300000 [ 25.702191][ T228] R10: 0000000000000005 R11: 0000000000000287 R12: 0000000000000000 [ 25.702736][ T228] R13: 00007ffc3df78638 R14: 000055a1584aca68 R15: 00007f6d5456a000 [ 25.703282][ T228] [ 25.703490][ T228] ================================================================== [ 25.704050][ T228] Disabling lock debugging due to kernel taint Update copy_from_bpfptr() and strncpy_from_bpfptr() so that: - for a kernel pointer, it uses the safe copy_from_kernel_nofault() and strncpy_from_kernel_nofault() functions. - for a userspace pointer, it performs copy_from_user() and strncpy_from_user(). Fixes: af2ac3e13e45 ("bpf: Prepare bpf syscall to be used from kernel and user space.") Link: https://lore.kernel.org/bpf/20220727132905.45166-1-jinghao@linux.ibm.com/ Signed-off-by: Jinghao Jia Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20220729201713.88688-1-jinghao@linux.ibm.com Signed-off-by: Alexei Starovoitov --- include/linux/bpfptr.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h index 46e1757d06a3..79b2f78eec1a 100644 --- a/include/linux/bpfptr.h +++ b/include/linux/bpfptr.h @@ -49,7 +49,9 @@ static inline void bpfptr_add(bpfptr_t *bpfptr, size_t val) static inline int copy_from_bpfptr_offset(void *dst, bpfptr_t src, size_t offset, size_t size) { - return copy_from_sockptr_offset(dst, (sockptr_t) src, offset, size); + if (!bpfptr_is_kernel(src)) + return copy_from_user(dst, src.user + offset, size); + return copy_from_kernel_nofault(dst, src.kernel + offset, size); } static inline int copy_from_bpfptr(void *dst, bpfptr_t src, size_t size) @@ -78,7 +80,9 @@ static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len) static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count) { - return strncpy_from_sockptr(dst, (sockptr_t) src, count); + if (bpfptr_is_kernel(src)) + return strncpy_from_kernel_nofault(dst, src.kernel, count); + return strncpy_from_user(dst, src.user, count); } #endif /* _LINUX_BPFPTR_H */ -- cgit v1.2.3 From 3c59366c207e4c6c6569524af606baf017a55c61 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 1 Aug 2022 10:33:34 +1000 Subject: NFS: don't unhash dentry during unlink/rename NFS unlink() (and rename over existing target) must determine if the file is open, and must perform a "silly rename" instead of an unlink (or before rename) if it is. Otherwise the client might hold a file open which has been removed on the server. Consequently if it determines that the file isn't open, it must block any subsequent opens until the unlink/rename has been completed on the server. This is currently achieved by unhashing the dentry. This forces any open attempt to the slow-path for lookup which will block on i_rwsem on the directory until the unlink/rename completes. A future patch will change the VFS to only get a shared lock on i_rwsem for unlink, so this will no longer work. Instead we introduce an explicit interlock. A special value is stored in dentry->d_fsdata while the unlink/rename is running and ->d_revalidate blocks while that value is present. When ->d_revalidate unblocks, the dentry will be invalid. This closes the race without requiring exclusion on i_rwsem. d_fsdata is already used in two different ways. 1/ an IS_ROOT directory dentry might have a "devname" stored in d_fsdata. Such a dentry doesn't have a name and so cannot be the target of unlink or rename. For safety we check if an old devname is still stored, and remove it if it is. 2/ a dentry with DCACHE_NFSFS_RENAMED set will have a 'struct nfs_unlinkdata' stored in d_fsdata. While this is set maydelete() will fail, so an unlink or rename will never proceed on such a dentry. Neither of these can be in effect when a dentry is the target of unlink or rename. So we can expect d_fsdata to be NULL, and store a special value ((void*)1) which is given the name NFS_FSDATA_BLOCKED to indicate that any lookup will be blocked. The d_count() is incremented under d_lock() when a lookup finds the dentry, so we check d_count() is low, and set NFS_FSDATA_BLOCKED under the same lock to avoid any races. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a17c337dbdf1..b32ed68e7dc4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -617,6 +617,15 @@ nfs_fileid_to_ino_t(u64 fileid) #define NFS_JUKEBOX_RETRY_TIME (5 * HZ) +/* We need to block new opens while a file is being unlinked. + * If it is opened *before* we decide to unlink, we will silly-rename + * instead. If it is opened *after*, then we need to create or will fail. + * If we allow the two to race, we could end up with a file that is open + * but deleted on the server resulting in ESTALE. + * So use ->d_fsdata to record when the unlink is happening + * and block dentry revalidation while it is set. + */ +#define NFS_FSDATA_BLOCKED ((void*)1) # undef ifdebug # ifdef NFS_DEBUG -- cgit v1.2.3 From 2da1c30929a28c3c6b01d9c16c4216037be95597 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Jun 2022 17:22:28 +0800 Subject: mm: hugetlb_vmemmap: delete hugetlb_optimize_vmemmap_enabled() Patch series "Simplify hugetlb vmemmap and improve its readability", v2. This series aims to simplify hugetlb vmemmap and improve its readability. This patch (of 8): The name hugetlb_optimize_vmemmap_enabled() a bit confusing as it tests two conditions (enabled and pages in use). Instead of coming up to an appropriate name, we could just delete it. There is already a discussion about deleting it in thread [1]. There is only one user of hugetlb_optimize_vmemmap_enabled() outside of hugetlb_vmemmap, that is flush_dcache_page() in arch/arm64/mm/flush.c. However, it does not need to call hugetlb_optimize_vmemmap_enabled() in flush_dcache_page() since HugeTLB pages are always fully mapped and only head page will be set PG_dcache_clean meaning only head page's flag may need to be cleared (see commit cf5a501d985b). So it is easy to remove hugetlb_optimize_vmemmap_enabled(). Link: https://lore.kernel.org/all/c77c61c8-8a5a-87e8-db89-d04d8aaab4cc@oracle.com/ [1] Link: https://lkml.kernel.org/r/20220628092235.91270-2-songmuchun@bytedance.com Signed-off-by: Muchun Song Reviewed-by: Oscar Salvador Reviewed-by: Mike Kravetz Reviewed-by: Catalin Marinas Cc: Will Deacon Cc: Anshuman Khandual Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index ea19528564d1..2455405ab82b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -208,12 +208,6 @@ enum pageflags { DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON, hugetlb_optimize_vmemmap_key); -static __always_inline bool hugetlb_optimize_vmemmap_enabled(void) -{ - return static_branch_maybe(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON, - &hugetlb_optimize_vmemmap_key); -} - /* * If the feature of optimizing vmemmap pages associated with each HugeTLB * page is enabled, the head vmemmap page frame is reused and all of the tail @@ -232,7 +226,8 @@ static __always_inline bool hugetlb_optimize_vmemmap_enabled(void) */ static __always_inline const struct page *page_fixed_fake_head(const struct page *page) { - if (!hugetlb_optimize_vmemmap_enabled()) + if (!static_branch_maybe(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON, + &hugetlb_optimize_vmemmap_key)) return page; /* @@ -260,11 +255,6 @@ static inline const struct page *page_fixed_fake_head(const struct page *page) { return page; } - -static inline bool hugetlb_optimize_vmemmap_enabled(void) -{ - return false; -} #endif static __always_inline int page_is_fake_head(struct page *page) -- cgit v1.2.3 From cf5472e561133888df81d2e48f7da9ebd3299459 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Jun 2022 17:22:29 +0800 Subject: mm: hugetlb_vmemmap: optimize vmemmap_optimize_mode handling We hold an another reference to hugetlb_optimize_vmemmap_key when making vmemmap_optimize_mode on, because we use static_key to tell memory_hotplug that memory_hotplug.memmap_on_memory should be overridden. However, this rule has gone when we have introduced PageVmemmapSelfHosted. Therefore, we could simplify vmemmap_optimize_mode handling by not holding an another reference to hugetlb_optimize_vmemmap_key. This also means that we not incur the extra page_fixed_fake_head checks if there are no vmemmap optinmized hugetlb pages after this change. Link: https://lkml.kernel.org/r/20220628092235.91270-3-songmuchun@bytedance.com Signed-off-by: Muchun Song Reviewed-by: Oscar Salvador Reviewed-by: Mike Kravetz Cc: Anshuman Khandual Cc: Catalin Marinas Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Will Deacon Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2455405ab82b..b44cc24d7496 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -205,8 +205,7 @@ enum pageflags { #ifndef __GENERATING_BOUNDS_H #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP -DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON, - hugetlb_optimize_vmemmap_key); +DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key); /* * If the feature of optimizing vmemmap pages associated with each HugeTLB @@ -226,8 +225,7 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON, */ static __always_inline const struct page *page_fixed_fake_head(const struct page *page) { - if (!static_branch_maybe(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON, - &hugetlb_optimize_vmemmap_key)) + if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key)) return page; /* -- cgit v1.2.3 From dff033818a06e7d0bf79271e34bda11c2d9d98d0 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Jun 2022 17:22:30 +0800 Subject: mm: hugetlb_vmemmap: introduce the name HVO It it inconvenient to mention the feature of optimizing vmemmap pages associated with HugeTLB pages when communicating with others since there is no specific or abbreviated name for it when it is first introduced. Let us give it a name HVO (HugeTLB Vmemmap Optimization) from now. This commit also updates the document about "hugetlb_free_vmemmap" by the way discussed in thread [1]. Link: https://lore.kernel.org/all/21aae898-d54d-cc4b-a11f-1bb7fddcfffa@redhat.com/ [1] Link: https://lkml.kernel.org/r/20220628092235.91270-4-songmuchun@bytedance.com Signed-off-by: Muchun Song Reviewed-by: Oscar Salvador Reviewed-by: Mike Kravetz Cc: Anshuman Khandual Cc: Catalin Marinas Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Will Deacon Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b44cc24d7496..78ed46ae6ee5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -208,8 +208,7 @@ enum pageflags { DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key); /* - * If the feature of optimizing vmemmap pages associated with each HugeTLB - * page is enabled, the head vmemmap page frame is reused and all of the tail + * If HVO is enabled, the head vmemmap page frame is reused and all of the tail * vmemmap addresses map to the head vmemmap page frame (furture details can * refer to the figure at the head of the mm/hugetlb_vmemmap.c). In other * words, there are more than one page struct with PG_head associated with each -- cgit v1.2.3 From 998a2997885f73e5cc732ac6d661dfa6e0f50654 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Jun 2022 17:22:31 +0800 Subject: mm: hugetlb_vmemmap: move vmemmap code related to HugeTLB to hugetlb_vmemmap.c When I first introduced vmemmap manipulation functions related to HugeTLB, I thought those functions may be reused by other modules (e.g. using similar approach to optimize vmemmap pages, unfortunately, the DAX used the same approach but does not use those functions). After two years, we didn't see any other users. So move those functions to hugetlb_vmemmap.c. Code movement without any functional change. Link: https://lkml.kernel.org/r/20220628092235.91270-5-songmuchun@bytedance.com Signed-off-by: Muchun Song Reviewed-by: Oscar Salvador Reviewed-by: Mike Kravetz Cc: Anshuman Khandual Cc: Catalin Marinas Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Will Deacon Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 18e01474cf6b..e6e201a4ce05 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3142,13 +3142,6 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) } #endif -#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP -int vmemmap_remap_free(unsigned long start, unsigned long end, - unsigned long reuse); -int vmemmap_remap_alloc(unsigned long start, unsigned long end, - unsigned long reuse, gfp_t gfp_mask); -#endif - void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, -- cgit v1.2.3 From 6213834c10de954470b7195cf0cdbda858edf0ee Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Jun 2022 17:22:33 +0800 Subject: mm: hugetlb_vmemmap: improve hugetlb_vmemmap code readability There is a discussion about the name of hugetlb_vmemmap_alloc/free in thread [1]. The suggestion suggested by David is rename "alloc/free" to "optimize/restore" to make functionalities clearer to users, "optimize" means the function will optimize vmemmap pages, while "restore" means restoring its vmemmap pages discared before. This commit does this. Another discussion is the confusion RESERVE_VMEMMAP_NR isn't used explicitly for vmemmap_addr but implicitly for vmemmap_end in hugetlb_vmemmap_alloc/free. David suggested we can compute what hugetlb_vmemmap_init() does now at runtime. We do not need to worry for the overhead of computing at runtime since the calculation is simple enough and those functions are not in a hot path. This commit has the following improvements: 1) The function suffixed name ("optimize/restore") is more expressive. 2) The logic becomes less weird in hugetlb_vmemmap_optimize/restore(). 3) The hugetlb_vmemmap_init() does not need to be exported anymore. 4) A ->optimize_vmemmap_pages field in struct hstate is killed. 5) There is only one place where checks is_power_of_2(sizeof(struct page)) instead of two places. 6) Add more comments for hugetlb_vmemmap_optimize/restore(). 7) For external users, hugetlb_optimize_vmemmap_pages() is used for detecting if the HugeTLB's vmemmap pages is optimizable originally. In this commit, it is killed and we introduce a new helper hugetlb_vmemmap_optimizable() to replace it. The name is more expressive. Link: https://lore.kernel.org/all/20220404074652.68024-2-songmuchun@bytedance.com/ [1] Link: https://lkml.kernel.org/r/20220628092235.91270-7-songmuchun@bytedance.com Signed-off-by: Muchun Song Reviewed-by: Mike Kravetz Cc: Anshuman Khandual Cc: Catalin Marinas Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Oscar Salvador Cc: Will Deacon Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 7 ++----- include/linux/sysctl.h | 4 ++++ 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 4cdfce976644..6d0620edf0a6 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -638,9 +638,6 @@ struct hstate { unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; -#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP - unsigned int optimize_vmemmap_pages; -#endif #ifdef CONFIG_CGROUP_HUGETLB /* cgroup control files */ struct cftype cgroup_files_dfl[8]; @@ -716,7 +713,7 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma) return hstate_file(vma->vm_file); } -static inline unsigned long huge_page_size(struct hstate *h) +static inline unsigned long huge_page_size(const struct hstate *h) { return (unsigned long)PAGE_SIZE << h->order; } @@ -745,7 +742,7 @@ static inline bool hstate_is_gigantic(struct hstate *h) return huge_page_order(h) >= MAX_ORDER; } -static inline unsigned int pages_per_huge_page(struct hstate *h) +static inline unsigned int pages_per_huge_page(const struct hstate *h) { return 1 << h->order; } diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 17b42ce89d3e..780690dc08cd 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -268,6 +268,10 @@ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * return NULL; } +static inline void register_sysctl_init(const char *path, struct ctl_table *table) +{ +} + static inline struct ctl_table_header *register_sysctl_mount_point(const char *path) { return NULL; -- cgit v1.2.3 From 838691a1c0ec44739db558834e6954d62577d6b8 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Jun 2022 17:22:34 +0800 Subject: mm: hugetlb_vmemmap: move code comments to vmemmap_dedup.rst All the comments which explains how HVO works are moved to vmemmap_dedup.rst since commit 4917f55b4ef9 ("mm/sparse-vmemmap: improve memory savings for compound devmaps") except some comments above page_fixed_fake_head(). This commit moves those comments to vmemmap_dedup.rst and improve vmemmap_dedup.rst as well. Link: https://lkml.kernel.org/r/20220628092235.91270-8-songmuchun@bytedance.com Signed-off-by: Muchun Song Cc: Anshuman Khandual Cc: Catalin Marinas Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Mike Kravetz Cc: Oscar Salvador Cc: Will Deacon Cc: Xiongchun Duan Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 78ed46ae6ee5..465ff35a8c00 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -208,19 +208,8 @@ enum pageflags { DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key); /* - * If HVO is enabled, the head vmemmap page frame is reused and all of the tail - * vmemmap addresses map to the head vmemmap page frame (furture details can - * refer to the figure at the head of the mm/hugetlb_vmemmap.c). In other - * words, there are more than one page struct with PG_head associated with each - * HugeTLB page. We __know__ that there is only one head page struct, the tail - * page structs with PG_head are fake head page structs. We need an approach - * to distinguish between those two different types of page structs so that - * compound_head() can return the real head page struct when the parameter is - * the tail page struct but with PG_head. - * - * The page_fixed_fake_head() returns the real head page struct if the @page is - * fake page head, otherwise, returns @page which can either be a true page - * head or tail. + * Return the real head page struct iff the @page is a fake head page, otherwise + * return the @page itself. See Documentation/mm/vmemmap_dedup.rst. */ static __always_inline const struct page *page_fixed_fake_head(const struct page *page) { -- cgit v1.2.3 From 161df60e9e89651c9aa3ae0edc9aae3a8a2d21e7 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 14 Jul 2022 13:24:15 +0900 Subject: mm, hwpoison, hugetlb: support saving mechanism of raw error pages When handling memory error on a hugetlb page, the error handler tries to dissolve and turn it into 4kB pages. If it's successfully dissolved, PageHWPoison flag is moved to the raw error page, so that's all right. However, dissolve sometimes fails, then the error page is left as hwpoisoned hugepage. It's useful if we can retry to dissolve it to save healthy pages, but that's not possible now because the information about where the raw error pages is lost. Use the private field of a few tail pages to keep that information. The code path of shrinking hugepage pool uses this info to try delayed dissolve. In order to remember multiple errors in a hugepage, a singly-linked list originated from SUBPAGE_INDEX_HWPOISON-th tail page is constructed. Only simple operations (adding an entry or clearing all) are required and the list is assumed not to be very long, so this simple data structure should be enough. If we failed to save raw error info, the hwpoison hugepage has errors on unknown subpage, then this new saving mechanism does not work any more, so disable saving new raw error info and freeing hwpoison hugepages. Link: https://lkml.kernel.org/r/20220714042420.1847125-4-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Reported-by: kernel test robot Reviewed-by: Miaohe Lin Cc: David Hildenbrand Cc: Liu Shixin Cc: Mike Kravetz Cc: Muchun Song Cc: Oscar Salvador Cc: Yang Shi Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6d0620edf0a6..3ec981a0d8b3 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -42,6 +42,9 @@ enum { SUBPAGE_INDEX_CGROUP, /* reuse page->private */ SUBPAGE_INDEX_CGROUP_RSVD, /* reuse page->private */ __MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD, +#endif +#ifdef CONFIG_MEMORY_FAILURE + SUBPAGE_INDEX_HWPOISON, #endif __NR_USED_SUBPAGE, }; @@ -551,7 +554,7 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, * Synchronization: Initially set after new page allocation with no * locking. When examined and modified during migration processing * (isolate, migrate, putback) the hugetlb_lock is held. - * HPG_temporary - - Set on a page that is temporarily allocated from the buddy + * HPG_temporary - Set on a page that is temporarily allocated from the buddy * allocator. Typically used for migration target pages when no pages * are available in the pool. The hugetlb free page path will * immediately free pages with this flag set to the buddy allocator. @@ -561,6 +564,8 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, * HPG_freed - Set when page is on the free lists. * Synchronization: hugetlb_lock held for examination and modification. * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed. + * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page + * that is not tracked by raw_hwp_page list. */ enum hugetlb_page_flags { HPG_restore_reserve = 0, @@ -568,6 +573,7 @@ enum hugetlb_page_flags { HPG_temporary, HPG_freed, HPG_vmemmap_optimized, + HPG_raw_hwp_unreliable, __NR_HPAGEFLAGS, }; @@ -614,6 +620,7 @@ HPAGEFLAG(Migratable, migratable) HPAGEFLAG(Temporary, temporary) HPAGEFLAG(Freed, freed) HPAGEFLAG(VmemmapOptimized, vmemmap_optimized) +HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable) #ifdef CONFIG_HUGETLB_PAGE @@ -796,6 +803,14 @@ extern int dissolve_free_huge_page(struct page *page); extern int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); +#ifdef CONFIG_MEMORY_FAILURE +extern void hugetlb_clear_page_hwpoison(struct page *hpage); +#else +static inline void hugetlb_clear_page_hwpoison(struct page *hpage) +{ +} +#endif + #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION #ifndef arch_hugetlb_migration_supported static inline bool arch_hugetlb_migration_supported(struct hstate *h) -- cgit v1.2.3 From ac5fcde0a96a18773f06b7c00c5ea081bbdc64b3 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 14 Jul 2022 13:24:16 +0900 Subject: mm, hwpoison: make unpoison aware of raw error info in hwpoisoned hugepage Raw error info list needs to be removed when hwpoisoned hugetlb is unpoisoned. And unpoison handler needs to know how many errors there are in the target hugepage. So add them. HPageVmemmapOptimized(hpage) and HPageRawHwpUnreliable(hpage)) sometimes can't be unpoisoned, so skip them. Link: https://lkml.kernel.org/r/20220714042420.1847125-5-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Reported-by: kernel test robot Reviewed-by: Miaohe Lin Cc: David Hildenbrand Cc: Liu Shixin Cc: Mike Kravetz Cc: Muchun Song Cc: Oscar Salvador Cc: Yang Shi Signed-off-by: Andrew Morton --- include/linux/swapops.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index bb7afd03a324..a3d435bf9f97 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -490,6 +490,11 @@ static inline void num_poisoned_pages_dec(void) atomic_long_dec(&num_poisoned_pages); } +static inline void num_poisoned_pages_sub(long i) +{ + atomic_long_sub(i, &num_poisoned_pages); +} + #else static inline swp_entry_t make_hwpoison_entry(struct page *page) @@ -505,6 +510,10 @@ static inline int is_hwpoison_entry(swp_entry_t swp) static inline void num_poisoned_pages_inc(void) { } + +static inline void num_poisoned_pages_sub(long i) +{ +} #endif static inline int non_swap_entry(swp_entry_t entry) -- cgit v1.2.3 From 38f6d29397ccb9c191c4c91103e8123f518fdc10 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 14 Jul 2022 13:24:17 +0900 Subject: mm, hwpoison: set PG_hwpoison for busy hugetlb pages If memory_failure() fails to grab page refcount on a hugetlb page because it's busy, it returns without setting PG_hwpoison on it. This not only loses a chance of error containment, but breaks the rule that action_result() should be called only when memory_failure() do any of handling work (even if that's just setting PG_hwpoison). This inconsistency could harm code maintainability. So set PG_hwpoison and call hugetlb_set_page_hwpoison() for such a case. Link: https://lkml.kernel.org/r/20220714042420.1847125-6-naoya.horiguchi@linux.dev Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()") Signed-off-by: Naoya Horiguchi Reviewed-by: Miaohe Lin Cc: David Hildenbrand Cc: kernel test robot Cc: Liu Shixin Cc: Mike Kravetz Cc: Muchun Song Cc: Oscar Salvador Cc: Yang Shi Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index e6e201a4ce05..0345b8c30394 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3176,6 +3176,7 @@ enum mf_flags { MF_SOFT_OFFLINE = 1 << 3, MF_UNPOISON = 1 << 4, MF_SW_SIMULATED = 1 << 5, + MF_NO_RETRY = 1 << 6, }; int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, unsigned long count, int mf_flags); -- cgit v1.2.3 From 6f4614886baa59b6ae014093300482c1da4d3c93 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 14 Jul 2022 13:24:20 +0900 Subject: mm, hwpoison: enable memory error handling on 1GB hugepage Now error handling code is prepared, so remove the blocking code and enable memory error handling on 1GB hugepage. Link: https://lkml.kernel.org/r/20220714042420.1847125-9-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Reviewed-by: Miaohe Lin Cc: David Hildenbrand Cc: kernel test robot Cc: Liu Shixin Cc: Mike Kravetz Cc: Muchun Song Cc: Oscar Salvador Cc: Yang Shi Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0345b8c30394..3bedc449c14d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3229,7 +3229,6 @@ enum mf_action_page_type { MF_MSG_DIFFERENT_COMPOUND, MF_MSG_HUGE, MF_MSG_FREE_HUGE, - MF_MSG_NON_PMD_HUGE, MF_MSG_UNMAP_FAILED, MF_MSG_DIRTY_SWAPCACHE, MF_MSG_CLEAN_SWAPCACHE, -- cgit v1.2.3 From 729337bc20876af348b363b3e35fb19be71ba793 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Thu, 28 Jul 2022 17:48:38 +0200 Subject: highmem: remove unneeded spaces in kmap_local_page() kdocs Patch series "highmem: Extend kmap_local_page() documentation", v2. The Highmem interface is evolving and the current documentation does not reflect the intended uses of each of the calls. Furthermore, after a recent series of reworks, the differences of the calls can still be confusing and may lead to the expanded use of calls which are deprecated. This series is the second round of changes towards an enhanced documentation of the Highmem's interface; at this stage the patches are only focused to kmap_local_page(). In addition it also contains some minor clean ups. This patch (of 7): In the kdocs of kmap_local_page(), the description of @page starts after several unnecessary spaces. Therefore, remove those spaces. Link: https://lkml.kernel.org/r/20220728154844.10874-1-fmdefrancesco@gmail.com Link: https://lkml.kernel.org/r/20220728154844.10874-2-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Suggested-by: Ira Weiny Reviewed-by: Ira Weiny Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Catalin Marinas Cc: Will Deacon Cc: Peter Collingbourne Cc: Vlastimil Babka Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/highmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 177b07944640..0bd5ed4ac391 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -60,7 +60,7 @@ static inline void kmap_flush_unused(void); /** * kmap_local_page - Map a page for temporary usage - * @page: Pointer to the page to be mapped + * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * -- cgit v1.2.3 From 383bbef283920411379c5c93829102ff7859fea5 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Thu, 28 Jul 2022 17:48:39 +0200 Subject: highmem: specify that kmap_local_page() is callable from interrupts In a recent thread about converting kmap() to kmap_local_page(), the safety of calling kmap_local_page() was questioned.[1] "any context" should probably be enough detail for users who want to know whether or not kmap_local_page() can be called from interrupts. However, Linux still has kmap_atomic() which might make users think they must use the latter in interrupts. Add "including interrupts" for better clarity. [1] https://lore.kernel.org/lkml/3187836.aeNJFYEL58@opensuse/ Link: https://lkml.kernel.org/r/20220728154844.10874-3-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Suggested-by: Ira Weiny Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Catalin Marinas Cc: Jonathan Corbet Cc: Peter Collingbourne Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/highmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 0bd5ed4ac391..4a46d95ff6c8 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -64,7 +64,7 @@ static inline void kmap_flush_unused(void); * * Returns: The virtual address of the mapping * - * Can be invoked from any context. + * Can be invoked from any context, including interrupts. * * Requires careful handling when nesting multiple mappings because the map * management is stack based. The unmap has to be in the reverse order of -- cgit v1.2.3 From 72f1c55adf70fd08ceac6b67455238db2014894a Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Thu, 28 Jul 2022 17:48:43 +0200 Subject: highmem: delete a sentence from kmap_local_page() kdocs kmap_local_page() should always be preferred in place of kmap() and kmap_atomic(). "Only use when really necessary." is not consistent with the Documentation/mm/highmem.rst and these kdocs it embeds. Therefore, delete the above-mentioned sentence from kdocs. Link: https://lkml.kernel.org/r/20220728154844.10874-7-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Suggested-by: Ira Weiny Reviewed-by: Ira Weiny Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Catalin Marinas Cc: Jonathan Corbet Cc: Peter Collingbourne Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/highmem.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 4a46d95ff6c8..25679035ca28 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -86,8 +86,7 @@ static inline void kmap_flush_unused(void); * temporarily mapped. * * While it is significantly faster than kmap() for the higmem case it - * comes with restrictions about the pointer validity. Only use when really - * necessary. + * comes with restrictions about the pointer validity. * * On HIGHMEM enabled systems mapping a highmem page has the side effect of * disabling migration in order to keep the virtual address stable across -- cgit v1.2.3 From fcb14cb1bdacec5b4374fe161e83fb8208164a85 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 May 2022 14:59:25 -0400 Subject: new iov_iter flavour - ITER_UBUF Equivalent of single-segment iovec. Initialized by iov_iter_ubuf(), checked for by iter_is_ubuf(), otherwise behaves like ITER_IOVEC ones. We are going to expose the things like ->write_iter() et.al. to those in subsequent commits. New predicate (user_backed_iter()) that is true for ITER_IOVEC and ITER_UBUF; places like direct-IO handling should use that for checking that pages we modify after getting them from iov_iter_get_pages() would need to be dirtied. DO NOT assume that replacing iter_is_iovec() with user_backed_iter() will solve all problems - there's code that uses iter_is_iovec() to decide how to poke around in iov_iter guts and for that the predicate replacement obviously won't suffice. Signed-off-by: Al Viro --- include/linux/uio.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 9a2dc496d535..85bef84fd294 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -26,6 +26,7 @@ enum iter_type { ITER_PIPE, ITER_XARRAY, ITER_DISCARD, + ITER_UBUF, }; struct iov_iter_state { @@ -38,6 +39,7 @@ struct iov_iter { u8 iter_type; bool nofault; bool data_source; + bool user_backed; size_t iov_offset; size_t count; union { @@ -46,6 +48,7 @@ struct iov_iter { const struct bio_vec *bvec; struct xarray *xarray; struct pipe_inode_info *pipe; + void __user *ubuf; }; union { unsigned long nr_segs; @@ -70,6 +73,11 @@ static inline void iov_iter_save_state(struct iov_iter *iter, state->nr_segs = iter->nr_segs; } +static inline bool iter_is_ubuf(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_UBUF; +} + static inline bool iter_is_iovec(const struct iov_iter *i) { return iov_iter_type(i) == ITER_IOVEC; @@ -105,6 +113,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) return i->data_source ? WRITE : READ; } +static inline bool user_backed_iter(const struct iov_iter *i) +{ + return i->user_backed; +} + /* * Total number of bytes covered by an iovec. * @@ -322,4 +335,17 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec, int import_single_range(int type, void __user *buf, size_t len, struct iovec *iov, struct iov_iter *i); +static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, + void __user *buf, size_t count) +{ + WARN_ON(direction & ~(READ | WRITE)); + *i = (struct iov_iter) { + .iter_type = ITER_UBUF, + .user_backed = true, + .data_source = direction, + .ubuf = buf, + .count = count + }; +} + #endif -- cgit v1.2.3 From 10f525a8cd7a525e9fc73288bb35428c9cad5e63 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Jun 2022 02:02:51 -0400 Subject: ITER_PIPE: cache the type of last buffer We often need to find whether the last buffer is anon or not, and currently it's rather clumsy: check if ->iov_offset is non-zero (i.e. that pipe is not empty) if so, get the corresponding pipe_buffer and check its ->ops if it's &default_pipe_buf_ops, we have an anon buffer. Let's replace the use of ->iov_offset (which is nowhere near similar to its role for other flavours) with signed field (->last_offset), with the following rules: empty, no buffers occupied: 0 anon, with bytes up to N-1 filled: N zero-copy, with bytes up to N-1 filled: -N That way abs(i->last_offset) is equal to what used to be in i->iov_offset and empty vs. anon vs. zero-copy can be distinguished by the sign of i->last_offset. Checks for "should we extend the last buffer or should we start a new one?" become easier to follow that way. Note that most of the operations can only be done in a sane state - i.e. when the pipe has nothing past the current position of iterator. About the only thing that could be done outside of that state is iov_iter_advance(), which transitions to the sane state by truncating the pipe. There are only two cases where we leave the sane state: 1) iov_iter_get_pages()/iov_iter_get_pages_alloc(). Will be dealt with later, when we make get_pages advancing - the callers are actually happier that way. 2) iov_iter copied, then something is put into the copy. Since they share the underlying pipe, the original gets behind. When we decide that we are done with the copy (original is not usable until then) we advance the original. direct_io used to be done that way; nowadays it operates on the original and we do iov_iter_revert() to discard the excessive data. At the moment there's nothing in the kernel that could do that to ITER_PIPE iterators, so this reason for insane state is theoretical right now. Signed-off-by: Al Viro --- include/linux/uio.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 85bef84fd294..e7fc29b5ad19 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -40,7 +40,10 @@ struct iov_iter { bool nofault; bool data_source; bool user_backed; - size_t iov_offset; + union { + size_t iov_offset; + int last_offset; + }; size_t count; union { const struct iovec *iov; -- cgit v1.2.3 From 12d426ab64a1c75f1b2ee5c33e933a4c16004049 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Jun 2022 09:44:38 -0400 Subject: ITER_PIPE: fold data_start() and pipe_space_for_user() together All their callers are next to each other; all of them want the total amount of pages and, possibly, the offset in the partial final buffer. Combine into a new helper (pipe_npages()), fix the bogosity in pipe_space_for_user(), while we are at it. Signed-off-by: Al Viro --- include/linux/pipe_fs_i.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 4ea496924106..6cb65df3e3ba 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -156,26 +156,6 @@ static inline bool pipe_full(unsigned int head, unsigned int tail, return pipe_occupancy(head, tail) >= limit; } -/** - * pipe_space_for_user - Return number of slots available to userspace - * @head: The pipe ring head pointer - * @tail: The pipe ring tail pointer - * @pipe: The pipe info structure - */ -static inline unsigned int pipe_space_for_user(unsigned int head, unsigned int tail, - struct pipe_inode_info *pipe) -{ - unsigned int p_occupancy, p_space; - - p_occupancy = pipe_occupancy(head, tail); - if (p_occupancy >= pipe->max_usage) - return 0; - p_space = pipe->ring_size - p_occupancy; - if (p_space > pipe->max_usage) - p_space = pipe->max_usage; - return p_space; -} - /** * pipe_buf_get - get a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to -- cgit v1.2.3 From 1ef255e257173f4bc44317ef2076e7e0de688fdf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Jun 2022 10:28:36 -0400 Subject: iov_iter: advancing variants of iov_iter_get_pages{,_alloc}() Most of the users immediately follow successful iov_iter_get_pages() with advancing by the amount it had returned. Provide inline wrappers doing that, convert trivial open-coded uses of those. BTW, iov_iter_get_pages() never returns more than it had been asked to; such checks in cifs ought to be removed someday... Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/uio.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index e7fc29b5ad19..b70d28693400 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -351,4 +351,24 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, }; } +static inline ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, + size_t maxsize, unsigned maxpages, size_t *start) +{ + ssize_t res = iov_iter_get_pages(i, pages, maxsize, maxpages, start); + + if (res >= 0) + iov_iter_advance(i, res); + return res; +} + +static inline ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, + size_t maxsize, size_t *start) +{ + ssize_t res = iov_iter_get_pages_alloc(i, pages, maxsize, start); + + if (res >= 0) + iov_iter_advance(i, res); + return res; +} + #endif -- cgit v1.2.3 From eba2d3d798295dc43cae8fade102f9d083a2a741 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2022 13:05:12 -0400 Subject: get rid of non-advancing variants mechanical change; will be further massaged in subsequent commits Reviewed-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/uio.h | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index b70d28693400..5896af36199c 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -247,9 +247,9 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count); -ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, +ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, +ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state); @@ -351,24 +351,4 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, }; } -static inline ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, - size_t maxsize, unsigned maxpages, size_t *start) -{ - ssize_t res = iov_iter_get_pages(i, pages, maxsize, maxpages, start); - - if (res >= 0) - iov_iter_advance(i, res); - return res; -} - -static inline ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, - size_t maxsize, size_t *start) -{ - ssize_t res = iov_iter_get_pages_alloc(i, pages, maxsize, start); - - if (res >= 0) - iov_iter_advance(i, res); - return res; -} - #endif -- cgit v1.2.3 From 46dae32fe625a75f549c3a70edc77b778197bb05 Mon Sep 17 00:00:00 2001 From: Youngmin Nam Date: Tue, 12 Jul 2022 18:47:15 +0900 Subject: time: Correct the prototype of ns_to_kernel_old_timeval and ns_to_timespec64 In ns_to_kernel_old_timeval() definition, the function argument is defined with const identifier in kernel/time/time.c, but the prototype in include/linux/time32.h looks different. - The function is defined in kernel/time/time.c as below: struct __kernel_old_timeval ns_to_kernel_old_timeval(const s64 nsec) - The function is decalared in include/linux/time32.h as below: extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec); Because the variable of arithmethic types isn't modified in the calling scope, there's no need to mark arguments as const, which was already mentioned during review (Link[1) of the original patch. Likewise remove the "const" keyword in both definition and declaration of ns_to_timespec64() as requested by Arnd (Link[2]). Fixes: a84d1169164b ("y2038: Introduce struct __kernel_old_timeval") Signed-off-by: Youngmin Nam Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/all/20220712094715.2918823-1-youngmin.nam@samsung.com Link[1]: https://lore.kernel.org/all/20180310081123.thin6wphgk7tongy@gmail.com/ Link[2]: https://lore.kernel.org/all/CAK8P3a3nknJgEDESGdJH91jMj6R_xydFqWASd8r5BbesdvMBgA@mail.gmail.com/ --- include/linux/time64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/time64.h b/include/linux/time64.h index 2fb8232cff1d..f1bcea8c124a 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -145,7 +145,7 @@ static inline s64 timespec64_to_ns(const struct timespec64 *ts) * * Returns the timespec64 representation of the nsec parameter. */ -extern struct timespec64 ns_to_timespec64(const s64 nsec); +extern struct timespec64 ns_to_timespec64(s64 nsec); /** * timespec64_add_ns - Adds nanoseconds to a timespec64 -- cgit v1.2.3 From af887e437bb298752b2edc5834048b8151b8aea0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Aug 2022 12:50:28 -0400 Subject: NFS: Improve write error tracing Don't leak request pointers, but use the "device:inode" labelling that is used by all the other trace points. Furthermore, replace use of page indexes with an offset, again in order to align behaviour with other NFS trace points. Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index f0373a6cb5fb..ba7e2e4b0926 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -202,8 +202,7 @@ nfs_list_entry(struct list_head *head) return list_entry(head, struct nfs_page, wb_list); } -static inline -loff_t req_offset(struct nfs_page *req) +static inline loff_t req_offset(const struct nfs_page *req) { return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset; } -- cgit v1.2.3 From d4252071b97d2027d246f6a82cbee4d52f618b47 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 9 Aug 2022 14:32:13 -0400 Subject: add barriers to buffer_uptodate and set_buffer_uptodate Let's have a look at this piece of code in __bread_slow: get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ, 0, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; Neither wait_on_buffer nor buffer_uptodate contain any memory barrier. Consequently, if someone calls sb_bread and then reads the buffer data, the read of buffer data may be executed before wait_on_buffer(bh) on architectures with weak memory ordering and it may return invalid data. Fix this bug by adding a memory barrier to set_buffer_uptodate and an acquire barrier to buffer_uptodate (in a similar way as folio_test_uptodate and folio_mark_uptodate). Signed-off-by: Mikulas Patocka Reviewed-by: Matthew Wilcox (Oracle) Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 307445d1c69e..def8b8d30ccc 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -118,7 +118,6 @@ static __always_inline int test_clear_buffer_##name(struct buffer_head *bh) \ * of the form "mark_buffer_foo()". These are higher-level functions which * do something in addition to setting a b_state bit. */ -BUFFER_FNS(Uptodate, uptodate) BUFFER_FNS(Dirty, dirty) TAS_BUFFER_FNS(Dirty, dirty) BUFFER_FNS(Lock, locked) @@ -136,6 +135,30 @@ BUFFER_FNS(Meta, meta) BUFFER_FNS(Prio, prio) BUFFER_FNS(Defer_Completion, defer_completion) +static __always_inline void set_buffer_uptodate(struct buffer_head *bh) +{ + /* + * make it consistent with folio_mark_uptodate + * pairs with smp_load_acquire in buffer_uptodate + */ + smp_mb__before_atomic(); + set_bit(BH_Uptodate, &bh->b_state); +} + +static __always_inline void clear_buffer_uptodate(struct buffer_head *bh) +{ + clear_bit(BH_Uptodate, &bh->b_state); +} + +static __always_inline int buffer_uptodate(const struct buffer_head *bh) +{ + /* + * make it consistent with folio_test_uptodate + * pairs with smp_mb__before_atomic in set_buffer_uptodate + */ + return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; +} + #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) /* If we *know* page->private refers to buffer_heads */ -- cgit v1.2.3 From 2a0133723f9ebeb751cfce19f74ec07e108bef1f Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 5 Aug 2022 15:48:34 +0800 Subject: net: fix refcount bug in sk_psock_get (2) Syzkaller reports refcount bug as follows: ------------[ cut here ]------------ refcount_t: saturated; leaking memory. WARNING: CPU: 1 PID: 3605 at lib/refcount.c:19 refcount_warn_saturate+0xf4/0x1e0 lib/refcount.c:19 Modules linked in: CPU: 1 PID: 3605 Comm: syz-executor208 Not tainted 5.18.0-syzkaller-03023-g7e062cda7d90 #0 __refcount_add_not_zero include/linux/refcount.h:163 [inline] __refcount_inc_not_zero include/linux/refcount.h:227 [inline] refcount_inc_not_zero include/linux/refcount.h:245 [inline] sk_psock_get+0x3bc/0x410 include/linux/skmsg.h:439 tls_data_ready+0x6d/0x1b0 net/tls/tls_sw.c:2091 tcp_data_ready+0x106/0x520 net/ipv4/tcp_input.c:4983 tcp_data_queue+0x25f2/0x4c90 net/ipv4/tcp_input.c:5057 tcp_rcv_state_process+0x1774/0x4e80 net/ipv4/tcp_input.c:6659 tcp_v4_do_rcv+0x339/0x980 net/ipv4/tcp_ipv4.c:1682 sk_backlog_rcv include/net/sock.h:1061 [inline] __release_sock+0x134/0x3b0 net/core/sock.c:2849 release_sock+0x54/0x1b0 net/core/sock.c:3404 inet_shutdown+0x1e0/0x430 net/ipv4/af_inet.c:909 __sys_shutdown_sock net/socket.c:2331 [inline] __sys_shutdown_sock net/socket.c:2325 [inline] __sys_shutdown+0xf1/0x1b0 net/socket.c:2343 __do_sys_shutdown net/socket.c:2351 [inline] __se_sys_shutdown net/socket.c:2349 [inline] __x64_sys_shutdown+0x50/0x70 net/socket.c:2349 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 During SMC fallback process in connect syscall, kernel will replaces TCP with SMC. In order to forward wakeup smc socket waitqueue after fallback, kernel will sets clcsk->sk_user_data to origin smc socket in smc_fback_replace_callbacks(). Later, in shutdown syscall, kernel will calls sk_psock_get(), which treats the clcsk->sk_user_data as psock type, triggering the refcnt warning. So, the root cause is that smc and psock, both will use sk_user_data field. So they will mismatch this field easily. This patch solves it by using another bit(defined as SK_USER_DATA_PSOCK) in PTRMASK, to mark whether sk_user_data points to a psock object or not. This patch depends on a PTRMASK introduced in commit f1ff5ce2cd5e ("net, sk_msg: Clear sk_user_data pointer on clone if tagged"). For there will possibly be more flags in the sk_user_data field, this patch also refactor sk_user_data flags code to be more generic to improve its maintainability. Reported-and-tested-by: syzbot+5f26f85569bd179c18ce@syzkaller.appspotmail.com Suggested-by: Jakub Kicinski Acked-by: Wen Gu Signed-off-by: Hawkins Jiawei Reviewed-by: Jakub Sitnicki Signed-off-by: Jakub Kicinski --- include/linux/skmsg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 153b6dec9b6a..48f4b645193b 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -278,7 +278,8 @@ static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start) static inline struct sk_psock *sk_psock(const struct sock *sk) { - return rcu_dereference_sk_user_data(sk); + return __rcu_dereference_sk_user_data_with_flags(sk, + SK_USER_DATA_PSOCK); } static inline void sk_psock_set_state(struct sk_psock *psock, -- cgit v1.2.3 From c2a052a4a949df53f50a5024843432d2234cb824 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Fri, 24 Jun 2022 10:55:41 +0800 Subject: remoteproc: rename len of rpoc_vring to num Rename the member len in the structure rpoc_vring to num. And remove 'in bytes' from the comment of it. This is misleading. Because this actually refers to the size of the virtio vring to be created. The unit is not bytes. Signed-off-by: Xuan Zhuo Message-Id: <20220624025621.128843-2-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/remoteproc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 7c943f0a2fc4..aea79c77db0f 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -597,7 +597,7 @@ struct rproc_subdev { /** * struct rproc_vring - remoteproc vring state * @va: virtual address - * @len: length, in bytes + * @num: vring size * @da: device address * @align: vring alignment * @notifyid: rproc-specific unique vring index @@ -606,7 +606,7 @@ struct rproc_subdev { */ struct rproc_vring { void *va; - int len; + int num; u32 da; u32 align; int notifyid; -- cgit v1.2.3 From da802961832f9852886304290135457519815497 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:21 +0800 Subject: virtio: record the maximum queue num supported by the device. virtio-net can display the maximum (supported by hardware) ring size in ethtool -g eth0. When the subsequent patch implements vring reset, it can judge whether the ring size passed by the driver is legal based on this. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-2-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index d8fdf170637c..129bde7521e3 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -19,6 +19,7 @@ * @priv: a pointer for the virtqueue implementation to use. * @index: the zero-based ordinal number for this queue. * @num_free: number of elements we expect to be able to fit. + * @num_max: the maximum number of elements supported by the device. * * A note on @num_free: with indirect buffers, each buffer needs one * element in the queue, otherwise a buffer will need one element per @@ -31,6 +32,7 @@ struct virtqueue { struct virtio_device *vdev; unsigned int index; unsigned int num_free; + unsigned int num_max; void *priv; }; -- cgit v1.2.3 From 3086e9fc9173166774652a488467e4176ee1c81b Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:22 +0800 Subject: virtio: struct virtio_config_ops add callbacks for queue_reset reset can be divided into the following four steps (example): 1. transport: notify the device to reset the queue 2. vring: recycle the buffer submitted 3. vring: reset/resize the vring (may re-alloc) 4. transport: mmap vring to device, and enable the queue In order to support queue reset, add two callbacks in struct virtio_config_ops to implement steps 1 and 4. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-3-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index b47c2e7ed0ee..36ec7be1f480 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -78,6 +78,18 @@ struct virtio_shm_region { * @set_vq_affinity: set the affinity for a virtqueue (optional). * @get_vq_affinity: get the affinity for a virtqueue (optional). * @get_shm_region: get a shared memory region based on the index. + * @disable_vq_and_reset: reset a queue individually (optional). + * vq: the virtqueue + * Returns 0 on success or error status + * disable_vq_and_reset will guarantee that the callbacks are disabled and + * synchronized. + * Except for the callback, the caller should guarantee that the vring is + * not accessed by any functions of virtqueue. + * @enable_vq_after_reset: enable a reset queue + * vq: the virtqueue + * Returns 0 on success or error status + * If disable_vq_and_reset is set, then enable_vq_after_reset must also be + * set. */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { @@ -104,6 +116,8 @@ struct virtio_config_ops { int index); bool (*get_shm_region)(struct virtio_device *vdev, struct virtio_shm_region *region, u8 id); + int (*disable_vq_and_reset)(struct virtqueue *vq); + int (*enable_vq_after_reset)(struct virtqueue *vq); }; /* If driver didn't advertise the feature, it will never appear. */ -- cgit v1.2.3 From 07d9629d49584b6f79faa6158cd7aef7e6919703 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:27 +0800 Subject: virtio_ring: split: stop __vring_new_virtqueue as export symbol There is currently only one place to reference __vring_new_virtqueue() directly from the outside of virtio core. And here vring_new_virtqueue() can be used instead. Subsequent patches will modify __vring_new_virtqueue, so stop it as an export symbol for now. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-8-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_ring.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index b485b13fa50b..8b8af1a38991 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -76,16 +76,6 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, void (*callback)(struct virtqueue *vq), const char *name); -/* Creates a virtqueue with a custom layout. */ -struct virtqueue *__vring_new_virtqueue(unsigned int index, - struct vring vring, - struct virtio_device *vdev, - bool weak_barriers, - bool ctx, - bool (*notify)(struct virtqueue *), - void (*callback)(struct virtqueue *), - const char *name); - /* * Creates a virtqueue with a standard layout but a caller-allocated * ring. -- cgit v1.2.3 From c790e8e1817f1a17c05e64f1c4f16f231b8529d5 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:44 +0800 Subject: virtio_ring: introduce virtqueue_resize() Introduce virtqueue_resize() to implement the resize of vring. Based on these, the driver can dynamically adjust the size of the vring. For example: ethtool -G. virtqueue_resize() implements resize based on the vq reset function. In case of failure to allocate a new vring, it will give up resize and use the original vring. During this process, if the re-enable reset vq fails, the vq can no longer be used. Although the probability of this situation is not high. The parameter recycle is used to recycle the buffer that is no longer used. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-25-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 129bde7521e3..62e31bca5602 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -91,6 +91,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq); dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq); dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq); +int virtqueue_resize(struct virtqueue *vq, u32 num, + void (*recycle)(struct virtqueue *vq, void *buf)); + /** * virtio_device - representation of a device using virtio * @index: unique position on the virtio bus -- cgit v1.2.3 From ea024594b1dc5b6719c1400ae154690f5c203996 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:45 +0800 Subject: virtio_pci: struct virtio_pci_common_cfg add queue_notify_data Add queue_notify_data in struct virtio_pci_common_cfg, which comes from here https://github.com/oasis-tcs/virtio-spec/issues/89 In order not to affect the API, add a dedicated structure struct virtio_pci_modern_common_cfg to virtio_pci_modern.h. Since I want to add queue_reset after queue_notify_data, I submitted this patch first. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-26-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_pci_modern.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index eb2bd9b4077d..41f5a018bd94 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -5,6 +5,13 @@ #include #include +struct virtio_pci_modern_common_cfg { + struct virtio_pci_common_cfg cfg; + + __le16 queue_notify_data; /* read-write */ + __le16 padding; +}; + struct virtio_pci_modern_device { struct pci_dev *pci_dev; -- cgit v1.2.3 From 3251063155032729b8793ac3957136ae25c0bafa Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:46 +0800 Subject: virtio: allow to unbreak/break virtqueue individually This patch allows the new introduced __virtqueue_break()/__virtqueue_unbreak() to break/unbreak the virtqueue. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-27-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 62e31bca5602..d45ee82a4470 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -138,6 +138,9 @@ bool is_virtio_device(struct device *dev); void virtio_break_device(struct virtio_device *dev); void __virtio_unbreak_device(struct virtio_device *dev); +void __virtqueue_break(struct virtqueue *_vq); +void __virtqueue_unbreak(struct virtqueue *_vq); + void virtio_config_changed(struct virtio_device *dev); #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); -- cgit v1.2.3 From 4913e85441b40386c4bb093f188b955d8165f1b7 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:48 +0800 Subject: virtio_ring: struct virtqueue introduce reset Introduce a new member reset to the structure virtqueue to determine whether the current vq is in the reset state. Subsequent patches will use it. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-29-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index d45ee82a4470..a3f73bb6733e 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -20,6 +20,7 @@ * @index: the zero-based ordinal number for this queue. * @num_free: number of elements we expect to be able to fit. * @num_max: the maximum number of elements supported by the device. + * @reset: vq is in reset state or not. * * A note on @num_free: with indirect buffers, each buffer needs one * element in the queue, otherwise a buffer will need one element per @@ -34,6 +35,7 @@ struct virtqueue { unsigned int num_free; unsigned int num_max; void *priv; + bool reset; }; int virtqueue_add_outbuf(struct virtqueue *vq, -- cgit v1.2.3 From 0cdd450e70510c9e13af8099e9f6c1467e6a0b91 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:49 +0800 Subject: virtio_pci: struct virtio_pci_common_cfg add queue_reset Add queue_reset in virtio_pci_modern_common_cfg. https://github.com/oasis-tcs/virtio-spec/issues/124 https://github.com/oasis-tcs/virtio-spec/issues/139 Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-30-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_pci_modern.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index 41f5a018bd94..05123b9a606f 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -9,7 +9,7 @@ struct virtio_pci_modern_common_cfg { struct virtio_pci_common_cfg cfg; __le16 queue_notify_data; /* read-write */ - __le16 padding; + __le16 queue_reset; /* read-write */ }; struct virtio_pci_modern_device { -- cgit v1.2.3 From 0b50cece0b7857732d2055f2c77f8730c10f9196 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:50 +0800 Subject: virtio_pci: introduce helper to get/set queue reset Introduce new helpers to implement queue reset and get queue reset status. https://github.com/oasis-tcs/virtio-spec/issues/124 https://github.com/oasis-tcs/virtio-spec/issues/139 Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-31-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_pci_modern.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index 05123b9a606f..c4eeb79b0139 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -113,4 +113,6 @@ void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, u16 index, resource_size_t *pa); int vp_modern_probe(struct virtio_pci_modern_device *mdev); void vp_modern_remove(struct virtio_pci_modern_device *mdev); +int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index); +void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index); #endif -- cgit v1.2.3 From a10fba0377145fccefea4dc4dd5915b7ed87e546 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:53 +0800 Subject: virtio: find_vqs() add arg sizes find_vqs() adds a new parameter sizes to specify the size of each vq vring. NULL as sizes means that all queues in find_vqs() use the maximum size. A value in the array is 0, which means that the corresponding queue uses the maximum size. In the split scenario, the meaning of size is the largest size, because it may be limited by memory, the virtio core will try a smaller size. And the size is power of 2. Signed-off-by: Xuan Zhuo Acked-by: Hans de Goede Reviewed-by: Mathieu Poirier Acked-by: Jason Wang Message-Id: <20220801063902.129329-34-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 36ec7be1f480..888f7e96f0c7 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -55,6 +55,7 @@ struct virtio_shm_region { * include a NULL entry for vqs that do not need a callback * names: array of virtqueue names (mainly for debugging) * include a NULL entry for vqs unused by driver + * sizes: array of virtqueue sizes * Returns 0 on success or error status * @del_vqs: free virtqueues found by find_vqs(). * @synchronize_cbs: synchronize with the virtqueue callbacks (optional) @@ -103,7 +104,9 @@ struct virtio_config_ops { void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], const bool *ctx, + const char * const names[], + u32 sizes[], + const bool *ctx, struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); void (*synchronize_cbs)(struct virtio_device *); @@ -212,7 +215,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, const char *names[] = { n }; struct virtqueue *vq; int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL, - NULL); + NULL, NULL); if (err < 0) return ERR_PTR(err); return vq; @@ -224,7 +227,8 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, const char * const names[], struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, + NULL, desc); } static inline @@ -233,8 +237,8 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, const char * const names[], const bool *ctx, struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, - desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, + ctx, desc); } /** -- cgit v1.2.3 From fe3dc04e31aa51f91dc7f741a5f76cc4817eb5b4 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:56 +0800 Subject: virtio: add helper virtio_find_vqs_ctx_size() Introduce helper virtio_find_vqs_ctx_size() to call find_vqs and specify the maximum size of each vq ring. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-37-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 888f7e96f0c7..6adff09f7170 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -241,6 +241,18 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, ctx, desc); } +static inline +int virtio_find_vqs_ctx_size(struct virtio_device *vdev, u32 nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char * const names[], + u32 sizes[], + const bool *ctx, struct irq_affinity *desc) +{ + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, + ctx, desc); +} + /** * virtio_synchronize_cbs - synchronize with virtqueue callbacks * @vdev: the device -- cgit v1.2.3 From cae15c2ed8e6e058bd5e32de292ab7982640161e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 14 Jul 2022 14:39:26 +0300 Subject: vdpa/mlx5: Implement susupend virtqueue callback Implement the suspend callback allowing to suspend the virtqueues so they stop processing descriptors. This is required to allow to query a consistent state of the virtqueue while live migration is taking place. Signed-off-by: Eli Cohen Message-Id: <20220714113927.85729-2-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin --- include/linux/mlx5/mlx5_ifc_vdpa.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h index 4414ed5b6ed2..9becdc3fa503 100644 --- a/include/linux/mlx5/mlx5_ifc_vdpa.h +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -150,6 +150,14 @@ enum { MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3, }; +/* This indicates that the object was not created or has already + * been desroyed. It is very safe to assume that this object will never + * have so many states + */ +enum { + MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff +}; + enum { MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1, -- cgit v1.2.3 From 848ecea184e1253758423b37cbfc1ed732ccf5b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Wed, 10 Aug 2022 19:15:09 +0200 Subject: vdpa: Add suspend operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This operation is optional: It it's not implemented, backend feature bit will not be exposed. Signed-off-by: Eugenio Pérez Message-Id: <20220810171512.2343333-2-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 7b4a13d3bd91..d282f464d2f1 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -218,6 +218,9 @@ struct vdpa_map_file { * @reset: Reset device * @vdev: vdpa device * Returns integer: success (0) or error (< 0) + * @suspend: Suspend or resume the device (optional) + * @vdev: vdpa device + * Returns integer: success (0) or error (< 0) * @get_config_size: Get the size of the configuration space includes * fields that are conditional on feature bits. * @vdev: vdpa device @@ -319,6 +322,7 @@ struct vdpa_config_ops { u8 (*get_status)(struct vdpa_device *vdev); void (*set_status)(struct vdpa_device *vdev, u8 status); int (*reset)(struct vdpa_device *vdev); + int (*suspend)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev); void (*get_config)(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); -- cgit v1.2.3 From addebd9ac9ca0ef8b3764907bf8018e48caffc64 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 12 Aug 2022 15:56:33 -0700 Subject: fs: don't randomize struct kiocb fields This is a size sensitive structure and randomizing can introduce extra padding that breaks io_uring's fixed size expectations. There are few fields here as it is, half of which need a fixed order to optimally pack, so the randomization isn't providing much. Suggested-by: Linus Torvalds Signed-off-by: Keith Busch Link: https://lore.kernel.org/io-uring/b6f508ca-b1b2-5f40-7998-e4cff1cf7212@kernel.dk/ Signed-off-by: Jens Axboe --- include/linux/fs.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9f131e559d05..daf69a6504b6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -339,17 +339,12 @@ enum rw_hint { struct kiocb { struct file *ki_filp; - - /* The 'ki_filp' pointer is shared in a union for aio */ - randomized_struct_fields_start - loff_t ki_pos; void (*ki_complete)(struct kiocb *iocb, long ret); void *private; int ki_flags; u16 ki_ioprio; /* See linux/ioprio.h */ struct wait_page_queue *ki_waitq; /* for async buffered IO */ - randomized_struct_fields_end }; static inline bool is_sync_kiocb(struct kiocb *kiocb) -- cgit v1.2.3 From f2ccb5aed7bce1d8b3ed5b3385759a5509663028 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 11 Aug 2022 09:11:15 +0200 Subject: io_uring: make io_kiocb_to_cmd() typesafe We need to make sure (at build time) that struct io_cmd_data is not casted to a structure that's larger. Signed-off-by: Stefan Metzmacher Link: https://lore.kernel.org/r/c024cdf25ae19fc0319d4180e2298bade8ed17b8.1660201408.git.metze@samba.org Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index f7fab3758cb9..677a25d44d7f 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -491,7 +491,14 @@ struct io_cmd_data { __u8 data[56]; }; -#define io_kiocb_to_cmd(req) ((void *) &(req)->cmd) +static inline void io_kiocb_cmd_sz_check(size_t cmd_sz) +{ + BUILD_BUG_ON(cmd_sz > sizeof(struct io_cmd_data)); +} +#define io_kiocb_to_cmd(req, cmd_type) ( \ + io_kiocb_cmd_sz_check(sizeof(cmd_type)) , \ + ((cmd_type *)&(req)->cmd) \ +) #define cmd_to_io_kiocb(ptr) ((struct io_kiocb *) ptr) struct io_kiocb { -- cgit v1.2.3 From 67f4b5dc49913abcdb5cc736e73674e2f352f81d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 13 Aug 2022 08:22:25 -0400 Subject: NFS: Fix another fsync() issue after a server reboot Currently, when the writeback code detects a server reboot, it redirties any pages that were not committed to disk, and it sets the flag NFS_CONTEXT_RESEND_WRITES in the nfs_open_context of the file descriptor that dirtied the file. While this allows the file descriptor in question to redrive its own writes, it violates the fsync() requirement that we should be synchronising all writes to disk. While the problem is infrequent, we do see corner cases where an untimely server reboot causes the fsync() call to abandon its attempt to sync data to disk and causing data corruption issues due to missed error conditions or similar. In order to tighted up the client's ability to deal with this situation without introducing livelocks, add a counter that records the number of times pages are redirtied due to a server reboot-like condition, and use that in fsync() to redrive the sync to disk. Fixes: 2197e9b06c22 ("NFS: Fix up fsync() when the server rebooted") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b32ed68e7dc4..f08e581f0161 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -182,6 +182,7 @@ struct nfs_inode { /* Regular file */ struct { atomic_long_t nrequests; + atomic_long_t redirtied_pages; struct nfs_mds_commit_info commit_info; struct mutex commit_mutex; }; -- cgit v1.2.3 From 5f6277a0c15e1ea54b6fd3d78c9fff7bfe42556c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 13 Aug 2022 08:51:45 -0400 Subject: NFS: Cleanup to remove unused flag NFS_CONTEXT_RESEND_WRITES Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f08e581f0161..7931fa472561 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -83,7 +83,6 @@ struct nfs_open_context { fmode_t mode; unsigned long flags; -#define NFS_CONTEXT_RESEND_WRITES (1) #define NFS_CONTEXT_BAD (2) #define NFS_CONTEXT_UNLOCK (3) #define NFS_CONTEXT_FILE_OPEN (4) -- cgit v1.2.3 From 9f162193d6e48eb4ff51c2ea3612f1daebca1b7e Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 11 Aug 2022 22:34:25 -0700 Subject: radix-tree: replace gfp.h inclusion with gfp_types.h Radix tree header includes gfp.h for __GFP_BITS_SHIFT only. Now we have gfp_types.h for this. Fixes powerpc allmodconfig build: In file included from include/linux/nodemask.h:97, from include/linux/mmzone.h:17, from include/linux/gfp.h:7, from include/linux/radix-tree.h:12, from include/linux/idr.h:15, from include/linux/kernfs.h:12, from include/linux/sysfs.h:16, from include/linux/kobject.h:20, from include/linux/pci.h:35, from arch/powerpc/kernel/prom_init.c:24: include/linux/random.h: In function 'add_latent_entropy': >> include/linux/random.h:25:46: error: 'latent_entropy' undeclared (first use in this function); did you mean 'add_latent_entropy'? 25 | add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); | ^~~~~~~~~~~~~~ | add_latent_entropy include/linux/random.h:25:46: note: each undeclared identifier is reported only once for each function it appears in Reported-by: kernel test robot CC: Andy Shevchenko CC: Andrew Morton CC: Jason A. Donenfeld Signed-off-by: Yury Norov Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index f7c1d21c2f39..eae67015ce51 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -9,7 +9,7 @@ #define _LINUX_RADIX_TREE_H #include -#include +#include #include #include #include -- cgit v1.2.3 From 0a90ed8d0cfa29735a221eba14d9cb6c735d35b6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 1 Aug 2022 14:37:31 +0300 Subject: platform/x86: pmc_atom: Fix SLP_TYPx bitfield mask On Intel hardware the SLP_TYPx bitfield occupies bits 10-12 as per ACPI specification (see Table 4.13 "PM1 Control Registers Fixed Hardware Feature Control Bits" for the details). Fix the mask and other related definitions accordingly. Fixes: 93e5eadd1f6e ("x86/platform: New Intel Atom SOC power management controller driver") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220801113734.36131-1-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/pmc_atom.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h index 3edfb6d4e67a..dd81f510e4cf 100644 --- a/include/linux/platform_data/x86/pmc_atom.h +++ b/include/linux/platform_data/x86/pmc_atom.h @@ -7,6 +7,8 @@ #ifndef PMC_ATOM_H #define PMC_ATOM_H +#include + /* ValleyView Power Control Unit PCI Device ID */ #define PCI_DEVICE_ID_VLV_PMC 0x0F1C /* CherryTrail Power Control Unit PCI Device ID */ @@ -139,9 +141,9 @@ #define ACPI_MMIO_REG_LEN 0x100 #define PM1_CNT 0x4 -#define SLEEP_TYPE_MASK 0xFFFFECFF +#define SLEEP_TYPE_MASK GENMASK(12, 10) #define SLEEP_TYPE_S5 0x1C00 -#define SLEEP_ENABLE 0x2000 +#define SLEEP_ENABLE BIT(13) extern int pmc_atom_read(int offset, u32 *value); -- cgit v1.2.3 From be599244865cb788abe2c6f59ccb05d7240448e4 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 9 Aug 2022 19:36:33 +0200 Subject: cpumask: align signatures of UP implementations Between the generic version, and their uniprocessor optimised implementations, the return types of cpumask_any_and_distribute() and cpumask_any_distribute() are not identical. Change the UP versions to 'unsigned int', to match the generic versions. Suggested-by: Yury Norov Signed-off-by: Sander Vanheule Signed-off-by: Yury Norov --- include/linux/cpumask.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 0d435d0edbcb..d8c2a40f8beb 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -202,12 +202,13 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node) return 0; } -static inline int cpumask_any_and_distribute(const struct cpumask *src1p, - const struct cpumask *src2p) { +static inline unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, + const struct cpumask *src2p) +{ return cpumask_first_and(src1p, src2p); } -static inline int cpumask_any_distribute(const struct cpumask *srcp) +static inline unsigned int cpumask_any_distribute(const struct cpumask *srcp) { return cpumask_first(srcp); } -- cgit v1.2.3 From 2248ccd80124e61c2c84a22b22409bab452e1f0c Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 9 Aug 2022 19:36:34 +0200 Subject: lib/cpumask: add inline cpumask_next_wrap() for UP In the uniprocessor case, cpumask_next_wrap() can be simplified, as the number of valid argument combinations is limited: - 'start' can only be 0 - 'n' can only be -1 or 0 The only valid CPU that can then be returned, if any, will be the first one set in the provided 'mask'. For NR_CPUS == 1, include/linux/cpumask.h now provides an inline definition of cpumask_next_wrap(), which will conflict with the one provided by lib/cpumask.c. Make building of lib/cpumask.o again depend on CONFIG_SMP=y (i.e. NR_CPUS > 1) to avoid the re-definition. Suggested-by: Yury Norov Signed-off-by: Sander Vanheule Signed-off-by: Yury Norov --- include/linux/cpumask.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d8c2a40f8beb..bd047864c7ac 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -262,7 +262,26 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, (cpu) = cpumask_next_zero((cpu), (mask)), \ (cpu) < nr_cpu_ids;) +#if NR_CPUS == 1 +static inline +unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) +{ + cpumask_check(start); + if (n != -1) + cpumask_check(n); + + /* + * Return the first available CPU when wrapping, or when starting before cpu0, + * since there is only one valid option. + */ + if (wrap && n >= 0) + return nr_cpumask_bits; + + return cpumask_first(mask); +} +#else unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); +#endif /** * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location -- cgit v1.2.3 From 76b079ef4cc954fc2c2e0333a01855b0b2b6bdee Mon Sep 17 00:00:00 2001 From: Hao Jia Date: Sat, 6 Aug 2022 20:05:09 +0800 Subject: sched/psi: Remove unused parameter nbytes of psi_trigger_create() psi_trigger_create()'s 'nbytes' parameter is not used, so we can remove it. Signed-off-by: Hao Jia Reviewed-by: Ingo Molnar Acked-by: Johannes Weiner Signed-off-by: Tejun Heo --- include/linux/psi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/psi.h b/include/linux/psi.h index 89784763d19e..dd74411ac21d 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -27,7 +27,7 @@ void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); struct psi_trigger *psi_trigger_create(struct psi_group *group, - char *buf, size_t nbytes, enum psi_res res); + char *buf, enum psi_res res); void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, -- cgit v1.2.3 From d7ae5818c3fa3007dee13f9d99832e7f26b8bc44 Mon Sep 17 00:00:00 2001 From: Hao Jia Date: Sat, 6 Aug 2022 20:05:10 +0800 Subject: sched/psi: Remove redundant cgroup_psi() when !CONFIG_CGROUPS cgroup_psi() is only called under CONFIG_CGROUPS. We don't need cgroup_psi() when !CONFIG_CGROUPS, so we can remove it in this case. Signed-off-by: Hao Jia Reviewed-by: Ingo Molnar Acked-by: Johannes Weiner Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed53bfe7c46c..ac5d0515680e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -734,11 +734,6 @@ static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) return NULL; } -static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) -{ - return NULL; -} - static inline bool cgroup_psi_enabled(void) { return false; -- cgit v1.2.3 From 484b9fa4886bd9377969aad5e9ea17efda4ecda6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 16 Aug 2022 01:36:31 -0400 Subject: virtio: Revert "virtio: add helper virtio_find_vqs_ctx_size()" This reverts commit fe3dc04e31aa51f91dc7f741a5f76cc4817eb5b4: the API is now unused and in fact can't be implemented on top of a legacy device. Fixes: fe3dc04e31aa ("virtio: add helper virtio_find_vqs_ctx_size()") Cc: "Xuan Zhuo" Signed-off-by: Michael S. Tsirkin Message-Id: <20220816053602.173815-3-mst@redhat.com> --- include/linux/virtio_config.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 6adff09f7170..888f7e96f0c7 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -241,18 +241,6 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, ctx, desc); } -static inline -int virtio_find_vqs_ctx_size(struct virtio_device *vdev, u32 nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[], - u32 sizes[], - const bool *ctx, struct irq_affinity *desc) -{ - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, - ctx, desc); -} - /** * virtio_synchronize_cbs - synchronize with virtqueue callbacks * @vdev: the device -- cgit v1.2.3 From 9993a4f989c7ca5e227329b2878f65d05c9fc20f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 16 Aug 2022 01:36:58 -0400 Subject: virtio: Revert "virtio: find_vqs() add arg sizes" This reverts commit a10fba0377145fccefea4dc4dd5915b7ed87e546: the proposed API isn't supported on all transports but no effort was made to address this. It might not be hard to fix if we want to: maybe just rename size to size_hint and make sure legacy transports ignore the hint. But it's not sure what the benefit is in any case, so let's drop it. Fixes: a10fba037714 ("virtio: find_vqs() add arg sizes") Signed-off-by: Michael S. Tsirkin Message-Id: <20220816053602.173815-8-mst@redhat.com> --- include/linux/virtio_config.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 888f7e96f0c7..36ec7be1f480 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -55,7 +55,6 @@ struct virtio_shm_region { * include a NULL entry for vqs that do not need a callback * names: array of virtqueue names (mainly for debugging) * include a NULL entry for vqs unused by driver - * sizes: array of virtqueue sizes * Returns 0 on success or error status * @del_vqs: free virtqueues found by find_vqs(). * @synchronize_cbs: synchronize with the virtqueue callbacks (optional) @@ -104,9 +103,7 @@ struct virtio_config_ops { void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], - u32 sizes[], - const bool *ctx, + const char * const names[], const bool *ctx, struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); void (*synchronize_cbs)(struct virtio_device *); @@ -215,7 +212,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, const char *names[] = { n }; struct virtqueue *vq; int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL, - NULL, NULL); + NULL); if (err < 0) return ERR_PTR(err); return vq; @@ -227,8 +224,7 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, const char * const names[], struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, - NULL, desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc); } static inline @@ -237,8 +233,8 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, const char * const names[], const bool *ctx, struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, - ctx, desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, + desc); } /** -- cgit v1.2.3 From 5c669c4a4c6aa0489848093c93b8029f5c5c75ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Wed, 10 Aug 2022 11:40:03 +0200 Subject: virtio: kerneldocs fixes and enhancements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix variable names in some kerneldocs, naming in others. Add kerneldocs for struct vring_desc and vring_interrupt. Signed-off-by: Ricardo Cañuelo Message-Id: <20220810094004.1250-2-ricardo.canuelo@collabora.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- include/linux/virtio.h | 6 +++--- include/linux/virtio_config.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index a3f73bb6733e..dcab9c7e8784 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -11,7 +11,7 @@ #include /** - * virtqueue - a queue to register buffers for sending or receiving. + * struct virtqueue - a queue to register buffers for sending or receiving. * @list: the chain of virtqueues for this device * @callback: the function to call when buffers are consumed (can be NULL). * @name: the name of this virtqueue (mainly for debugging) @@ -97,7 +97,7 @@ int virtqueue_resize(struct virtqueue *vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)); /** - * virtio_device - representation of a device using virtio + * struct virtio_device - representation of a device using virtio * @index: unique position on the virtio bus * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore) * @config_enabled: configuration change reporting enabled @@ -156,7 +156,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev); list_for_each_entry(vq, &vdev->vqs, list) /** - * virtio_driver - operations for a virtio I/O driver + * struct virtio_driver - operations for a virtio I/O driver * @driver: underlying device driver (populate name and owner). * @id_table: the ids serviced by this driver. * @feature_table: an array of feature numbers supported by this driver. diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 36ec7be1f480..4b517649cfe8 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -239,7 +239,7 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, /** * virtio_synchronize_cbs - synchronize with virtqueue callbacks - * @vdev: the device + * @dev: the virtio device */ static inline void virtio_synchronize_cbs(struct virtio_device *dev) @@ -258,7 +258,7 @@ void virtio_synchronize_cbs(struct virtio_device *dev) /** * virtio_device_ready - enable vq use in probe function - * @vdev: the device + * @dev: the virtio device * * Driver must call this to use vqs in the probe function. * @@ -306,7 +306,7 @@ const char *virtio_bus_name(struct virtio_device *vdev) /** * virtqueue_set_affinity - setting affinity for a virtqueue * @vq: the virtqueue - * @cpu: the cpu no. + * @cpu_mask: the cpu mask * * Pay attention the function are best-effort: the affinity hint may not be set * due to config support, irq type and sharing. -- cgit v1.2.3 From a8239f0342bae5a51acca967ba95b9a8ad56dd62 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 18 Aug 2022 14:35:55 +0800 Subject: blk-mq: remove unused function blk_mq_queue_stopped() blk_mq_queue_stopped() doesn't have any caller, which was found by code coverage test, thus remove it. Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20220818063555.3741222-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index effee1dc715a..92294a5fb083 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -857,7 +857,6 @@ void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_complete_request(struct request *rq); bool blk_mq_complete_request_remote(struct request *rq); -bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); -- cgit v1.2.3 From b5a5b9d5f28d23b84f06b45c61dcad95b07d41bc Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Aug 2022 15:38:58 +0200 Subject: serial: document start_rx member at struct uart_ops Fix this doc build warning: ./include/linux/serial_core.h:397: warning: Function parameter or member 'start_rx' not described in 'uart_ops' Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/5d07ae2eec8fbad87e623160f9926b178bef2744.1660829433.git.mchehab@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index aef3145f2032..6e4f4765d209 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -141,6 +141,14 @@ struct gpio_desc; * Locking: none. * Interrupts: caller dependent. * + * @start_rx: ``void ()(struct uart_port *port)`` + * + * Start receiving characters. + * + * Locking: @port->lock taken. + * Interrupts: locally disabled. + * This call must not sleep + * * @stop_rx: ``void ()(struct uart_port *port)`` * * Stop receiving characters; the @port is in the process of being closed. -- cgit v1.2.3 From c1e5c2f0cb8a22ec2e14af92afc7006491bebabb Mon Sep 17 00:00:00 2001 From: Pablo Sun Date: Thu, 4 Aug 2022 11:48:03 +0800 Subject: usb: typec: altmodes/displayport: correct pin assignment for UFP receptacles Fix incorrect pin assignment values when connecting to a monitor with Type-C receptacle instead of a plug. According to specification, an UFP_D receptacle's pin assignment should came from the UFP_D pin assignments field (bit 23:16), while an UFP_D plug's assignments are described in the DFP_D pin assignments (bit 15:8) during Mode Discovery. For example the LG 27 UL850-W is a monitor with Type-C receptacle. The monitor responds to MODE DISCOVERY command with following DisplayPort Capability flag: dp->alt->vdo=0x140045 The existing logic only take cares of UPF_D plug case, and would take the bit 15:8 for this 0x140045 case. This results in an non-existing pin assignment 0x0 in dp_altmode_configure. To fix this problem a new set of macros are introduced to take plug/receptacle differences into consideration. Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable@vger.kernel.org Co-developed-by: Pablo Sun Co-developed-by: Macpaul Lin Reviewed-by: Guillaume Ranquet Reviewed-by: Heikki Krogerus Signed-off-by: Pablo Sun Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/20220804034803.19486-1-macpaul.lin@mediatek.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_dp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index cfb916cccd31..8d09c2f0a9b8 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -73,6 +73,11 @@ enum { #define DP_CAP_USB BIT(7) #define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8) #define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(23, 16)) >> 16) +/* Get pin assignment taking plug & receptacle into consideration */ +#define DP_CAP_PIN_ASSIGN_UFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ + DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_)) +#define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ + DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_)) /* DisplayPort Status Update VDO bits */ #define DP_STATUS_CONNECTION(_status_) ((_status_) & 3) -- cgit v1.2.3 From bdd1c37a315bc50ab14066c4852bc8dcf070451e Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Tue, 16 Aug 2022 20:53:21 +0800 Subject: KVM: Rename KVM_PRIVATE_MEM_SLOTS to KVM_INTERNAL_MEM_SLOTS KVM_INTERNAL_MEM_SLOTS better reflects the fact those slots are KVM internally used (invisible to userspace) and avoids confusion to future private slots that can have different meaning. Signed-off-by: Chao Peng Message-Id: <20220816125322.1110439-2-chao.p.peng@linux.intel.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1c480b1821e1..fd5c3b4715f8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -656,12 +656,12 @@ struct kvm_irq_routing_table { }; #endif -#ifndef KVM_PRIVATE_MEM_SLOTS -#define KVM_PRIVATE_MEM_SLOTS 0 +#ifndef KVM_INTERNAL_MEM_SLOTS +#define KVM_INTERNAL_MEM_SLOTS 0 #endif #define KVM_MEM_SLOTS_NUM SHRT_MAX -#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_PRIVATE_MEM_SLOTS) +#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS) #ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 20ec3ebd707c77fb9b11b37193449193d4649f33 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Tue, 16 Aug 2022 20:53:22 +0800 Subject: KVM: Rename mmu_notifier_* to mmu_invalidate_* The motivation of this renaming is to make these variables and related helper functions less mmu_notifier bound and can also be used for non mmu_notifier based page invalidation. mmu_invalidate_* was chosen to better describe the purpose of 'invalidating' a page that those variables are used for. - mmu_notifier_seq/range_start/range_end are renamed to mmu_invalidate_seq/range_start/range_end. - mmu_notifier_retry{_hva} helper functions are renamed to mmu_invalidate_retry{_hva}. - mmu_notifier_count is renamed to mmu_invalidate_in_progress to avoid confusion with mn_active_invalidate_count. - While here, also update kvm_inc/dec_notifier_count() to kvm_mmu_invalidate_begin/end() to match the change for mmu_notifier_count. No functional change intended. Signed-off-by: Chao Peng Message-Id: <20220816125322.1110439-3-chao.p.peng@linux.intel.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 60 +++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fd5c3b4715f8..f4519d3689e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -765,10 +765,10 @@ struct kvm { #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) struct mmu_notifier mmu_notifier; - unsigned long mmu_notifier_seq; - long mmu_notifier_count; - unsigned long mmu_notifier_range_start; - unsigned long mmu_notifier_range_end; + unsigned long mmu_invalidate_seq; + long mmu_invalidate_in_progress; + unsigned long mmu_invalidate_range_start; + unsigned long mmu_invalidate_range_end; #endif struct list_head devices; u64 manual_dirty_log_protect; @@ -1357,10 +1357,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc); void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); #endif -void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end); -void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end); +void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, + unsigned long end); +void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, + unsigned long end); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -1907,42 +1907,44 @@ extern const struct kvm_stats_header kvm_vcpu_stats_header; extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) -static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) +static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { - if (unlikely(kvm->mmu_notifier_count)) + if (unlikely(kvm->mmu_invalidate_in_progress)) return 1; /* - * Ensure the read of mmu_notifier_count happens before the read - * of mmu_notifier_seq. This interacts with the smp_wmb() in - * mmu_notifier_invalidate_range_end to make sure that the caller - * either sees the old (non-zero) value of mmu_notifier_count or - * the new (incremented) value of mmu_notifier_seq. - * PowerPC Book3s HV KVM calls this under a per-page lock - * rather than under kvm->mmu_lock, for scalability, so - * can't rely on kvm->mmu_lock to keep things ordered. + * Ensure the read of mmu_invalidate_in_progress happens before + * the read of mmu_invalidate_seq. This interacts with the + * smp_wmb() in mmu_notifier_invalidate_range_end to make sure + * that the caller either sees the old (non-zero) value of + * mmu_invalidate_in_progress or the new (incremented) value of + * mmu_invalidate_seq. + * + * PowerPC Book3s HV KVM calls this under a per-page lock rather + * than under kvm->mmu_lock, for scalability, so can't rely on + * kvm->mmu_lock to keep things ordered. */ smp_rmb(); - if (kvm->mmu_notifier_seq != mmu_seq) + if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } -static inline int mmu_notifier_retry_hva(struct kvm *kvm, - unsigned long mmu_seq, - unsigned long hva) +static inline int mmu_invalidate_retry_hva(struct kvm *kvm, + unsigned long mmu_seq, + unsigned long hva) { lockdep_assert_held(&kvm->mmu_lock); /* - * If mmu_notifier_count is non-zero, then the range maintained by - * kvm_mmu_notifier_invalidate_range_start contains all addresses that - * might be being invalidated. Note that it may include some false + * If mmu_invalidate_in_progress is non-zero, then the range maintained + * by kvm_mmu_notifier_invalidate_range_start contains all addresses + * that might be being invalidated. Note that it may include some false * positives, due to shortcuts when handing concurrent invalidations. */ - if (unlikely(kvm->mmu_notifier_count) && - hva >= kvm->mmu_notifier_range_start && - hva < kvm->mmu_notifier_range_end) + if (unlikely(kvm->mmu_invalidate_in_progress) && + hva >= kvm->mmu_invalidate_range_start && + hva < kvm->mmu_invalidate_range_end) return 1; - if (kvm->mmu_notifier_seq != mmu_seq) + if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } -- cgit v1.2.3 From a357f7b4583ebf81d19c95aef57497ae81c5f63c Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 17 Aug 2022 23:20:08 +0800 Subject: ata: libata: Set __ATA_BASE_SHT max_sectors Commit 0568e6122574 ("ata: libata-scsi: cap ata_device->max_sectors according to shost->max_sectors") inadvertently capped the max_sectors value for some SATA disks to a value which is lower than we would want. For a device which supports LBA48, we would previously have request queue max_sectors_kb and max_hw_sectors_kb values of 1280 and 32767 respectively. For AHCI controllers, the value chosen for shost max sectors comes from the minimum of the SCSI host default max sectors in SCSI_DEFAULT_MAX_SECTORS (1024) and the shost DMA device mapping limit. This means that we would now set the max_sectors_kb and max_hw_sectors_kb values for a disk which supports LBA48 at 512, ignoring DMA mapping limit. As report by Oliver at [0], this caused a performance regression. Fix by picking a large enough max sectors value for ATA host controllers such that we don't needlessly reduce max_sectors_kb for LBA48 disks. [0] https://lore.kernel.org/linux-ide/YvsGbidf3na5FpGb@xsang-OptiPlex-9020/T/#m22d9fc5ad15af66066dd9fecf3d50f1b1ef11da3 Fixes: 0568e6122574 ("ata: libata-scsi: cap ata_device->max_sectors according to shost->max_sectors") Reported-by: Oliver Sang Signed-off-by: John Garry Signed-off-by: Damien Le Moal --- include/linux/libata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 0269ff114f5a..698032e5ef2d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1382,7 +1382,8 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .proc_name = drv_name, \ .slave_destroy = ata_scsi_slave_destroy, \ .bios_param = ata_std_bios_param, \ - .unlock_native_capacity = ata_scsi_unlock_native_capacity + .unlock_native_capacity = ata_scsi_unlock_native_capacity,\ + .max_sectors = ATA_MAX_SECTORS_LBA48 #define ATA_SUBBASE_SHT(drv_name) \ __ATA_BASE_SHT(drv_name), \ -- cgit v1.2.3 From 5535be3099717646781ce1540cf725965d680e7b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 9 Aug 2022 22:56:40 +0200 Subject: mm/gup: fix FOLL_FORCE COW security issue and remove FOLL_COW Ever since the Dirty COW (CVE-2016-5195) security issue happened, we know that FOLL_FORCE can be possibly dangerous, especially if there are races that can be exploited by user space. Right now, it would be sufficient to have some code that sets a PTE of a R/O-mapped shared page dirty, in order for it to erroneously become writable by FOLL_FORCE. The implications of setting a write-protected PTE dirty might not be immediately obvious to everyone. And in fact ever since commit 9ae0f87d009c ("mm/shmem: unconditionally set pte dirty in mfill_atomic_install_pte"), we can use UFFDIO_CONTINUE to map a shmem page R/O while marking the pte dirty. This can be used by unprivileged user space to modify tmpfs/shmem file content even if the user does not have write permissions to the file, and to bypass memfd write sealing -- Dirty COW restricted to tmpfs/shmem (CVE-2022-2590). To fix such security issues for good, the insight is that we really only need that fancy retry logic (FOLL_COW) for COW mappings that are not writable (!VM_WRITE). And in a COW mapping, we really only broke COW if we have an exclusive anonymous page mapped. If we have something else mapped, or the mapped anonymous page might be shared (!PageAnonExclusive), we have to trigger a write fault to break COW. If we don't find an exclusive anonymous page when we retry, we have to trigger COW breaking once again because something intervened. Let's move away from this mandatory-retry + dirty handling and rely on our PageAnonExclusive() flag for making a similar decision, to use the same COW logic as in other kernel parts here as well. In case we stumble over a PTE in a COW mapping that does not map an exclusive anonymous page, COW was not properly broken and we have to trigger a fake write-fault to break COW. Just like we do in can_change_pte_writable() added via commit 64fe24a3e05e ("mm/mprotect: try avoiding write faults for exclusive anonymous pages when changing protection") and commit 76aefad628aa ("mm/mprotect: fix soft-dirty check in can_change_pte_writable()"), take care of softdirty and uffd-wp manually. For example, a write() via /proc/self/mem to a uffd-wp-protected range has to fail instead of silently granting write access and bypassing the userspace fault handler. Note that FOLL_FORCE is not only used for debug access, but also triggered by applications without debug intentions, for example, when pinning pages via RDMA. This fixes CVE-2022-2590. Note that only x86_64 and aarch64 are affected, because only those support CONFIG_HAVE_ARCH_USERFAULTFD_MINOR. Fortunately, FOLL_COW is no longer required to handle FOLL_FORCE. So let's just get rid of it. Thanks to Nadav Amit for pointing out that the pte_dirty() check in FOLL_FORCE code is problematic and might be exploitable. Note 1: We don't check for the PTE being dirty because it doesn't matter for making a "was COWed" decision anymore, and whoever modifies the page has to set the page dirty either way. Note 2: Kernels before extended uffd-wp support and before PageAnonExclusive (< 5.19) can simply revert the problematic commit instead and be safe regarding UFFDIO_CONTINUE. A backport to v5.19 requires minor adjustments due to lack of vma_soft_dirty_enabled(). Link: https://lkml.kernel.org/r/20220809205640.70916-1-david@redhat.com Fixes: 9ae0f87d009c ("mm/shmem: unconditionally set pte dirty in mfill_atomic_install_pte") Signed-off-by: David Hildenbrand Cc: Greg Kroah-Hartman Cc: Axel Rasmussen Cc: Nadav Amit Cc: Peter Xu Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: Matthew Wilcox Cc: Vlastimil Babka Cc: John Hubbard Cc: Jason Gunthorpe Cc: David Laight Cc: [5.16] Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3bedc449c14d..982f2607180b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2885,7 +2885,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ -#define FOLL_COW 0x4000 /* internal GUP flag */ #define FOLL_ANON 0x8000 /* don't do file mappings */ #define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ #define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ -- cgit v1.2.3 From a39c5d3ce03dd890ab6a9be44b21177cec32da55 Mon Sep 17 00:00:00 2001 From: Hao Lee Date: Sun, 7 Aug 2022 15:44:42 +0000 Subject: mm: add DEVICE_ZONE to FOR_ALL_ZONES FOR_ALL_ZONES should be consistent with enum zone_type. Otherwise, __count_zid_vm_events have the potential to add count to wrong item when zid is ZONE_DEVICE. Link: https://lkml.kernel.org/r/20220807154442.GA18167@haolee.io Signed-off-by: Hao Lee Cc: David Hildenbrand Cc: Johannes Weiner Signed-off-by: Andrew Morton --- include/linux/vm_event_item.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 404024486fa5..f3fc36cd2276 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -20,12 +20,19 @@ #define HIGHMEM_ZONE(xx) #endif -#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, HIGHMEM_ZONE(xx) xx##_MOVABLE +#ifdef CONFIG_ZONE_DEVICE +#define DEVICE_ZONE(xx) xx##_DEVICE, +#else +#define DEVICE_ZONE(xx) +#endif + +#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, \ + HIGHMEM_ZONE(xx) xx##_MOVABLE, DEVICE_ZONE(xx) enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, - FOR_ALL_ZONES(PGALLOC), - FOR_ALL_ZONES(ALLOCSTALL), - FOR_ALL_ZONES(PGSCAN_SKIP), + FOR_ALL_ZONES(PGALLOC) + FOR_ALL_ZONES(ALLOCSTALL) + FOR_ALL_ZONES(PGSCAN_SKIP) PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE, PGFAULT, PGMAJFAULT, PGLAZYFREED, -- cgit v1.2.3 From f369b07c861435bd812a9d14493f71b34132ed6f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 11 Aug 2022 16:13:40 -0400 Subject: mm/uffd: reset write protection when unregister with wp-mode The motivation of this patch comes from a recent report and patchfix from David Hildenbrand on hugetlb shared handling of wr-protected page [1]. With the reproducer provided in commit message of [1], one can leverage the uffd-wp lazy-reset of ptes to trigger a hugetlb issue which can affect not only the attacker process, but also the whole system. The lazy-reset mechanism of uffd-wp was used to make unregister faster, meanwhile it has an assumption that any leftover pgtable entries should only affect the process on its own, so not only the user should be aware of anything it does, but also it should not affect outside of the process. But it seems that this is not true, and it can also be utilized to make some exploit easier. So far there's no clue showing that the lazy-reset is important to any userfaultfd users because normally the unregister will only happen once for a specific range of memory of the lifecycle of the process. Considering all above, what this patch proposes is to do explicit pte resets when unregister an uffd region with wr-protect mode enabled. It should be the same as calling ioctl(UFFDIO_WRITEPROTECT, wp=false) right before ioctl(UFFDIO_UNREGISTER) for the user. So potentially it'll make the unregister slower. From that pov it's a very slight abi change, but hopefully nothing should break with this change either. Regarding to the change itself - core of uffd write [un]protect operation is moved into a separate function (uffd_wp_range()) and it is reused in the unregister code path. Note that the new function will not check for anything, e.g. ranges or memory types, because they should have been checked during the previous UFFDIO_REGISTER or it should have failed already. It also doesn't check mmap_changing because we're with mmap write lock held anyway. I added a Fixes upon introducing of uffd-wp shmem+hugetlbfs because that's the only issue reported so far and that's the commit David's reproducer will start working (v5.19+). But the whole idea actually applies to not only file memories but also anonymous. It's just that we don't need to fix anonymous prior to v5.19- because there's no known way to exploit. IOW, this patch can also fix the issue reported in [1] as the patch 2 does. [1] https://lore.kernel.org/all/20220811103435.188481-3-david@redhat.com/ Link: https://lkml.kernel.org/r/20220811201340.39342-1-peterx@redhat.com Fixes: b1f9e876862d ("mm/uffd: enable write protection for shmem & hugetlbfs") Signed-off-by: Peter Xu Cc: David Hildenbrand Cc: Mike Rapoport Cc: Mike Kravetz Cc: Andrea Arcangeli Cc: Nadav Amit Cc: Axel Rasmussen Cc: Signed-off-by: Andrew Morton --- include/linux/userfaultfd_k.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 732b522bacb7..e1b8a915e9e9 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -73,6 +73,8 @@ extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start, extern int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, atomic_t *mmap_changing); +extern void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma, + unsigned long start, unsigned long len, bool enable_wp); /* mm helpers */ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, -- cgit v1.2.3 From cb241339b9d020c758a6647c69f8e42538c5cf88 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 10 Aug 2022 21:51:09 -0700 Subject: mm/shmem: fix chattr fsflags support in tmpfs ext[234] have always allowed unimplemented chattr flags to be set, but other filesystems have tended to be stricter. Follow the stricter approach for tmpfs: I don't want to have to explain why csu attributes don't actually work, and we won't need to update the chattr(1) manpage; and it's never wrong to start off strict, relaxing later if persuaded. Allow only a (append only) i (immutable) A (no atime) and d (no dump). Although lsattr showed 'A' inherited, the NOATIME behavior was not being inherited: because nothing sync'ed FS_NOATIME_FL to S_NOATIME. Add shmem_set_inode_flags() to sync the flags, using inode_set_flags() to avoid that instant of lost immutablility during fileattr_set(). But that change switched generic/079 from passing to failing: because FS_IMMUTABLE_FL and FS_APPEND_FL had been unconventionally included in the INHERITED fsflags: remove them and generic/079 is back to passing. Link: https://lkml.kernel.org/r/2961dcb0-ddf3-b9f0-3268-12a4ff996856@google.com Fixes: e408e695f5f1 ("mm/shmem: support FS_IOC_[SG]ETFLAGS in tmpfs") Signed-off-by: Hugh Dickins Cc: "Theodore Ts'o" Cc: Radoslaw Burny Cc: "Darrick J. Wong" Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 1b6c4013f691..ff0b990de83d 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -29,15 +29,10 @@ struct shmem_inode_info { struct inode vfs_inode; }; -#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE -#define SHMEM_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE -#define SHMEM_FL_INHERITED FS_FL_USER_MODIFIABLE - -/* Flags that are appropriate for regular files (all but dir-specific ones). */ -#define SHMEM_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) - -/* Flags that are appropriate for non-directories/regular files. */ -#define SHMEM_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) +#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE +#define SHMEM_FL_USER_MODIFIABLE \ + (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL) +#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL) struct shmem_sb_info { unsigned long max_blocks; /* How many blocks are allowed */ -- cgit v1.2.3 From d59b73a66e5e0682442b6d7b4965364e57078b80 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 3 Aug 2022 10:49:23 +0300 Subject: net/mlx5: Avoid false positive lockdep warning by adding lock_class_key Add a lock_class_key per mlx5 device to avoid a false positive "possible circular locking dependency" warning by lockdep, on flows which lock more than one mlx5 device, such as adding SF. kernel log: ====================================================== WARNING: possible circular locking dependency detected 5.19.0-rc8+ #2 Not tainted ------------------------------------------------------ kworker/u20:0/8 is trying to acquire lock: ffff88812dfe0d98 (&dev->intf_state_mutex){+.+.}-{3:3}, at: mlx5_init_one+0x2e/0x490 [mlx5_core] but task is already holding lock: ffff888101aa7898 (&(¬ifier->n_head)->rwsem){++++}-{3:3}, at: blocking_notifier_call_chain+0x5a/0x130 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&(¬ifier->n_head)->rwsem){++++}-{3:3}: down_write+0x90/0x150 blocking_notifier_chain_register+0x53/0xa0 mlx5_sf_table_init+0x369/0x4a0 [mlx5_core] mlx5_init_one+0x261/0x490 [mlx5_core] probe_one+0x430/0x680 [mlx5_core] local_pci_probe+0xd6/0x170 work_for_cpu_fn+0x4e/0xa0 process_one_work+0x7c2/0x1340 worker_thread+0x6f6/0xec0 kthread+0x28f/0x330 ret_from_fork+0x1f/0x30 -> #0 (&dev->intf_state_mutex){+.+.}-{3:3}: __lock_acquire+0x2fc7/0x6720 lock_acquire+0x1c1/0x550 __mutex_lock+0x12c/0x14b0 mlx5_init_one+0x2e/0x490 [mlx5_core] mlx5_sf_dev_probe+0x29c/0x370 [mlx5_core] auxiliary_bus_probe+0x9d/0xe0 really_probe+0x1e0/0xaa0 __driver_probe_device+0x219/0x480 driver_probe_device+0x49/0x130 __device_attach_driver+0x1b8/0x280 bus_for_each_drv+0x123/0x1a0 __device_attach+0x1a3/0x460 bus_probe_device+0x1a2/0x260 device_add+0x9b1/0x1b40 __auxiliary_device_add+0x88/0xc0 mlx5_sf_dev_state_change_handler+0x67e/0x9d0 [mlx5_core] blocking_notifier_call_chain+0xd5/0x130 mlx5_vhca_state_work_handler+0x2b0/0x3f0 [mlx5_core] process_one_work+0x7c2/0x1340 worker_thread+0x59d/0xec0 kthread+0x28f/0x330 ret_from_fork+0x1f/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(¬ifier->n_head)->rwsem); lock(&dev->intf_state_mutex); lock(&(¬ifier->n_head)->rwsem); lock(&dev->intf_state_mutex); *** DEADLOCK *** 4 locks held by kworker/u20:0/8: #0: ffff888150612938 ((wq_completion)mlx5_events){+.+.}-{0:0}, at: process_one_work+0x6e2/0x1340 #1: ffff888100cafdb8 ((work_completion)(&work->work)#3){+.+.}-{0:0}, at: process_one_work+0x70f/0x1340 #2: ffff888101aa7898 (&(¬ifier->n_head)->rwsem){++++}-{3:3}, at: blocking_notifier_call_chain+0x5a/0x130 #3: ffff88813682d0e8 (&dev->mutex){....}-{3:3}, at:__device_attach+0x76/0x460 stack backtrace: CPU: 6 PID: 8 Comm: kworker/u20:0 Not tainted 5.19.0-rc8+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_events mlx5_vhca_state_work_handler [mlx5_core] Call Trace: dump_stack_lvl+0x57/0x7d check_noncircular+0x278/0x300 ? print_circular_bug+0x460/0x460 ? lock_chain_count+0x20/0x20 ? register_lock_class+0x1880/0x1880 __lock_acquire+0x2fc7/0x6720 ? register_lock_class+0x1880/0x1880 ? register_lock_class+0x1880/0x1880 lock_acquire+0x1c1/0x550 ? mlx5_init_one+0x2e/0x490 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x400/0x400 __mutex_lock+0x12c/0x14b0 ? mlx5_init_one+0x2e/0x490 [mlx5_core] ? mlx5_init_one+0x2e/0x490 [mlx5_core] ? _raw_read_unlock+0x1f/0x30 ? mutex_lock_io_nested+0x1320/0x1320 ? __ioremap_caller.constprop.0+0x306/0x490 ? mlx5_sf_dev_probe+0x269/0x370 [mlx5_core] ? iounmap+0x160/0x160 mlx5_init_one+0x2e/0x490 [mlx5_core] mlx5_sf_dev_probe+0x29c/0x370 [mlx5_core] ? mlx5_sf_dev_remove+0x130/0x130 [mlx5_core] auxiliary_bus_probe+0x9d/0xe0 really_probe+0x1e0/0xaa0 __driver_probe_device+0x219/0x480 ? auxiliary_match_id+0xe9/0x140 driver_probe_device+0x49/0x130 __device_attach_driver+0x1b8/0x280 ? driver_allows_async_probing+0x140/0x140 bus_for_each_drv+0x123/0x1a0 ? bus_for_each_dev+0x1a0/0x1a0 ? lockdep_hardirqs_on_prepare+0x286/0x400 ? trace_hardirqs_on+0x2d/0x100 __device_attach+0x1a3/0x460 ? device_driver_attach+0x1e0/0x1e0 ? kobject_uevent_env+0x22d/0xf10 bus_probe_device+0x1a2/0x260 device_add+0x9b1/0x1b40 ? dev_set_name+0xab/0xe0 ? __fw_devlink_link_to_suppliers+0x260/0x260 ? memset+0x20/0x40 ? lockdep_init_map_type+0x21a/0x7d0 __auxiliary_device_add+0x88/0xc0 ? auxiliary_device_init+0x86/0xa0 mlx5_sf_dev_state_change_handler+0x67e/0x9d0 [mlx5_core] blocking_notifier_call_chain+0xd5/0x130 mlx5_vhca_state_work_handler+0x2b0/0x3f0 [mlx5_core] ? mlx5_vhca_event_arm+0x100/0x100 [mlx5_core] ? lock_downgrade+0x6e0/0x6e0 ? lockdep_hardirqs_on_prepare+0x286/0x400 process_one_work+0x7c2/0x1340 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? pwq_dec_nr_in_flight+0x230/0x230 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x59d/0xec0 ? process_one_work+0x1340/0x1340 kthread+0x28f/0x330 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Fixes: 6a3273217469 ("net/mlx5: SF, Port function state change support") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 96b16fbe1aa4..7b7ce602c808 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -779,6 +779,7 @@ struct mlx5_core_dev { enum mlx5_device_state state; /* sync interface state */ struct mutex intf_state_mutex; + struct lock_class_key lock_key; unsigned long intf_state; struct mlx5_priv priv; struct mlx5_profile profile; -- cgit v1.2.3 From 13a8e0f6b01b14b2e28ba144e112c883f03a3db2 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 19 Aug 2022 15:16:11 -0700 Subject: Revert "driver core: Delete driver_deferred_probe_check_state()" This reverts commit 9cbffc7a59561be950ecc675d19a3d2b45202b2b. There are a few more issues to fix that have been reported in the thread for the original series [1]. We'll need to fix those before this will work. So, revert it for now. [1] - https://lore.kernel.org/lkml/20220601070707.3946847-1-saravanak@google.com/ Fixes: 9cbffc7a5956 ("driver core: Delete driver_deferred_probe_check_state()") Tested-by: Tony Lindgren Tested-by: Peng Fan Tested-by: Douglas Anderson Tested-by: Alexander Stein Reviewed-by: Tony Lindgren Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20220819221616.2107893-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 7acaabde5396..2114d65b862f 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -242,6 +242,7 @@ driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev) extern int driver_deferred_probe_timeout; void driver_deferred_probe_add(struct device *dev); +int driver_deferred_probe_check_state(struct device *dev); void driver_init(void); /** -- cgit v1.2.3 From 7997eff82828304b780dc0a39707e1946d6f1ebf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 20 Aug 2022 17:38:37 +0200 Subject: netfilter: ebtables: reject blobs that don't provide all entry points Harshit Mogalapalli says: In ebt_do_table() function dereferencing 'private->hook_entry[hook]' can lead to NULL pointer dereference. [..] Kernel panic: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] [..] RIP: 0010:ebt_do_table+0x1dc/0x1ce0 Code: 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 5c 16 00 00 48 b8 00 00 00 00 00 fc ff df 49 8b 6c df 08 48 8d 7d 2c 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 88 [..] Call Trace: nf_hook_slow+0xb1/0x170 __br_forward+0x289/0x730 maybe_deliver+0x24b/0x380 br_flood+0xc6/0x390 br_dev_xmit+0xa2e/0x12c0 For some reason ebtables rejects blobs that provide entry points that are not supported by the table, but what it should instead reject is the opposite: blobs that DO NOT provide an entry point supported by the table. t->valid_hooks is the bitmask of hooks (input, forward ...) that will see packets. Providing an entry point that is not support is harmless (never called/used), but the inverse isn't: it results in a crash because the ebtables traverser doesn't expect a NULL blob for a location its receiving packets for. Instead of fixing all the individual checks, do what iptables is doing and reject all blobs that differ from the expected hooks. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Harshit Mogalapalli Reported-by: syzkaller Signed-off-by: Florian Westphal --- include/linux/netfilter_bridge/ebtables.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index a13296d6c7ce..fd533552a062 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -94,10 +94,6 @@ struct ebt_table { struct ebt_replace_kernel *table; unsigned int valid_hooks; rwlock_t lock; - /* e.g. could be the table explicitly only allows certain - * matches, targets, ... 0 == let it in */ - int (*check)(const struct ebt_table_info *info, - unsigned int valid_hooks); /* the data used by the kernel */ struct ebt_table_info *private; struct nf_hook_ops *ops; -- cgit v1.2.3 From af67508ea6cbf0e4ea27f8120056fa2efce127dd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:56 -0700 Subject: net: Fix data-races around sysctl_fb_tunnels_only_for_init_net. While reading sysctl_fb_tunnels_only_for_init_net, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 79134e6ce2c9 ("net: do not create fallback tunnels for non-default namespaces") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1a3cb93c3dcc..6d3a33fd0cdb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -640,9 +640,14 @@ extern int sysctl_devconf_inherit_init_net; */ static inline bool net_has_fallback_tunnels(const struct net *net) { - return !IS_ENABLED(CONFIG_SYSCTL) || - !sysctl_fb_tunnels_only_for_init_net || - (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1); +#if IS_ENABLED(CONFIG_SYSCTL) + int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net); + + return !fb_tunnels_only_for_init_net || + (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1); +#else + return true; +#endif } static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) -- cgit v1.2.3 From a5612ca10d1aa05624ebe72633e0c8c792970833 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:57 -0700 Subject: net: Fix data-races around sysctl_devconf_inherit_init_net. While reading sysctl_devconf_inherit_init_net, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 856c395cfa63 ("net: introduce a knob to control whether to inherit devconf config") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6d3a33fd0cdb..05d6f3facd5a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -650,6 +650,15 @@ static inline bool net_has_fallback_tunnels(const struct net *net) #endif } +static inline int net_inherit_devconf(void) +{ +#if IS_ENABLED(CONFIG_SYSCTL) + return READ_ONCE(sysctl_devconf_inherit_init_net); +#else + return 0; +#endif +} + static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) { #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) -- cgit v1.2.3 From 40bfe7a86d84cf08ac6a8fe2f0c8bf7a43edd110 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 24 Aug 2022 17:32:56 +0200 Subject: of/device: Fix up of_dma_configure_id() stub Since the stub version of of_dma_configure_id() was added in commit a081bd4af4ce ("of/device: Add input id to of_dma_configure()"), it has not matched the signature of the full function, leading to build failure reports when code using this function is built on !OF configurations. Fixes: a081bd4af4ce ("of/device: Add input id to of_dma_configure()") Cc: stable@vger.kernel.org Signed-off-by: Thierry Reding Reviewed-by: Frank Rowand Acked-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20220824153256.1437483-1-thierry.reding@gmail.com Signed-off-by: Rob Herring --- include/linux/of_device.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 1d7992a02e36..1a803e4335d3 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -101,8 +101,9 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) } static inline int of_dma_configure_id(struct device *dev, - struct device_node *np, - bool force_dma) + struct device_node *np, + bool force_dma, + const u32 *id) { return 0; } -- cgit v1.2.3 From 2a5840124009f133bd09fd855963551fb2cefe22 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Jul 2022 12:16:22 -0700 Subject: lsm,io_uring: add LSM hooks for the new uring_cmd file op io-uring cmd support was added through ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd"), this extended the struct file_operations to allow a new command which each subsystem can use to enable command passthrough. Add an LSM specific for the command passthrough which enables LSMs to inspect the command details. This was discussed long ago without no clear pointer for something conclusive, so this enables LSMs to at least reject this new file operation. [0] https://lkml.kernel.org/r/8adf55db-7bab-f59d-d612-ed906b948d19@schaufler-ca.com Cc: stable@vger.kernel.org Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Signed-off-by: Luis Chamberlain Acked-by: Jens Axboe Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 + include/linux/lsm_hooks.h | 3 +++ include/linux/security.h | 5 +++++ 3 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 806448173033..60fff133c0b1 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -407,4 +407,5 @@ LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #ifdef CONFIG_IO_URING LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) LSM_HOOK(int, 0, uring_sqpoll, void) +LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) #endif /* CONFIG_IO_URING */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 84a0d7e02176..3aa6030302f5 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1582,6 +1582,9 @@ * Check whether the current task is allowed to spawn a io_uring polling * thread (IORING_SETUP_SQPOLL). * + * @uring_cmd: + * Check whether the file_operations uring_cmd is allowed to run. + * */ union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); diff --git a/include/linux/security.h b/include/linux/security.h index 1bc362cb413f..7bd0c490703d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2060,6 +2060,7 @@ static inline int security_perf_event_write(struct perf_event *event) #ifdef CONFIG_SECURITY extern int security_uring_override_creds(const struct cred *new); extern int security_uring_sqpoll(void); +extern int security_uring_cmd(struct io_uring_cmd *ioucmd); #else static inline int security_uring_override_creds(const struct cred *new) { @@ -2069,6 +2070,10 @@ static inline int security_uring_sqpoll(void) { return 0; } +static inline int security_uring_cmd(struct io_uring_cmd *ioucmd) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_IO_URING */ -- cgit v1.2.3 From 8238b4579866b7c1bb99883cfe102a43db5506ff Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 26 Aug 2022 09:17:08 -0400 Subject: wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. Signed-off-by: Mikulas Patocka Acked-by: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 1 + include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cf9bf65039f2..3b89c64bcfd8 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -59,6 +59,7 @@ extern unsigned long __sw_hweight64(__u64 w); #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) #define test_bit(nr, addr) bitop(_test_bit, nr, addr) +#define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) /* * Include this here because some architectures need generic_ffs/fls in diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index def8b8d30ccc..089c9ade4325 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -156,7 +156,7 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh) * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 7dec36aecbd9..7725b7579b78 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } -- cgit v1.2.3 From fcab34b433e2c13e333b2f53c4a8409eadc432c7 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 10 Aug 2022 10:53:59 -0600 Subject: mm: re-allow pinning of zero pfns (again) The below referenced commit makes the same error as 1c563432588d ("mm: fix is_pinnable_page against a cma page"), re-interpreting the logic to exclude pinning of the zero page, which breaks device assignment with vfio. To avoid further subtle mistakes, split the logic into discrete tests. [akpm@linux-foundation.org: simplify comment, per John] Link: https://lkml.kernel.org/r/166015037385.760108.16881097713975517242.stgit@omen Link: https://lore.kernel.org/all/165490039431.944052.12458624139225785964.stgit@omen Fixes: f25cbb7a95a2 ("mm: add zone device coherent type memory support") Signed-off-by: Alex Williamson Suggested-by: Matthew Wilcox Suggested-by: Felix Kuehling Tested-by: Slawomir Laba Reviewed-by: John Hubbard Cc: Alex Sierra Cc: Christoph Hellwig Cc: Alistair Popple Signed-off-by: Andrew Morton --- include/linux/mm.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 982f2607180b..21f8b27bd9fd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1544,9 +1544,16 @@ static inline bool is_longterm_pinnable_page(struct page *page) if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE) return false; #endif - return !(is_device_coherent_page(page) || - is_zone_movable_page(page) || - is_zero_pfn(page_to_pfn(page))); + /* The zero page may always be pinned */ + if (is_zero_pfn(page_to_pfn(page))) + return true; + + /* Coherent device memory must always allow eviction. */ + if (is_device_coherent_page(page)) + return false; + + /* Otherwise, non-movable zone pages can be pinned. */ + return !is_zone_movable_page(page); } #else static inline bool is_longterm_pinnable_page(struct page *page) -- cgit v1.2.3 From dbb16df6443c59e8a1ef21c2272fcf387d600ddf Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 17 Aug 2022 17:21:39 +0000 Subject: Revert "memcg: cleanup racy sum avoidance code" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 96e51ccf1af33e82f429a0d6baebba29c6448d0f. Recently we started running the kernel with rstat infrastructure on production traffic and begin to see negative memcg stats values. Particularly the 'sock' stat is the one which we observed having negative value. $ grep "sock " /mnt/memory/job/memory.stat sock 253952 total_sock 18446744073708724224 Re-run after couple of seconds $ grep "sock " /mnt/memory/job/memory.stat sock 253952 total_sock 53248 For now we are only seeing this issue on large machines (256 CPUs) and only with 'sock' stat. I think the networking stack increase the stat on one cpu and decrease it on another cpu much more often. So, this negative sock is due to rstat flusher flushing the stats on the CPU that has seen the decrement of sock but missed the CPU that has increments. A typical race condition. For easy stable backport, revert is the most simple solution. For long term solution, I am thinking of two directions. First is just reduce the race window by optimizing the rstat flusher. Second is if the reader sees a negative stat value, force flush and restart the stat collection. Basically retry but limited. Link: https://lkml.kernel.org/r/20220817172139.3141101-1-shakeelb@google.com Fixes: 96e51ccf1af33e8 ("memcg: cleanup racy sum avoidance code") Signed-off-by: Shakeel Butt Cc: "Michal Koutný" Cc: Johannes Weiner Cc: Michal Hocko Cc: Roman Gushchin Cc: Muchun Song Cc: David Hildenbrand Cc: Yosry Ahmed Cc: Greg Thelen Cc: [5.15] Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 4d31ce55b1c0..6257867fbf95 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -987,19 +987,30 @@ static inline void mod_memcg_page_state(struct page *page, static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { - return READ_ONCE(memcg->vmstats.state[idx]); + long x = READ_ONCE(memcg->vmstats.state[idx]); +#ifdef CONFIG_SMP + if (x < 0) + x = 0; +#endif + return x; } static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { struct mem_cgroup_per_node *pn; + long x; if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - return READ_ONCE(pn->lruvec_stats.state[idx]); + x = READ_ONCE(pn->lruvec_stats.state[idx]); +#ifdef CONFIG_SMP + if (x < 0) + x = 0; +#endif + return x; } static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, -- cgit v1.2.3 From dcf8e5633e2e69ad60b730ab5905608b756a032f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 Aug 2022 12:59:25 -0700 Subject: tracing: Define the is_signed_type() macro once There are two definitions of the is_signed_type() macro: one in and a second definition in . As suggested by Linus, move the definition of the is_signed_type() macro into the header file. Change the definition of the is_signed_type() macro to make sure that it does not trigger any sparse warnings with future versions of sparse for bitwise types. Link: https://lore.kernel.org/all/CAHk-=whjH6p+qzwUdx5SOVVHjS3WvzJQr6mDUwhEyTf6pJWzaQ@mail.gmail.com/ Link: https://lore.kernel.org/all/CAHk-=wjQGnVfb4jehFR0XyZikdQvCZouE96xR_nnf5kqaM5qqQ@mail.gmail.com/ Cc: Rasmus Villemoes Cc: Steven Rostedt Acked-by: Kees Cook Signed-off-by: Bart Van Assche Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 6 ++++++ include/linux/overflow.h | 1 - include/linux/trace_events.h | 2 -- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 01ce94b58b42..7713d7bcdaea 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -239,6 +239,12 @@ static inline void *offset_to_ptr(const int *off) /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +/* + * Whether 'type' is a signed type or an unsigned type. Supports scalar types, + * bool and also pointer types. + */ +#define is_signed_type(type) (((type)(-1)) < (__force type)1) + /* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. diff --git a/include/linux/overflow.h b/include/linux/overflow.h index f1221d11f8e5..0eb3b192f07a 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -30,7 +30,6 @@ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html - * credit to Christian Biere. */ -#define is_signed_type(type) (((type)(-1)) < (type)1) #define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_min(T) ((T)((T)-type_max(T)-(T)1)) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index b18759a673c6..8401dec93c15 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -814,8 +814,6 @@ extern int trace_add_event_call(struct trace_event_call *call); extern int trace_remove_event_call(struct trace_event_call *call); extern int trace_event_get_offsets(struct trace_event_call *call); -#define is_signed_type(type) (((type)(-1)) < (type)1) - int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); int trace_set_clr_event(const char *system, const char *event, int set); int trace_array_set_clr_event(struct trace_array *tr, const char *system, -- cgit v1.2.3 From 25af7406df5915f04d5f1c8f081dabb0ead1cdcc Mon Sep 17 00:00:00 2001 From: Isaac Manjarres Date: Thu, 18 Aug 2022 18:28:51 +0100 Subject: ARM: 9229/1: amba: Fix use-after-free in amba_read_periphid() After commit f2d3b9a46e0e ("ARM: 9220/1: amba: Remove deferred device addition"), it became possible for amba_read_periphid() to be invoked concurrently from two threads for a particular AMBA device. Consider the case where a thread (T0) is registering an AMBA driver, and searching for all of the devices it can match with on the AMBA bus. Suppose that another thread (T1) is executing the deferred probe work, and is searching through all of the AMBA drivers on the bus for a driver that matches a particular AMBA device. Assume that both threads begin operating on the same AMBA device and the device's peripheral ID is still unknown. In this scenario, the amba_match() function will be invoked for the same AMBA device by both threads, which means amba_read_periphid() can also be invoked by both threads, and both threads will be able to manipulate the AMBA device's pclk pointer without any synchronization. It's possible that one thread will initialize the pclk pointer, then the other thread will re-initialize it, overwriting the previous value, and both will race to free the same pclk, resulting in a use-after-free for whichever thread frees the pclk last. Add a lock per AMBA device to synchronize the handling with detecting the peripheral ID to avoid the use-after-free scenario. The following KFENCE bug report helped detect this problem: ================================================================== BUG: KFENCE: use-after-free read in clk_disable+0x14/0x34 Use-after-free read at 0x(ptrval) (in kfence-#19): clk_disable+0x14/0x34 amba_read_periphid+0xdc/0x134 amba_match+0x3c/0x84 __driver_attach+0x20/0x158 bus_for_each_dev+0x74/0xc0 bus_add_driver+0x154/0x1e8 driver_register+0x88/0x11c do_one_initcall+0x8c/0x2fc kernel_init_freeable+0x190/0x220 kernel_init+0x10/0x108 ret_from_fork+0x14/0x3c 0x0 kfence-#19: 0x(ptrval)-0x(ptrval), size=36, cache=kmalloc-64 allocated by task 8 on cpu 0 at 11.629931s: clk_hw_create_clk+0x38/0x134 amba_get_enable_pclk+0x10/0x68 amba_read_periphid+0x28/0x134 amba_match+0x3c/0x84 __device_attach_driver+0x2c/0xc4 bus_for_each_drv+0x80/0xd0 __device_attach+0xb0/0x1f0 bus_probe_device+0x88/0x90 deferred_probe_work_func+0x8c/0xc0 process_one_work+0x23c/0x690 worker_thread+0x34/0x488 kthread+0xd4/0xfc ret_from_fork+0x14/0x3c 0x0 freed by task 8 on cpu 0 at 11.630095s: amba_read_periphid+0xec/0x134 amba_match+0x3c/0x84 __device_attach_driver+0x2c/0xc4 bus_for_each_drv+0x80/0xd0 __device_attach+0xb0/0x1f0 bus_probe_device+0x88/0x90 deferred_probe_work_func+0x8c/0xc0 process_one_work+0x23c/0x690 worker_thread+0x34/0x488 kthread+0xd4/0xfc ret_from_fork+0x14/0x3c 0x0 Cc: Saravana Kannan Cc: patches@armlinux.org.uk Fixes: f2d3b9a46e0e ("ARM: 9220/1: amba: Remove deferred device addition") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Isaac J. Manjarres Signed-off-by: Russell King (Oracle) --- include/linux/amba/bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index e94cdf235f1d..5001e14c5c06 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -67,6 +67,7 @@ struct amba_device { struct clk *pclk; struct device_dma_parameters dma_parms; unsigned int periphid; + struct mutex periphid_lock; unsigned int cid; struct amba_cs_uci_id uci; unsigned int irq[AMBA_NR_IRQS]; -- cgit v1.2.3 From 9c6d778800b921bde3bff3cff5003d1650f942d1 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 26 Aug 2022 15:31:32 -0400 Subject: USB: core: Prevent nested device-reset calls Automatic kernel fuzzing revealed a recursive locking violation in usb-storage: ============================================ WARNING: possible recursive locking detected 5.18.0 #3 Not tainted -------------------------------------------- kworker/1:3/1205 is trying to acquire lock: ffff888018638db8 (&us_interface_key[i]){+.+.}-{3:3}, at: usb_stor_pre_reset+0x35/0x40 drivers/usb/storage/usb.c:230 but task is already holding lock: ffff888018638db8 (&us_interface_key[i]){+.+.}-{3:3}, at: usb_stor_pre_reset+0x35/0x40 drivers/usb/storage/usb.c:230 ... stack backtrace: CPU: 1 PID: 1205 Comm: kworker/1:3 Not tainted 5.18.0 #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_deadlock_bug kernel/locking/lockdep.c:2988 [inline] check_deadlock kernel/locking/lockdep.c:3031 [inline] validate_chain kernel/locking/lockdep.c:3816 [inline] __lock_acquire.cold+0x152/0x3ca kernel/locking/lockdep.c:5053 lock_acquire kernel/locking/lockdep.c:5665 [inline] lock_acquire+0x1ab/0x520 kernel/locking/lockdep.c:5630 __mutex_lock_common kernel/locking/mutex.c:603 [inline] __mutex_lock+0x14f/0x1610 kernel/locking/mutex.c:747 usb_stor_pre_reset+0x35/0x40 drivers/usb/storage/usb.c:230 usb_reset_device+0x37d/0x9a0 drivers/usb/core/hub.c:6109 r871xu_dev_remove+0x21a/0x270 drivers/staging/rtl8712/usb_intf.c:622 usb_unbind_interface+0x1bd/0x890 drivers/usb/core/driver.c:458 device_remove drivers/base/dd.c:545 [inline] device_remove+0x11f/0x170 drivers/base/dd.c:537 __device_release_driver drivers/base/dd.c:1222 [inline] device_release_driver_internal+0x1a7/0x2f0 drivers/base/dd.c:1248 usb_driver_release_interface+0x102/0x180 drivers/usb/core/driver.c:627 usb_forced_unbind_intf+0x4d/0xa0 drivers/usb/core/driver.c:1118 usb_reset_device+0x39b/0x9a0 drivers/usb/core/hub.c:6114 This turned out not to be an error in usb-storage but rather a nested device reset attempt. That is, as the rtl8712 driver was being unbound from a composite device in preparation for an unrelated USB reset (that driver does not have pre_reset or post_reset callbacks), its ->remove routine called usb_reset_device() -- thus nesting one reset call within another. Performing a reset as part of disconnect processing is a questionable practice at best. However, the bug report points out that the USB core does not have any protection against nested resets. Adding a reset_in_progress flag and testing it will prevent such errors in the future. Link: https://lore.kernel.org/all/CAB7eexKUpvX-JNiLzhXBDWgfg2T9e9_0Tw4HQ6keN==voRbP0g@mail.gmail.com/ Cc: stable@vger.kernel.org Reported-and-tested-by: Rondreis Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/YwkflDxvg0KWqyZK@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index f7a9914fc97f..9ff1ad4dfad1 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -575,6 +575,7 @@ struct usb3_lpm_parameters { * @devaddr: device address, XHCI: assigned by HW, others: same as devnum * @can_submit: URBs may be submitted * @persist_enabled: USB_PERSIST enabled for this device + * @reset_in_progress: the device is being reset * @have_langid: whether string_langid is valid * @authorized: policy has said we can use it; * (user space) policy determines if we authorize this device to be @@ -662,6 +663,7 @@ struct usb_device { unsigned can_submit:1; unsigned persist_enabled:1; + unsigned reset_in_progress:1; unsigned have_langid:1; unsigned authorized:1; unsigned authenticated:1; -- cgit v1.2.3 From ec1bd37123c607ca6485beb4542a792a4db765aa Mon Sep 17 00:00:00 2001 From: Khalid Masum Date: Thu, 18 Aug 2022 10:07:38 +0600 Subject: fscache: fix misdocumented parameter This patch fixes two warnings generated by make docs. The functions fscache_use_cookie and fscache_unuse_cookie, both have a parameter named cookie. But they are documented with the name "object" with unclear description. Which generates the warning when creating docs. This commit will replace the currently misdocumented parameter names with the correct ones while adding proper descriptions. CC: Randy Dunlap Signed-off-by: Khalid Masum Signed-off-by: David Howells Link: https://lore.kernel.org/r/20220521142446.4746-1-khalid.masum.92@gmail.com/ # v1 Link: https://lore.kernel.org/r/20220818040738.12036-1-khalid.masum.92@gmail.com/ # v2 Link: https://lore.kernel.org/r/880d7d25753fb326ee17ac08005952112fcf9bdb.1657360984.git.mchehab@kernel.org/ # Mauro's version --- include/linux/fscache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 720874e6ee94..36e5dd84cf59 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -258,7 +258,7 @@ struct fscache_cookie *fscache_acquire_cookie(struct fscache_volume *volume, /** * fscache_use_cookie - Request usage of cookie attached to an object - * @object: Object description + * @cookie: The cookie representing the cache object * @will_modify: If cache is expected to be modified locally * * Request usage of the cookie attached to an object. The caller should tell @@ -274,7 +274,7 @@ static inline void fscache_use_cookie(struct fscache_cookie *cookie, /** * fscache_unuse_cookie - Cease usage of cookie attached to an object - * @object: Object description + * @cookie: The cookie representing the cache object * @aux_data: Updated auxiliary data (or NULL) * @object_size: Revised size of the object (or NULL) * -- cgit v1.2.3 From 2555283eb40df89945557273121e9393ef9b542b Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 31 Aug 2022 19:06:00 +0200 Subject: mm/rmap: Fix anon_vma->degree ambiguity leading to double-reuse anon_vma->degree tracks the combined number of child anon_vmas and VMAs that use the anon_vma as their ->anon_vma. anon_vma_clone() then assumes that for any anon_vma attached to src->anon_vma_chain other than src->anon_vma, it is impossible for it to be a leaf node of the VMA tree, meaning that for such VMAs ->degree is elevated by 1 because of a child anon_vma, meaning that if ->degree equals 1 there are no VMAs that use the anon_vma as their ->anon_vma. This assumption is wrong because the ->degree optimization leads to leaf nodes being abandoned on anon_vma_clone() - an existing anon_vma is reused and no new parent-child relationship is created. So it is possible to reuse an anon_vma for one VMA while it is still tied to another VMA. This is an issue because is_mergeable_anon_vma() and its callers assume that if two VMAs have the same ->anon_vma, the list of anon_vmas attached to the VMAs is guaranteed to be the same. When this assumption is violated, vma_merge() can merge pages into a VMA that is not attached to the corresponding anon_vma, leading to dangling page->mapping pointers that will be dereferenced during rmap walks. Fix it by separately tracking the number of child anon_vmas and the number of VMAs using the anon_vma as their ->anon_vma. Fixes: 7a3ef208e662 ("mm: prevent endless growth of anon_vma hierarchy") Cc: stable@kernel.org Acked-by: Michal Hocko Acked-by: Vlastimil Babka Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf80adca980b..b89b4b86951f 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -41,12 +41,15 @@ struct anon_vma { atomic_t refcount; /* - * Count of child anon_vmas and VMAs which points to this anon_vma. + * Count of child anon_vmas. Equals to the count of all anon_vmas that + * have ->parent pointing to this one, including itself. * * This counter is used for making decision about reusing anon_vma * instead of forking new one. See comments in function anon_vma_clone. */ - unsigned degree; + unsigned long num_children; + /* Count of VMAs whose ->anon_vma pointer points to this object. */ + unsigned long num_active_vmas; struct anon_vma *parent; /* Parent of this anon_vma */ -- cgit v1.2.3 From ac56a0b48da86fd1b4389632fb7c4c8a5d86eefa Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 26 Aug 2022 15:39:28 +0100 Subject: rxrpc: Fix ICMP/ICMP6 error handling Because rxrpc pretends to be a tunnel on top of a UDP/UDP6 socket, allowing it to siphon off UDP packets early in the handling of received UDP packets thereby avoiding the packet going through the UDP receive queue, it doesn't get ICMP packets through the UDP ->sk_error_report() callback. In fact, it doesn't appear that there's any usable option for getting hold of ICMP packets. Fix this by adding a new UDP encap hook to distribute error messages for UDP tunnels. If the hook is set, then the tunnel driver will be able to see ICMP packets. The hook provides the offset into the packet of the UDP header of the original packet that caused the notification. An alternative would be to call the ->error_handler() hook - but that requires that the skbuff be cloned (as ip_icmp_error() or ipv6_cmp_error() do, though isn't really necessary or desirable in rxrpc's case is we want to parse them there and then, not queue them). Changes ======= ver #3) - Fixed an uninitialised variable. ver #2) - Fixed some missing CONFIG_AF_RXRPC_IPV6 conditionals. Fixes: 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook") Signed-off-by: David Howells --- include/linux/udp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 254a2654400f..e96da4157d04 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -70,6 +70,7 @@ struct udp_sock { * For encapsulation sockets. */ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); + void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); void (*encap_destroy)(struct sock *sk); -- cgit v1.2.3 From 3261400639463a853ba2b3be8bd009c2a8089775 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 31 Aug 2022 23:38:09 +0000 Subject: tcp: TX zerocopy should not sense pfmemalloc status We got a recent syzbot report [1] showing a possible misuse of pfmemalloc page status in TCP zerocopy paths. Indeed, for pages coming from user space or other layers, using page_is_pfmemalloc() is moot, and possibly could give false positives. There has been attempts to make page_is_pfmemalloc() more robust, but not using it in the first place in this context is probably better, removing cpu cycles. Note to stable teams : You need to backport 84ce071e38a6 ("net: introduce __skb_fill_page_desc_noacc") as a prereq. Race is more probable after commit c07aea3ef4d4 ("mm: add a signature in struct page") because page_is_pfmemalloc() is now using low order bit from page->lru.next, which can change more often than page->index. Low order bit should never be set for lru.next (when used as an anchor in LRU list), so KCSAN report is mostly a false positive. Backporting to older kernel versions seems not necessary. [1] BUG: KCSAN: data-race in lru_add_fn / tcp_build_frag write to 0xffffea0004a1d2c8 of 8 bytes by task 18600 on cpu 0: __list_add include/linux/list.h:73 [inline] list_add include/linux/list.h:88 [inline] lruvec_add_folio include/linux/mm_inline.h:105 [inline] lru_add_fn+0x440/0x520 mm/swap.c:228 folio_batch_move_lru+0x1e1/0x2a0 mm/swap.c:246 folio_batch_add_and_move mm/swap.c:263 [inline] folio_add_lru+0xf1/0x140 mm/swap.c:490 filemap_add_folio+0xf8/0x150 mm/filemap.c:948 __filemap_get_folio+0x510/0x6d0 mm/filemap.c:1981 pagecache_get_page+0x26/0x190 mm/folio-compat.c:104 grab_cache_page_write_begin+0x2a/0x30 mm/folio-compat.c:116 ext4_da_write_begin+0x2dd/0x5f0 fs/ext4/inode.c:2988 generic_perform_write+0x1d4/0x3f0 mm/filemap.c:3738 ext4_buffered_write_iter+0x235/0x3e0 fs/ext4/file.c:270 ext4_file_write_iter+0x2e3/0x1210 call_write_iter include/linux/fs.h:2187 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x468/0x760 fs/read_write.c:578 ksys_write+0xe8/0x1a0 fs/read_write.c:631 __do_sys_write fs/read_write.c:643 [inline] __se_sys_write fs/read_write.c:640 [inline] __x64_sys_write+0x3e/0x50 fs/read_write.c:640 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read to 0xffffea0004a1d2c8 of 8 bytes by task 18611 on cpu 1: page_is_pfmemalloc include/linux/mm.h:1740 [inline] __skb_fill_page_desc include/linux/skbuff.h:2422 [inline] skb_fill_page_desc include/linux/skbuff.h:2443 [inline] tcp_build_frag+0x613/0xb20 net/ipv4/tcp.c:1018 do_tcp_sendpages+0x3e8/0xaf0 net/ipv4/tcp.c:1075 tcp_sendpage_locked net/ipv4/tcp.c:1140 [inline] tcp_sendpage+0x89/0xb0 net/ipv4/tcp.c:1150 inet_sendpage+0x7f/0xc0 net/ipv4/af_inet.c:833 kernel_sendpage+0x184/0x300 net/socket.c:3561 sock_sendpage+0x5a/0x70 net/socket.c:1054 pipe_to_sendpage+0x128/0x160 fs/splice.c:361 splice_from_pipe_feed fs/splice.c:415 [inline] __splice_from_pipe+0x222/0x4d0 fs/splice.c:559 splice_from_pipe fs/splice.c:594 [inline] generic_splice_sendpage+0x89/0xc0 fs/splice.c:743 do_splice_from fs/splice.c:764 [inline] direct_splice_actor+0x80/0xa0 fs/splice.c:931 splice_direct_to_actor+0x305/0x620 fs/splice.c:886 do_splice_direct+0xfb/0x180 fs/splice.c:974 do_sendfile+0x3bf/0x910 fs/read_write.c:1249 __do_sys_sendfile64 fs/read_write.c:1317 [inline] __se_sys_sendfile64 fs/read_write.c:1303 [inline] __x64_sys_sendfile64+0x10c/0x150 fs/read_write.c:1303 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x0000000000000000 -> 0xffffea0004a1d288 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 18611 Comm: syz-executor.4 Not tainted 6.0.0-rc2-syzkaller-00248-ge022620b5d05-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/22/2022 Fixes: c07aea3ef4d4 ("mm: add a signature in struct page") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Shakeel Butt Reviewed-by: Shakeel Butt Signed-off-by: David S. Miller --- include/linux/skbuff.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ca8afa382bf2..18e163a3460d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2444,6 +2444,27 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, skb_shinfo(skb)->nr_frags = i + 1; } +/** + * skb_fill_page_desc_noacc - initialise a paged fragment in an skb + * @skb: buffer containing fragment to be initialised + * @i: paged fragment index to initialise + * @page: the page to use for this fragment + * @off: the offset to the data with @page + * @size: the length of the data + * + * Variant of skb_fill_page_desc() which does not deal with + * pfmemalloc, if page is not owned by us. + */ +static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, + struct page *page, int off, + int size) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + + __skb_fill_page_desc_noacc(shinfo, i, page, off, size); + shinfo->nr_frags = i + 1; +} + void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size, unsigned int truesize); -- cgit v1.2.3 From b30f7c8eb0780e1479a9882526e838664271f4c9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 1 Sep 2022 13:07:32 +0100 Subject: spi: mux: Fix mux interaction with fast path optimisations The spi-mux driver is rather too clever and attempts to resubmit any message that is submitted to it to the parent controller with some adjusted callbacks. This does not play at all nicely with the fast path which now sets flags on the message indicating that it's being handled through the fast path, we see async messages flagged as being on the fast path. Ideally the spi-mux code would duplicate the message but that's rather invasive and a bit fragile in that it relies on the mux knowing which fields in the message to copy. Instead teach the core that there are controllers which can't cope with the fast path and have the mux flag itself as being such a controller, ensuring that messages going via the mux don't get partially handled via the fast path. This will reduce the performance of any spi-mux connected device since we'll now always use the thread for both the actual controller and the mux controller instead of just the actual controller but given that we were always hitting the slow path anyway it's hopefully not too much of an additional cost and it allows us to keep the fast path. Fixes: ae7d2346dc89 ("spi: Don't use the message queue if possible in spi_sync") Reported-by: Casper Andersson Tested-by: Casper Andersson Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220901120732.49245-1-broonie@kernel.org Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e6c73d5ff1a8..f089ee1ead58 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -469,6 +469,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * SPI_TRANS_FAIL_NO_START. * @queue_empty: signal green light for opportunistically skipping the queue * for spi_sync transfers. + * @must_async: disable all fast paths in the core * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -690,6 +691,7 @@ struct spi_controller { /* Flag for enabling opportunistic skipping of the queue in spi_sync */ bool queue_empty; + bool must_async; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) -- cgit v1.2.3 From 2aec909912da55a6e469fd6ee8412080a5433ed2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Aug 2022 11:46:38 +0200 Subject: wifi: use struct_group to copy addresses We sometimes copy all the addresses from the 802.11 header for the AAD, which may cause complaints from fortify checks. Use struct_group() to avoid the compiler warnings/errors. Change-Id: Ic3ea389105e7813b22095b295079eecdabde5045 Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 55e6f4ad0ca6..b6e6d5b40774 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -310,9 +310,11 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; - u8 addr1[ETH_ALEN]; - u8 addr2[ETH_ALEN]; - u8 addr3[ETH_ALEN]; + struct_group(addrs, + u8 addr1[ETH_ALEN]; + u8 addr2[ETH_ALEN]; + u8 addr3[ETH_ALEN]; + ); __le16 seq_ctrl; u8 addr4[ETH_ALEN]; } __packed __aligned(2); -- cgit v1.2.3 From dec9b2f1e0455a151a7293c367da22ab973f713e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 2 Sep 2022 16:59:15 +0200 Subject: debugfs: add debugfs_lookup_and_remove() There is a very common pattern of using debugfs_remove(debufs_lookup(..)) which results in a dentry leak of the dentry that was looked up. Instead of having to open-code the correct pattern of calling dput() on the dentry, create debugfs_lookup_and_remove() to handle this pattern automatically and properly without any memory leaks. Cc: stable Reported-by: Kuyo Chang Tested-by: Kuyo Chang Link: https://lore.kernel.org/r/YxIaQ8cSinDR881k@kroah.com Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index c869f1e73d75..f60674692d36 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -91,6 +91,8 @@ struct dentry *debugfs_create_automount(const char *name, void debugfs_remove(struct dentry *dentry); #define debugfs_remove_recursive debugfs_remove +void debugfs_lookup_and_remove(const char *name, struct dentry *parent); + const struct file_operations *debugfs_real_fops(const struct file *filp); int debugfs_file_get(struct dentry *dentry); @@ -225,6 +227,10 @@ static inline void debugfs_remove(struct dentry *dentry) static inline void debugfs_remove_recursive(struct dentry *dentry) { } +static inline void debugfs_lookup_and_remove(const char *name, + struct dentry *parent) +{ } + const struct file_operations *debugfs_real_fops(const struct file *filp); static inline int debugfs_file_get(struct dentry *dentry) -- cgit v1.2.3 From 9ca05b0f27de928be121cccf07735819dc9e1ed3 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 29 Aug 2022 12:02:27 +0300 Subject: RDMA/mlx5: Rely on RoCE fw cap instead of devlink when setting profile When the RDMA auxiliary driver probes, it sets its profile based on devlink driverinit value. The latter might not be in sync with FW yet (In case devlink reload is not performed), thus causing a mismatch between RDMA driver and FW. This results in the following FW syndrome when the RDMA driver tries to adjust RoCE state, which fails the probe: "0xC1F678 | modify_nic_vport_context: roce_en set on a vport that doesn't support roce" To prevent this, select the PF profile based on FW RoCE capability instead of relying on devlink driverinit value. To provide backward compatibility of the RoCE disable feature, on older FW's where roce_rw is not set (FW RoCE capability is read-only), keep the current behavior e.g., rely on devlink driverinit value. Fixes: fbfa97b4d79f ("net/mlx5: Disable roce at HCA level") Reviewed-by: Shay Drory Reviewed-by: Michael Guralnik Reviewed-by: Saeed Mahameed Signed-off-by: Maher Sanalla Link: https://lore.kernel.org/r/cb34ce9a1df4a24c135cb804db87f7d2418bd6cc.1661763459.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 96b16fbe1aa4..d6338fb449c8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1279,16 +1279,17 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; -static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) +bool mlx5_is_roce_on(struct mlx5_core_dev *dev); + +static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev) { - struct devlink *devlink = priv_to_devlink(dev); - union devlink_param_value val; - int err; - - err = devlink_param_driverinit_value_get(devlink, - DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, - &val); - return err ? MLX5_CAP_GEN(dev, roce) : val.vbool; + if (MLX5_CAP_GEN(dev, roce_rw_supported)) + return MLX5_CAP_GEN(dev, roce); + + /* If RoCE cap is read-only in FW, get RoCE state from devlink + * in order to support RoCE enable/disable feature + */ + return mlx5_is_roce_on(dev); } #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From 8409fe92d88c332923130149fe209d1c882b286e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 27 Aug 2022 15:03:43 +0200 Subject: PCI: Move PCI_VENDOR_ID_MICROSOFT/PCI_DEVICE_ID_HYPERV_VIDEO definitions to pci_ids.h There are already three places in kernel which define PCI_VENDOR_ID_MICROSOFT and two for PCI_DEVICE_ID_HYPERV_VIDEO and there's a need to use these from core VMBus code. Move the defines where they belong. No functional change. Reviewed-by: Michael Kelley Acked-by: Bjorn Helgaas # pci_ids.h Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20220827130345.1320254-2-vkuznets@redhat.com Signed-off-by: Wei Liu --- include/linux/pci_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6feade66efdb..15b49e655ce3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2079,6 +2079,9 @@ #define PCI_DEVICE_ID_ICE_1712 0x1712 #define PCI_DEVICE_ID_VT1724 0x1724 +#define PCI_VENDOR_ID_MICROSOFT 0x1414 +#define PCI_DEVICE_ID_HYPERV_VIDEO 0x5353 + #define PCI_VENDOR_ID_OXSEMI 0x1415 #define PCI_DEVICE_ID_OXSEMI_12PCI840 0x8403 #define PCI_DEVICE_ID_OXSEMI_PCIe840 0xC000 -- cgit v1.2.3 From 9fc18f6d56d5b79d527c17a8100a0965d18345cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 21 Aug 2022 16:06:44 +0200 Subject: dma-mapping: mark dma_supported static Now that the remaining users in drivers are gone, this function can be marked static. Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 25a30906289d..0ee20b764000 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -139,7 +139,6 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); bool dma_can_mmap(struct device *dev); -int dma_supported(struct device *dev, u64 mask); bool dma_pci_p2pdma_supported(struct device *dev); int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); @@ -248,10 +247,6 @@ static inline bool dma_can_mmap(struct device *dev) { return false; } -static inline int dma_supported(struct device *dev, u64 mask) -{ - return 0; -} static inline bool dma_pci_p2pdma_supported(struct device *dev) { return false; -- cgit v1.2.3 From 2f79cdfe58c13949bbbb65ba5926abfe9561d0ec Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Aug 2022 09:46:12 -0700 Subject: fs: only do a memory barrier for the first set_buffer_uptodate() Commit d4252071b97d ("add barriers to buffer_uptodate and set_buffer_uptodate") added proper memory barriers to the buffer head BH_Uptodate bit, so that anybody who tests a buffer for being up-to-date will be guaranteed to actually see initialized state. However, that commit didn't _just_ add the memory barrier, it also ended up dropping the "was it already set" logic that the BUFFER_FNS() macro had. That's conceptually the right thing for a generic "this is a memory barrier" operation, but in the case of the buffer contents, we really only care about the memory barrier for the _first_ time we set the bit, in that the only memory ordering protection we need is to avoid anybody seeing uninitialized memory contents. Any other access ordering wouldn't be about the BH_Uptodate bit anyway, and would require some other proper lock (typically BH_Lock or the folio lock). A reader that races with somebody invalidating the buffer head isn't an issue wrt the memory ordering, it's a serialization issue. Now, you'd think that the buffer head operations don't matter in this day and age (and I certainly thought so), but apparently some loads still end up being heavy users of buffer heads. In particular, the kernel test robot reported that not having this bit access optimization in place caused a noticeable direct IO performance regression on ext4: fxmark.ssd_ext4_no_jnl_DWTL_54_directio.works/sec -26.5% regression although you presumably need a fast disk and a lot of cores to actually notice. Link: https://lore.kernel.org/all/Yw8L7HTZ%2FdE2%2Fo9C@xsang-OptiPlex-9020/ Reported-by: kernel test robot Tested-by: Fengwei Yin Cc: Mikulas Patocka Cc: Matthew Wilcox (Oracle) Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 089c9ade4325..df518c429667 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -137,6 +137,17 @@ BUFFER_FNS(Defer_Completion, defer_completion) static __always_inline void set_buffer_uptodate(struct buffer_head *bh) { + /* + * If somebody else already set this uptodate, they will + * have done the memory barrier, and a reader will thus + * see *some* valid buffer state. + * + * Any other serialization (with IO errors or whatever that + * might clear the bit) has to come from other state (eg BH_Lock). + */ + if (test_bit(BH_Uptodate, &bh->b_state)) + return; + /* * make it consistent with folio_mark_uptodate * pairs with smp_load_acquire in buffer_uptodate -- cgit v1.2.3 From 9cd4f1434479f1ac25c440c421fbf52069079914 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sun, 11 Sep 2022 11:18:45 +0800 Subject: iommu/vt-d: Fix possible recursive locking in intel_iommu_init() The global rwsem dmar_global_lock was introduced by commit 3a5670e8ac932 ("iommu/vt-d: Introduce a rwsem to protect global data structures"). It is used to protect DMAR related global data from DMAR hotplug operations. The dmar_global_lock used in the intel_iommu_init() might cause recursive locking issue, for example, intel_iommu_get_resv_regions() is taking the dmar_global_lock from within a section where intel_iommu_init() already holds it via probe_acpi_namespace_devices(). Using dmar_global_lock in intel_iommu_init() could be relaxed since it is unlikely that any IO board must be hot added before the IOMMU subsystem is initialized. This eliminates the possible recursive locking issue by moving down DMAR hotplug support after the IOMMU is initialized and removing the uses of dmar_global_lock in intel_iommu_init(). Fixes: d5692d4af08cd ("iommu/vt-d: Fix suspicious RCU usage in probe_acpi_namespace_devices()") Reported-by: Robin Murphy Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/894db0ccae854b35c73814485569b634237b5538.1657034828.git.robin.murphy@arm.com Link: https://lore.kernel.org/r/20220718235325.3952426-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d81a51978d01..8917a32173c4 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -65,6 +65,7 @@ struct dmar_pci_notify_info { extern struct rw_semaphore dmar_global_lock; extern struct list_head dmar_drhd_units; +extern int intel_iommu_enabled; #define for_each_drhd_unit(drhd) \ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ @@ -88,7 +89,8 @@ extern struct list_head dmar_drhd_units; static inline bool dmar_rcu_check(void) { return rwsem_is_locked(&dmar_global_lock) || - system_state == SYSTEM_BOOTING; + system_state == SYSTEM_BOOTING || + (IS_ENABLED(CONFIG_INTEL_IOMMU) && !intel_iommu_enabled); } #define dmar_rcu_dereference(p) rcu_dereference_check((p), dmar_rcu_check()) -- cgit v1.2.3 From 4b9d1bc7911c9d9159c4881455c584cde99fbb19 Mon Sep 17 00:00:00 2001 From: Jiangshan Yi Date: Tue, 6 Sep 2022 13:49:01 +0800 Subject: Input: hp_sdc: fix spelling typo in comment Fix spelling typo in comment. Reported-by: k2ci Signed-off-by: Jiangshan Yi Signed-off-by: Helge Deller --- include/linux/hp_sdc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hp_sdc.h b/include/linux/hp_sdc.h index 6f1dee7e67e0..9be8704e2d38 100644 --- a/include/linux/hp_sdc.h +++ b/include/linux/hp_sdc.h @@ -180,7 +180,7 @@ switch (val) { \ #define HP_SDC_CMD_SET_IM 0x40 /* 010xxxxx == set irq mask */ -/* The documents provided do not explicitly state that all registers betweem +/* The documents provided do not explicitly state that all registers between * 0x01 and 0x1f inclusive can be read by sending their register index as a * command, but this is implied and appears to be the case. */ -- cgit v1.2.3