From e1827807e8d9d5b67c21aa565697211df3f23d25 Mon Sep 17 00:00:00 2001 From: Lauri Kasanen Date: Fri, 15 Jan 2021 16:01:02 +0200 Subject: video: fbdev: simplefb: Add "r5g5b5a1" mode Add "r5g5b5a1" which is used on the N64. Signed-off-by: Lauri Kasanen Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20210115160102.cf4c85db9f815758716f086f@gmx.com --- include/linux/platform_data/simplefb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/simplefb.h b/include/linux/platform_data/simplefb.h index ca8337695c2a..27ea99af6e1d 100644 --- a/include/linux/platform_data/simplefb.h +++ b/include/linux/platform_data/simplefb.h @@ -16,6 +16,7 @@ #define SIMPLEFB_FORMATS \ { \ { "r5g6b5", 16, {11, 5}, {5, 6}, {0, 5}, {0, 0}, DRM_FORMAT_RGB565 }, \ + { "r5g5b5a1", 16, {11, 5}, {6, 5}, {1, 5}, {0, 1}, DRM_FORMAT_RGBA5551 }, \ { "x1r5g5b5", 16, {10, 5}, {5, 5}, {0, 5}, {0, 0}, DRM_FORMAT_XRGB1555 }, \ { "a1r5g5b5", 16, {10, 5}, {5, 5}, {0, 5}, {15, 1}, DRM_FORMAT_ARGB1555 }, \ { "r8g8b8", 24, {16, 8}, {8, 8}, {0, 8}, {0, 0}, DRM_FORMAT_RGB888 }, \ -- cgit v1.2.3 From 7621350c6bb20fb6ab7eb988833ab96eac3dcbef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 15 Jan 2021 14:32:39 +0100 Subject: drm/syncobj: make lockdep complain on WAIT_FOR_SUBMIT v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT can't be used when we hold locks since we are basically waiting for userspace to do something. Holding a lock while doing so can trivial deadlock with page faults etc... So make lockdep complain when a driver tries to do this. v2: Add lockdep_assert_none_held() macro. v3: Add might_sleep() and also use lockdep_assert_none_held() in the IOCTL path. Signed-off-by: Christian König Reviewed-by: Daniel Vetter Acked-by: Peter Zijlstra (Intel) Link: https://patchwork.freedesktop.org/patch/414944/ --- include/linux/lockdep.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b9e9adec73e8..6eb117c0d0f3 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -310,6 +310,10 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); WARN_ON_ONCE(debug_locks && !lockdep_is_held(l)); \ } while (0) +#define lockdep_assert_none_held_once() do { \ + WARN_ON_ONCE(debug_locks && current->lockdep_depth); \ + } while (0) + #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) #define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map) @@ -387,6 +391,7 @@ extern int lockdep_is_held(const void *); #define lockdep_assert_held_write(l) do { (void)(l); } while (0) #define lockdep_assert_held_read(l) do { (void)(l); } while (0) #define lockdep_assert_held_once(l) do { (void)(l); } while (0) +#define lockdep_assert_none_held_once() do { } while (0) #define lockdep_recursing(tsk) (0) -- cgit v1.2.3 From c7f59e3dd60313071a989227dcb69094f499d310 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 19 Jan 2021 20:45:08 +0000 Subject: dma-buf: heaps: Rework heap allocation hooks to return struct dma_buf instead of fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every heap needs to create a dmabuf and then export it to a fd via dma_buf_fd(), so to consolidate things a bit, have the heaps just return a struct dmabuf * and let the top level dma_heap_buffer_alloc() call handle creating the fd via dma_buf_fd(). Cc: Sumit Semwal Cc: Liam Mark Cc: Laura Abbott Cc: Brian Starkey Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Cc: Sandeep Patil Cc: Daniel Mentz Cc: Chris Goldsworthy Cc: Ørjan Eide Cc: Robin Murphy Cc: Ezequiel Garcia Cc: Simon Ser Cc: James Jones Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: John Stultz Signed-off-by: Sumit Semwal [sumits: minor reword of commit message] Link: https://patchwork.freedesktop.org/patch/msgid/20210119204508.9256-3-john.stultz@linaro.org --- include/linux/dma-heap.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h index 454e354d1ffb..5bc5c946af58 100644 --- a/include/linux/dma-heap.h +++ b/include/linux/dma-heap.h @@ -16,15 +16,15 @@ struct dma_heap; /** * struct dma_heap_ops - ops to operate on a given heap - * @allocate: allocate dmabuf and return fd + * @allocate: allocate dmabuf and return struct dma_buf ptr * - * allocate returns dmabuf fd on success, -errno on error. + * allocate returns dmabuf on success, ERR_PTR(-errno) on error. */ struct dma_heap_ops { - int (*allocate)(struct dma_heap *heap, - unsigned long len, - unsigned long fd_flags, - unsigned long heap_flags); + struct dma_buf *(*allocate)(struct dma_heap *heap, + unsigned long len, + unsigned long fd_flags, + unsigned long heap_flags); }; /** -- cgit v1.2.3 From 5a164ac4dbd21b82bcdc03186d40e455ff467fdc Mon Sep 17 00:00:00 2001 From: Veera Sundaram Sankaran Date: Fri, 15 Jan 2021 16:31:46 -0800 Subject: dma-fence: allow signaling drivers to set fence timestamp Some drivers have hardware capability to get the precise HW timestamp of certain events based on which the fences are triggered. The delta between the event HW timestamp & current HW reference timestamp can be used to calculate the timestamp in kernel's CLOCK_MONOTONIC time domain. This allows it to set accurate timestamp factoring out any software and IRQ latencies. Add a timestamp variant of fence signal function, dma_fence_signal_timestamp to allow drivers to update the precise timestamp for fences. Changes in v2: - Add a new fence signal variant instead of modifying fence struct Changes in v3: - Add timestamp domain information to commit-text and dma_fence_signal_timestamp documentation Signed-off-by: Veera Sundaram Sankaran Reviewed-by: John Stultz Signed-off-by: Sumit Semwal [sumits: minor parenthesis alignment] Link: https://patchwork.freedesktop.org/patch/msgid/1610757107-11892-1-git-send-email-veeras@codeaurora.org --- include/linux/dma-fence.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 09e23adb351d..9f12efaaa93a 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -372,6 +372,9 @@ static inline void __dma_fence_might_wait(void) {} int dma_fence_signal(struct dma_fence *fence); int dma_fence_signal_locked(struct dma_fence *fence); +int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp); +int dma_fence_signal_timestamp_locked(struct dma_fence *fence, + ktime_t timestamp); signed long dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout); int dma_fence_add_callback(struct dma_fence *fence, -- cgit v1.2.3 From 66922f850910056a2a3193388dd69b900e824717 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Sun, 24 Jan 2021 05:40:10 +0100 Subject: drm: Fix HDMI_STATIC_METADATA_TYPE1 constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the CTA 861.G spec, HDMI_STATIC_METADATA_TYPE1 is not 1, but zero, so fix this enum. While this doesn't cause problems in the kernel yet, as the constant isn't actively used by drivers yet, it did create confusion while debugging HDR problems in yours truly, and also potential bugs in userspace components, as the wrong enum propagates to components, e.g., like it did already into intel-gpu-tools (tests/kms_hdr.c) or is used as wrong reference when writing future new userspace HDR components like compositors. Fixes: fbb5d0353c62 ("drm: Add HDR source metadata property") Signed-off-by: Mario Kleiner Cc: Uma Shankar Cc: Shashank Sharma Cc: Ville Syrjälä Reviewed-by: Simon Ser Signed-off-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20210124044010.18678-1-mario.kleiner.de@gmail.com --- include/linux/hdmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 9850d59d6f1c..c8ec982ff498 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -156,7 +156,7 @@ enum hdmi_content_type { }; enum hdmi_metadata_type { - HDMI_STATIC_METADATA_TYPE1 = 1, + HDMI_STATIC_METADATA_TYPE1 = 0, }; enum hdmi_eotf { -- cgit v1.2.3 From 2463e073497385ef63c220571013a2b89e9b95cc Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 18 Feb 2021 20:49:41 +0000 Subject: netdevice: Add missing IFF_PHONY_HEADROOM self-definition This is harmless for now, but can be fatal for future refactors. Fixes: 871b642adebe3 ("netdev: introduce ndo_set_rx_headroom") Signed-off-by: Alexander Lobakin Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210218204908.5455-2-alobakin@pm.me --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ddf4cfc12615..3b6f82c2c271 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1577,6 +1577,7 @@ enum netdev_priv_flags { #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM IFF_TEAM #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED +#define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM #define IFF_MACSEC IFF_MACSEC #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER #define IFF_FAILOVER IFF_FAILOVER -- cgit v1.2.3 From c2ff53d8049f30098153cd2d1299a44d7b124c57 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 18 Feb 2021 20:50:02 +0000 Subject: net: Add priv_flags for allow tx skb without linear In some cases, we hope to construct skb directly based on the existing memory without copying data. In this case, the page will be placed directly in the skb, and the linear space of skb is empty. But unfortunately, many the network card does not support this operation. For example Mellanox Technologies MT27710 Family [ConnectX-4 Lx] will get the following error message: mlx5_core 0000:3b:00.1 eth1: Error cqe on cqn 0x817, ci 0x8, qn 0x1dbb, opcode 0xd, syndrome 0x1, vendor syndrome 0x68 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000030: 00 00 00 00 60 10 68 01 0a 00 1d bb 00 0f 9f d2 WQE DUMP: WQ size 1024 WQ cur size 0, WQE index 0xf, len: 64 00000000: 00 00 0f 0a 00 1d bb 03 00 00 00 08 00 00 00 00 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000020: 00 00 00 2b 00 08 00 00 00 00 00 05 9e e3 08 00 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 mlx5_core 0000:3b:00.1 eth1: ERR CQE on SQ: 0x1dbb So a priv_flag is added here to indicate whether the network card supports this feature. Suggested-by: Alexander Lobakin Signed-off-by: Xuan Zhuo Signed-off-by: Alexander Lobakin Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210218204908.5455-3-alobakin@pm.me --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3b6f82c2c271..6cef47b76cc6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1518,6 +1518,8 @@ struct net_device_ops { * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running + * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with + * skb_headlen(skb) == 0 (data starts from frag0) */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1551,6 +1553,7 @@ enum netdev_priv_flags { IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_LIVE_RENAME_OK = 1<<30, + IFF_TX_SKB_NO_LINEAR = 1<<31, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1584,6 +1587,7 @@ enum netdev_priv_flags { #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER #define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK +#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR /** * struct net_device - The DEVICE structure. -- cgit v1.2.3 From 98cd02da467cb4a319b7041d6242de8a4a33e833 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 9 Feb 2021 19:48:17 +0000 Subject: dma-buf: dma-heap: Provide accessor to get heap name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It can be useful to access the name for the heap, so provide an accessor to do so. Cc: Daniel Vetter Cc: Sumit Semwal Cc: Liam Mark Cc: Chris Goldsworthy Cc: Laura Abbott Cc: Brian Starkey Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Cc: Sandeep Patil Cc: Daniel Mentz Cc: Ørjan Eide Cc: Robin Murphy Cc: Ezequiel Garcia Cc: Simon Ser Cc: James Jones Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: John Stultz Acked-by: Daniel Vetter Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20210209194818.2459062-1-john.stultz@linaro.org --- include/linux/dma-heap.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h index 5bc5c946af58..0c05561cad6e 100644 --- a/include/linux/dma-heap.h +++ b/include/linux/dma-heap.h @@ -50,6 +50,15 @@ struct dma_heap_export_info { */ void *dma_heap_get_drvdata(struct dma_heap *heap); +/** + * dma_heap_get_name() - get heap name + * @heap: DMA-Heap to retrieve private data for + * + * Returns: + * The char* for the heap name. + */ +const char *dma_heap_get_name(struct dma_heap *heap); + /** * dma_heap_add - adds a heap to dmabuf heaps * @exp_info: information needed to register this heap -- cgit v1.2.3 From f00bdce0455233a0b76dae6364442dca717a574c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 23 Feb 2021 14:19:04 +0800 Subject: vdpa: set the virtqueue num during register This patch delay the queue number setting to vDPA device registering. This allows us to probe the virtqueue numbers between device allocation and registering. Reviewed-by: Stefano Garzarella Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20210223061905.422659-3-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 4ab5494503a8..15fa085fab05 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -250,20 +250,20 @@ struct vdpa_config_ops { struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, - int nvqs, size_t size, const char *name); + size_t size, const char *name); -#define vdpa_alloc_device(dev_struct, member, parent, config, nvqs, name) \ +#define vdpa_alloc_device(dev_struct, member, parent, config, name) \ container_of(__vdpa_alloc_device( \ - parent, config, nvqs, \ + parent, config, \ sizeof(dev_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ dev_struct, member)), name), \ dev_struct, member) -int vdpa_register_device(struct vdpa_device *vdev); +int vdpa_register_device(struct vdpa_device *vdev, int nvqs); void vdpa_unregister_device(struct vdpa_device *vdev); -int _vdpa_register_device(struct vdpa_device *vdev); +int _vdpa_register_device(struct vdpa_device *vdev, int nvqs); void _vdpa_unregister_device(struct vdpa_device *vdev); /** -- cgit v1.2.3 From a10787e6d58c24b51e91c19c6d16c5da89fcaa4b Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 25 Feb 2021 15:43:14 -0800 Subject: bpf: Enable task local storage for tracing programs To access per-task data, BPF programs usually creates a hash table with pid as the key. This is not ideal because: 1. The user need to estimate the proper size of the hash table, which may be inaccurate; 2. Big hash tables are slow; 3. To clean up the data properly during task terminations, the user need to write extra logic. Task local storage overcomes these issues and offers a better option for these per-task data. Task local storage is only available to BPF_LSM. Now enable it for tracing programs. Unlike LSM programs, tracing programs can be called in IRQ contexts. Helpers that access task local storage are updated to use raw_spin_lock_irqsave() instead of raw_spin_lock_bh(). Tracing programs can attach to functions on the task free path, e.g. exit_creds(). To avoid allocating task local storage after bpf_task_storage_free(). bpf_task_storage_get() is updated to not allocate new storage when the task is not refcounted (task->usage == 0). Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: KP Singh Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210225234319.336131-2-songliubraving@fb.com --- include/linux/bpf.h | 7 +++++++ include/linux/bpf_lsm.h | 22 ---------------------- include/linux/bpf_types.h | 2 +- include/linux/sched.h | 5 +++++ 4 files changed, 13 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cccaef1088ea..e2cfc4809219 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1499,6 +1499,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); +void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1684,6 +1685,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) { return NULL; } + +static inline void bpf_task_storage_free(struct task_struct *task) +{ +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -1886,6 +1891,8 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; +extern const struct bpf_func_proto bpf_task_storage_get_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 0d1c33ace398..479c101546ad 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode( return inode->i_security + bpf_lsm_blob_sizes.lbs_inode; } -static inline struct bpf_storage_blob *bpf_task( - const struct task_struct *task) -{ - if (unlikely(!task->security)) - return NULL; - - return task->security + bpf_lsm_blob_sizes.lbs_task; -} - extern const struct bpf_func_proto bpf_inode_storage_get_proto; extern const struct bpf_func_proto bpf_inode_storage_delete_proto; -extern const struct bpf_func_proto bpf_task_storage_get_proto; -extern const struct bpf_func_proto bpf_task_storage_delete_proto; void bpf_inode_storage_free(struct inode *inode); -void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_LSM */ @@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode( return NULL; } -static inline struct bpf_storage_blob *bpf_task( - const struct task_struct *task) -{ - return NULL; -} - static inline void bpf_inode_storage_free(struct inode *inode) { } -static inline void bpf_task_storage_free(struct task_struct *task) -{ -} - #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 99f7fd657d87..b9edee336d80 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -109,8 +109,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) #endif #ifdef CONFIG_BPF_LSM BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) #endif +BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4d568288abf9..e5fbf8e6952a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -42,6 +42,7 @@ struct audit_context; struct backing_dev_info; struct bio_list; struct blk_plug; +struct bpf_local_storage; struct capture_control; struct cfs_rq; struct fs_struct; @@ -1348,6 +1349,10 @@ struct task_struct { /* Used by LSM modules for access restriction: */ void *security; #endif +#ifdef CONFIG_BPF_SYSCALL + /* Used by BPF task local storage */ + struct bpf_local_storage __rcu *bpf_storage; +#endif #ifdef CONFIG_GCC_PLUGIN_STACKLEAK unsigned long lowest_stack; -- cgit v1.2.3 From bc235cdb423a2daed6f337676006a66557429cd1 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 25 Feb 2021 15:43:15 -0800 Subject: bpf: Prevent deadlock from recursive bpf_task_storage_[get|delete] BPF helpers bpf_task_storage_[get|delete] could hold two locks: bpf_local_storage_map_bucket->lock and bpf_local_storage->lock. Calling these helpers from fentry/fexit programs on functions in bpf_*_storage.c may cause deadlock on either locks. Prevent such deadlock with a per cpu counter, bpf_task_storage_busy. We need this counter to be global, because the two locks here belong to two different objects: bpf_local_storage_map and bpf_local_storage. If we pick one of them as the owner of the counter, it is still possible to trigger deadlock on the other lock. For example, if bpf_local_storage_map owns the counters, it cannot prevent deadlock on bpf_local_storage->lock when two maps are used. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210225234319.336131-3-songliubraving@fb.com --- include/linux/bpf_local_storage.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index b2c9463f36a1..b902c580c48d 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -126,7 +126,8 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, bool cacheit_lockit); -void bpf_local_storage_map_free(struct bpf_local_storage_map *smap); +void bpf_local_storage_map_free(struct bpf_local_storage_map *smap, + int __percpu *busy_counter); int bpf_local_storage_map_check_btf(const struct bpf_map *map, const struct btf *btf, -- cgit v1.2.3 From 523a4cf491b3c9e2d546040d57250f1a0ca84f03 Mon Sep 17 00:00:00 2001 From: Dmitrii Banshchikov Date: Fri, 26 Feb 2021 00:26:29 +0400 Subject: bpf: Use MAX_BPF_FUNC_REG_ARGS macro Instead of using integer literal here and there use macro name for better context. Signed-off-by: Dmitrii Banshchikov Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210225202629.585485-1-me@ubique.spb.ru --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e2cfc4809219..ae2c35641619 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -506,6 +506,11 @@ enum bpf_cgroup_storage_type { */ #define MAX_BPF_FUNC_ARGS 12 +/* The maximum number of arguments passed through registers + * a single function may have. + */ +#define MAX_BPF_FUNC_REG_ARGS 5 + struct btf_func_model { u8 ret_size; u8 nr_args; -- cgit v1.2.3 From 887596095ec2a9ea39ffcf98f27bf2e77c5eb512 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 23 Feb 2021 10:49:26 -0800 Subject: bpf: Clean up sockmap related Kconfigs As suggested by John, clean up sockmap related Kconfigs: Reduce the scope of CONFIG_BPF_STREAM_PARSER down to TCP stream parser, to reflect its name. Make the rest sockmap code simply depend on CONFIG_BPF_SYSCALL and CONFIG_INET, the latter is still needed at this point because of TCP/UDP proto update. And leave CONFIG_NET_SOCK_MSG untouched, as it is used by non-sockmap cases. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Reviewed-by: Lorenz Bauer Acked-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20210223184934.6054-2-xiyou.wangcong@gmail.com --- include/linux/bpf.h | 26 ++++++++++++-------------- include/linux/bpf_types.h | 6 ++---- include/linux/skmsg.h | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ae2c35641619..2be47ada5f2d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1778,7 +1778,7 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) } #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ -#if defined(CONFIG_BPF_STREAM_PARSER) +#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, struct bpf_prog *old, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); @@ -1786,7 +1786,18 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); + +void bpf_sk_reuseport_detach(struct sock *sk); +int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, + void *value); +int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags); #else +static inline void bpf_sk_reuseport_detach(struct sock *sk) +{ +} + +#ifdef CONFIG_BPF_SYSCALL static inline int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, struct bpf_prog *old, u32 which) @@ -1811,20 +1822,7 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void { return -EOPNOTSUPP; } -#endif /* CONFIG_BPF_STREAM_PARSER */ -#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) -void bpf_sk_reuseport_detach(struct sock *sk); -int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, - void *value); -int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags); -#else -static inline void bpf_sk_reuseport_detach(struct sock *sk) -{ -} - -#ifdef CONFIG_BPF_SYSCALL static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, void *value) { diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index b9edee336d80..f883f01a5061 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -103,10 +103,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) -#if defined(CONFIG_BPF_STREAM_PARSER) -BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) -#endif #ifdef CONFIG_BPF_LSM BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) #endif @@ -116,6 +112,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) #endif #ifdef CONFIG_INET +BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) #endif #endif diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 8edbbf5f2f93..db7a08be4725 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -71,7 +71,9 @@ struct sk_psock_link { }; struct sk_psock_parser { +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) struct strparser strp; +#endif bool enabled; void (*saved_data_ready)(struct sock *sk); }; @@ -305,9 +307,25 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err) struct sk_psock *sk_psock_init(struct sock *sk, int node); +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock); +#else +static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) +{ + return -EOPNOTSUPP; +} + +static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) +{ +} + +static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) +{ +} +#endif + void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock); void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock); -- cgit v1.2.3 From 5a685cd94b21a88efa6be77169eddef525368034 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 23 Feb 2021 10:49:27 -0800 Subject: skmsg: Get rid of struct sk_psock_parser struct sk_psock_parser is embedded in sk_psock, it is unnecessary as skb verdict also uses ->saved_data_ready. We can simply fold these fields into sk_psock, and get rid of ->enabled. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20210223184934.6054-3-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index db7a08be4725..22e26f82de33 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -70,14 +70,6 @@ struct sk_psock_link { void *link_raw; }; -struct sk_psock_parser { -#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) - struct strparser strp; -#endif - bool enabled; - void (*saved_data_ready)(struct sock *sk); -}; - struct sk_psock_work_state { struct sk_buff *skb; u32 len; @@ -92,7 +84,9 @@ struct sk_psock { u32 eval; struct sk_msg *cork; struct sk_psock_progs progs; - struct sk_psock_parser parser; +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) + struct strparser strp; +#endif struct sk_buff_head ingress_skb; struct list_head ingress_msg; unsigned long state; @@ -102,6 +96,7 @@ struct sk_psock { void (*saved_unhash)(struct sock *sk); void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); + void (*saved_data_ready)(struct sock *sk); struct proto *sk_proto; struct sk_psock_work_state work_state; struct work_struct work; @@ -418,8 +413,8 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) { - if (psock->parser.enabled) - psock->parser.saved_data_ready(sk); + if (psock->saved_data_ready) + psock->saved_data_ready(sk); else sk->sk_data_ready(sk); } @@ -458,6 +453,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) { if (!psock) return false; - return psock->parser.enabled; + return !!psock->saved_data_ready; } #endif /* _LINUX_SKMSG_H */ -- cgit v1.2.3 From e3526bb92a2084cdaec6cb2855bcec98b280426c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 23 Feb 2021 10:49:29 -0800 Subject: skmsg: Move sk_redir from TCP_SKB_CB to skb Currently TCP_SKB_CB() is hard-coded in skmsg code, it certainly does not work for any other non-TCP protocols. We can move them to skb ext, but it introduces a memory allocation on fast path. Fortunately, we only need to a word-size to store all the information, because the flags actually only contains 1 bit so can be just packed into the lowest bit of the "pointer", which is stored as unsigned long. Inside struct sk_buff, '_skb_refdst' can be reused because skb dst is no longer needed after ->sk_data_ready() so we can just drop it. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20210223184934.6054-5-xiyou.wangcong@gmail.com --- include/linux/skbuff.h | 3 +++ include/linux/skmsg.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6d0a33d1c0db..bd84f799c952 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -755,6 +755,9 @@ struct sk_buff { void (*destructor)(struct sk_buff *skb); }; struct list_head tcp_tsorted_anchor; +#ifdef CONFIG_NET_SOCK_MSG + unsigned long _sk_redir; +#endif }; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 22e26f82de33..e0de45527bb6 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -455,4 +455,42 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return false; return !!psock->saved_data_ready; } + +#if IS_ENABLED(CONFIG_NET_SOCK_MSG) + +/* We only have one bit so far. */ +#define BPF_F_PTR_MASK ~(BPF_F_INGRESS) + +static inline bool skb_bpf_ingress(const struct sk_buff *skb) +{ + unsigned long sk_redir = skb->_sk_redir; + + return sk_redir & BPF_F_INGRESS; +} + +static inline void skb_bpf_set_ingress(struct sk_buff *skb) +{ + skb->_sk_redir |= BPF_F_INGRESS; +} + +static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir, + bool ingress) +{ + skb->_sk_redir = (unsigned long)sk_redir; + if (ingress) + skb->_sk_redir |= BPF_F_INGRESS; +} + +static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb) +{ + unsigned long sk_redir = skb->_sk_redir; + + return (struct sock *)(sk_redir & BPF_F_PTR_MASK); +} + +static inline void skb_bpf_redirect_clear(struct sk_buff *skb) +{ + skb->_sk_redir = 0; +} +#endif /* CONFIG_NET_SOCK_MSG */ #endif /* _LINUX_SKMSG_H */ -- cgit v1.2.3 From ae8b8332fbb512f53bf50ff6a7586dd0f90ed18a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 23 Feb 2021 10:49:30 -0800 Subject: sock_map: Rename skb_parser and skb_verdict These two eBPF programs are tied to BPF_SK_SKB_STREAM_PARSER and BPF_SK_SKB_STREAM_VERDICT, rename them to reflect the fact they are only used for TCP. And save the name 'skb_verdict' for general use later. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Reviewed-by: Lorenz Bauer Acked-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20210223184934.6054-6-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e0de45527bb6..d9f6ec4a9cf2 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -56,8 +56,8 @@ struct sk_msg { struct sk_psock_progs { struct bpf_prog *msg_parser; - struct bpf_prog *skb_parser; - struct bpf_prog *skb_verdict; + struct bpf_prog *stream_parser; + struct bpf_prog *stream_verdict; }; enum sk_psock_state_bits { @@ -443,8 +443,8 @@ static inline int psock_replace_prog(struct bpf_prog **pprog, static inline void psock_progs_drop(struct sk_psock_progs *progs) { psock_set_prog(&progs->msg_parser, NULL); - psock_set_prog(&progs->skb_parser, NULL); - psock_set_prog(&progs->skb_verdict, NULL); + psock_set_prog(&progs->stream_parser, NULL); + psock_set_prog(&progs->stream_verdict, NULL); } int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb); -- cgit v1.2.3 From 4675e234b9e15159894b90ead9340e1dc202b670 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 23 Feb 2021 10:49:31 -0800 Subject: sock_map: Make sock_map_prog_update() static It is only used within sock_map.c so can become static. Suggested-by: Jakub Sitnicki Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20210223184934.6054-7-xiyou.wangcong@gmail.com --- include/linux/bpf.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2be47ada5f2d..e1e4d2f60527 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1779,8 +1779,6 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) -int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); @@ -1798,13 +1796,6 @@ static inline void bpf_sk_reuseport_detach(struct sock *sk) } #ifdef CONFIG_BPF_SYSCALL -static inline int sock_map_prog_update(struct bpf_map *map, - struct bpf_prog *prog, - struct bpf_prog *old, u32 which) -{ - return -EOPNOTSUPP; -} - static inline int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) { -- cgit v1.2.3 From cd81cefb1abc52bd164f4d9760cd22eadc0e4468 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 23 Feb 2021 10:49:32 -0800 Subject: skmsg: Make __sk_psock_purge_ingress_msg() static It is only used within skmsg.c so can become static. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20210223184934.6054-8-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index d9f6ec4a9cf2..676d48e08159 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -340,8 +340,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link) struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock); -void __sk_psock_purge_ingress_msg(struct sk_psock *psock); - static inline void sk_psock_cork_free(struct sk_psock *psock) { if (psock->cork) { -- cgit v1.2.3 From ff9614b81be65d648ec4615b593c6e4b2dac6375 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 23 Feb 2021 10:49:34 -0800 Subject: skmsg: Remove unused sk_psock_stop() declaration It is not defined or used anywhere. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20210223184934.6054-10-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 676d48e08159..6c09d94be2e9 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -400,7 +400,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk) return psock; } -void sk_psock_stop(struct sock *sk, struct sk_psock *psock); void sk_psock_drop(struct sock *sk, struct sk_psock *psock); static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) -- cgit v1.2.3 From 69c087ba6225b574afb6e505b72cb75242a3d844 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 26 Feb 2021 12:49:25 -0800 Subject: bpf: Add bpf_for_each_map_elem() helper The bpf_for_each_map_elem() helper is introduced which iterates all map elements with a callback function. The helper signature looks like long bpf_for_each_map_elem(map, callback_fn, callback_ctx, flags) and for each map element, the callback_fn will be called. For example, like hashmap, the callback signature may look like long callback_fn(map, key, val, callback_ctx) There are two known use cases for this. One is from upstream ([1]) where a for_each_map_elem helper may help implement a timeout mechanism in a more generic way. Another is from our internal discussion for a firewall use case where a map contains all the rules. The packet data can be compared to all these rules to decide allow or deny the packet. For array maps, users can already use a bounded loop to traverse elements. Using this helper can avoid using bounded loop. For other type of maps (e.g., hash maps) where bounded loop is hard or impossible to use, this helper provides a convenient way to operate on all elements. For callback_fn, besides map and map element, a callback_ctx, allocated on caller stack, is also passed to the callback function. This callback_ctx argument can provide additional input and allow to write to caller stack for output. If the callback_fn returns 0, the helper will iterate through next element if available. If the callback_fn returns 1, the helper will stop iterating and returns to the bpf program. Other return values are not used for now. Currently, this helper is only available with jit. It is possible to make it work with interpreter with so effort but I leave it as the future work. [1]: https://lore.kernel.org/bpf/20210122205415.113822-1-xiyou.wangcong@gmail.com/ Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210226204925.3884923-1-yhs@fb.com --- include/linux/bpf.h | 13 +++++++++++++ include/linux/bpf_verifier.h | 3 +++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e1e4d2f60527..aeb1b93a4d75 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -39,6 +39,7 @@ struct bpf_local_storage; struct bpf_local_storage_map; struct kobject; struct mem_cgroup; +struct bpf_func_state; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -129,6 +130,13 @@ struct bpf_map_ops { bool (*map_meta_equal)(const struct bpf_map *meta0, const struct bpf_map *meta1); + + int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn, + void *callback_ctx, u64 flags); + /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; @@ -295,6 +303,8 @@ enum bpf_arg_type { ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ + ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ + ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ __BPF_ARG_TYPE_MAX, }; @@ -411,6 +421,8 @@ enum bpf_reg_type { PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ + PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_MAP_KEY, /* reg points to a map element key */ }; /* The information passed from prog-specific *_is_valid_access @@ -1887,6 +1899,7 @@ extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; extern const struct bpf_func_proto bpf_task_storage_get_proto; extern const struct bpf_func_proto bpf_task_storage_delete_proto; +extern const struct bpf_func_proto bpf_for_each_map_elem_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 971b33aca13d..51c2ffa3d901 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -68,6 +68,8 @@ struct bpf_reg_state { unsigned long raw1; unsigned long raw2; } raw; + + u32 subprogno; /* for PTR_TO_FUNC */ }; /* For PTR_TO_PACKET, used to find other pointers with the same variable * offset, so they can share range knowledge. @@ -204,6 +206,7 @@ struct bpf_func_state { int acquired_refs; struct bpf_reference_state *refs; int allocated_stack; + bool in_callback_fn; struct bpf_stack_state *stack; }; -- cgit v1.2.3 From 314ee05e2fc601a7bece14376547d2b7a04bab67 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 26 Feb 2021 12:49:27 -0800 Subject: bpf: Add hashtab support for bpf_for_each_map_elem() helper This patch added support for hashmap, percpu hashmap, lru hashmap and percpu lru hashmap. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210226204927.3885020-1-yhs@fb.com --- include/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index aeb1b93a4d75..4c730863fa77 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1397,6 +1397,10 @@ void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux, int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info); +int map_set_for_each_callback_args(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, -- cgit v1.2.3 From e940e0895a82c6fbaa259f2615eb52b57ee91a7e Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 26 Feb 2021 10:24:56 +0100 Subject: can: skb: can_skb_set_owner(): fix ref counting if socket was closed before setting skb ownership There are two ref count variables controlling the free()ing of a socket: - struct sock::sk_refcnt - which is changed by sock_hold()/sock_put() - struct sock::sk_wmem_alloc - which accounts the memory allocated by the skbs in the send path. In case there are still TX skbs on the fly and the socket() is closed, the struct sock::sk_refcnt reaches 0. In the TX-path the CAN stack clones an "echo" skb, calls sock_hold() on the original socket and references it. This produces the following back trace: | WARNING: CPU: 0 PID: 280 at lib/refcount.c:25 refcount_warn_saturate+0x114/0x134 | refcount_t: addition on 0; use-after-free. | Modules linked in: coda_vpu(E) v4l2_jpeg(E) videobuf2_vmalloc(E) imx_vdoa(E) | CPU: 0 PID: 280 Comm: test_can.sh Tainted: G E 5.11.0-04577-gf8ff6603c617 #203 | Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) | Backtrace: | [<80bafea4>] (dump_backtrace) from [<80bb0280>] (show_stack+0x20/0x24) r7:00000000 r6:600f0113 r5:00000000 r4:81441220 | [<80bb0260>] (show_stack) from [<80bb593c>] (dump_stack+0xa0/0xc8) | [<80bb589c>] (dump_stack) from [<8012b268>] (__warn+0xd4/0x114) r9:00000019 r8:80f4a8c2 r7:83e4150c r6:00000000 r5:00000009 r4:80528f90 | [<8012b194>] (__warn) from [<80bb09c4>] (warn_slowpath_fmt+0x88/0xc8) r9:83f26400 r8:80f4a8d1 r7:00000009 r6:80528f90 r5:00000019 r4:80f4a8c2 | [<80bb0940>] (warn_slowpath_fmt) from [<80528f90>] (refcount_warn_saturate+0x114/0x134) r8:00000000 r7:00000000 r6:82b44000 r5:834e5600 r4:83f4d540 | [<80528e7c>] (refcount_warn_saturate) from [<8079a4c8>] (__refcount_add.constprop.0+0x4c/0x50) | [<8079a47c>] (__refcount_add.constprop.0) from [<8079a57c>] (can_put_echo_skb+0xb0/0x13c) | [<8079a4cc>] (can_put_echo_skb) from [<8079ba98>] (flexcan_start_xmit+0x1c4/0x230) r9:00000010 r8:83f48610 r7:0fdc0000 r6:0c080000 r5:82b44000 r4:834e5600 | [<8079b8d4>] (flexcan_start_xmit) from [<80969078>] (netdev_start_xmit+0x44/0x70) r9:814c0ba0 r8:80c8790c r7:00000000 r6:834e5600 r5:82b44000 r4:82ab1f00 | [<80969034>] (netdev_start_xmit) from [<809725a4>] (dev_hard_start_xmit+0x19c/0x318) r9:814c0ba0 r8:00000000 r7:82ab1f00 r6:82b44000 r5:00000000 r4:834e5600 | [<80972408>] (dev_hard_start_xmit) from [<809c6584>] (sch_direct_xmit+0xcc/0x264) r10:834e5600 r9:00000000 r8:00000000 r7:82b44000 r6:82ab1f00 r5:834e5600 r4:83f27400 | [<809c64b8>] (sch_direct_xmit) from [<809c6c0c>] (__qdisc_run+0x4f0/0x534) To fix this problem, only set skb ownership to sockets which have still a ref count > 0. Fixes: 0ae89beb283a ("can: add destructor for self generated skbs") Cc: Oliver Hartkopp Cc: Andre Naujoks Link: https://lore.kernel.org/r/20210226092456.27126-1-o.rempel@pengutronix.de Suggested-by: Eric Dumazet Signed-off-by: Oleksij Rempel Reviewed-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index 685f34cfba20..d438eb058069 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -65,8 +65,12 @@ static inline void can_skb_reserve(struct sk_buff *skb) static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) { - if (sk) { - sock_hold(sk); + /* If the socket has already been closed by user space, the + * refcount may already be 0 (and the socket will be freed + * after the last TX skb has been freed). So only increase + * socket refcount if the refcount is > 0. + */ + if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) { skb->destructor = sock_efree; skb->sk = sk; } -- cgit v1.2.3 From 79bfe480a0a0b259ab9fddcd2fe52c03542b1196 Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Tue, 16 Feb 2021 00:58:49 +0900 Subject: firmware: xilinx: Remove zynqmp_pm_get_eemi_ops() in IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) zynqmp_pm_get_eemi_ops() was removed in commit 4db8180ffe7c: "Firmware: xilinx: Remove eemi ops for fpga related APIs", but not in IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE). Any driver who want to communicate with PMC using EEMI APIs use the functions provided for each function This removed zynqmp_pm_get_eemi_ops() in IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE), and also modify the documentation for this driver. Fixes: 4db8180ffe7c ("firmware: xilinx: Remove eemi ops for fpga related APIs") Signed-off-by: Nobuhiro Iwamatsu Link: https://lore.kernel.org/r/20210215155849.2425846-1-iwamatsu@nigauri.org Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 71177b17eee5..66e2423d9feb 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -354,11 +354,6 @@ int zynqmp_pm_read_pggs(u32 index, u32 *value); int zynqmp_pm_system_shutdown(const u32 type, const u32 subtype); int zynqmp_pm_set_boot_health_status(u32 value); #else -static inline struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void) -{ - return ERR_PTR(-ENODEV); -} - static inline int zynqmp_pm_get_api_version(u32 *version) { return -ENODEV; -- cgit v1.2.3 From b228c9b058760500fda5edb3134527f629fc2dc3 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 1 Mar 2021 15:09:44 +0000 Subject: net: expand textsearch ts_state to fit skb_seq_state The referenced commit expands the skb_seq_state used by skb_find_text with a 4B frag_off field, growing it to 48B. This exceeds container ts_state->cb, causing a stack corruption: [ 73.238353] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: skb_find_text+0xc5/0xd0 [ 73.247384] CPU: 1 PID: 376 Comm: nping Not tainted 5.11.0+ #4 [ 73.252613] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [ 73.260078] Call Trace: [ 73.264677] dump_stack+0x57/0x6a [ 73.267866] panic+0xf6/0x2b7 [ 73.270578] ? skb_find_text+0xc5/0xd0 [ 73.273964] __stack_chk_fail+0x10/0x10 [ 73.277491] skb_find_text+0xc5/0xd0 [ 73.280727] string_mt+0x1f/0x30 [ 73.283639] ipt_do_table+0x214/0x410 The struct is passed between skb_find_text and its callbacks skb_prepare_seq_read, skb_seq_read and skb_abort_seq read through the textsearch interface using TS_SKB_CB. I assumed that this mapped to skb->cb like other .._SKB_CB wrappers. skb->cb is 48B. But it maps to ts_state->cb, which is only 40B. skb->cb was increased from 40B to 48B after ts_state was introduced, in commit 3e3850e989c5 ("[NETFILTER]: Fix xfrm lookup in ip_route_me_harder/ip6_route_me_harder"). Increase ts_state.cb[] to 48 to fit the struct. Also add a BUILD_BUG_ON to avoid a repeat. The alternative is to directly add a dependency from textsearch onto linux/skbuff.h, but I think the intent is textsearch to have no such dependencies on its callers. Link: https://bugzilla.kernel.org/show_bug.cgi?id=211911 Fixes: 97550f6fa592 ("net: compound page support in skb_seq_read") Reported-by: Kris Karas Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/textsearch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 13770cfe33ad..6673e4d4ac2e 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -23,7 +23,7 @@ struct ts_config; struct ts_state { unsigned int offset; - char cb[40]; + char cb[48]; }; /** -- cgit v1.2.3 From c95c34f01bbda4421c25fdc9b04a4a4aab10d36c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 3 Mar 2021 19:56:34 +0100 Subject: xsk: Remove dangling function declaration from header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xdp_umem_query() is dead for a long time, drop the declaration from include/linux/netdevice.h Fixes: c9b47cc1fabc ("xsk: fix bug when trying to use both copy and zero-copy on one queue id") Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20210303185636.18070-2-maciej.fijalkowski@intel.com --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f06fbee8638e..5b67ea89d5f2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3959,8 +3959,6 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); -int xdp_umem_query(struct net_device *dev, u16 queue_id); - int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb); -- cgit v1.2.3 From 6ed6e1c761f6c8391af654facbbbf1748ae9f386 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 1 Mar 2021 10:48:05 -0800 Subject: skmsg: Add function doc for skb->_sk_redir This should fix the following warning: include/linux/skbuff.h:932: warning: Function parameter or member '_sk_redir' not described in 'sk_buff' Reported-by: Lorenz Bauer Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20210301184805.8174-1-xiyou.wangcong@gmail.com --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bd84f799c952..0503c917d773 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -656,6 +656,7 @@ typedef unsigned char *sk_buff_data_t; * @protocol: Packet protocol from driver * @destructor: Destruct function * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) + * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on -- cgit v1.2.3 From 4ec591790356f0e5a95f8d278b0cfd04aea2ae52 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Jan 2021 10:33:05 +0800 Subject: scsi: sbitmap: Remove sbitmap_clear_bit_unlock No one uses this helper any more, so kill it. Link: https://lore.kernel.org/r/20210122023317.687987-2-ming.lei@redhat.com Cc: Omar Sandoval Cc: Kashyap Desai Cc: Sumanesh Samanta Cc: Ewan D. Milne Cc: Hannes Reinecke Tested-by: Sumanesh Samanta Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- include/linux/sbitmap.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 74cc6384715e..16353fbee765 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -315,12 +315,6 @@ static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int b set_bit(SB_NR_TO_BIT(sb, bitnr), addr); } -static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb, - unsigned int bitnr) -{ - clear_bit_unlock(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); -} - static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) { return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); -- cgit v1.2.3 From efe1f3a1d5833c0ddd61ee50dbef8908f65a0a5e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Jan 2021 10:33:06 +0800 Subject: scsi: sbitmap: Maintain allocation round_robin in sbitmap Currently the allocation round_robin info is maintained by sbitmap_queue. However, bit allocation really belongs to sbitmap. Move it there. Link: https://lore.kernel.org/r/20210122023317.687987-3-ming.lei@redhat.com Cc: Omar Sandoval Cc: Kashyap Desai Cc: Sumanesh Samanta Cc: Ewan D. Milne Cc: Hannes Reinecke Cc: virtualization@lists.linux-foundation.org Tested-by: Sumanesh Samanta Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- include/linux/sbitmap.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 16353fbee765..734ee6214cd6 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -56,6 +56,11 @@ struct sbitmap { */ unsigned int map_nr; + /** + * @round_robin: Allocate bits in strict round-robin order. + */ + bool round_robin; + /** * @map: Allocated bitmap. */ @@ -124,11 +129,6 @@ struct sbitmap_queue { */ atomic_t ws_active; - /** - * @round_robin: Allocate bits in strict round-robin order. - */ - bool round_robin; - /** * @min_shallow_depth: The minimum shallow depth which may be passed to * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). @@ -144,11 +144,14 @@ struct sbitmap_queue { * given, a good default is chosen. * @flags: Allocation flags. * @node: Memory node to allocate on. + * @round_robin: If true, be stricter about allocation order; always allocate + * starting from the last allocated bit. This is less efficient + * than the default behavior (false). * * Return: Zero on success or negative errno on failure. */ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, - gfp_t flags, int node); + gfp_t flags, int node, bool round_robin); /** * sbitmap_free() - Free memory used by a &struct sbitmap. @@ -174,15 +177,12 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap. * @sb: Bitmap to allocate from. * @alloc_hint: Hint for where to start searching for a free bit. - * @round_robin: If true, be stricter about allocation order; always allocate - * starting from the last allocated bit. This is less efficient - * than the default behavior (false). * * This operation provides acquire barrier semantics if it succeeds. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ -int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin); +int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint); /** * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, -- cgit v1.2.3 From c548e62bcf6adc7066ff201e9ecc88e536dd8890 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Jan 2021 10:33:08 +0800 Subject: scsi: sbitmap: Move allocation hint into sbitmap Allocation hint should have belonged to sbitmap. Also, when sbitmap's depth is high and there is no need to use mulitple wakeup queues, user can benefit from percpu allocation hint too. Move allocation hint into sbitmap, then SCSI device queue can benefit from allocation hint when converting to plain sbitmap. Convert vhost/scsi.c to use sbitmap allocation with percpu alloc hint. This is more efficient than the previous approach. Link: https://lore.kernel.org/r/20210122023317.687987-5-ming.lei@redhat.com Cc: Omar Sandoval Cc: Kashyap Desai Cc: Sumanesh Samanta Cc: Ewan D. Milne Cc: Mike Christie Cc: virtualization@lists.linux-foundation.org Tested-by: Sumanesh Samanta Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- include/linux/sbitmap.h | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 734ee6214cd6..247776fcc02c 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -65,6 +65,14 @@ struct sbitmap { * @map: Allocated bitmap. */ struct sbitmap_word *map; + + /* + * @alloc_hint: Cache of last successfully allocated or freed bit. + * + * This is per-cpu, which allows multiple users to stick to different + * cachelines until the map is exhausted. + */ + unsigned int __percpu *alloc_hint; }; #define SBQ_WAIT_QUEUES 8 @@ -100,14 +108,6 @@ struct sbitmap_queue { */ struct sbitmap sb; - /* - * @alloc_hint: Cache of last successfully allocated or freed bit. - * - * This is per-cpu, which allows multiple users to stick to different - * cachelines until the map is exhausted. - */ - unsigned int __percpu *alloc_hint; - /** * @wake_batch: Number of bits which must be freed before we wake up any * waiters. @@ -147,11 +147,13 @@ struct sbitmap_queue { * @round_robin: If true, be stricter about allocation order; always allocate * starting from the last allocated bit. This is less efficient * than the default behavior (false). + * @alloc_hint: If true, apply percpu hint for where to start searching for + * a free bit. * * Return: Zero on success or negative errno on failure. */ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, - gfp_t flags, int node, bool round_robin); + gfp_t flags, int node, bool round_robin, bool alloc_hint); /** * sbitmap_free() - Free memory used by a &struct sbitmap. @@ -159,6 +161,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, */ static inline void sbitmap_free(struct sbitmap *sb) { + free_percpu(sb->alloc_hint); kfree(sb->map); sb->map = NULL; } @@ -176,19 +179,17 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); /** * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap. * @sb: Bitmap to allocate from. - * @alloc_hint: Hint for where to start searching for a free bit. * * This operation provides acquire barrier semantics if it succeeds. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ -int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint); +int sbitmap_get(struct sbitmap *sb); /** * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, * limiting the depth used from each word. * @sb: Bitmap to allocate from. - * @alloc_hint: Hint for where to start searching for a free bit. * @shallow_depth: The maximum number of bits to allocate from a single word. * * This rather specific operation allows for having multiple users with @@ -200,8 +201,7 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint); * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ -int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, - unsigned long shallow_depth); +int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth); /** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. @@ -315,6 +315,18 @@ static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int b set_bit(SB_NR_TO_BIT(sb, bitnr), addr); } +/* + * Pair of sbitmap_get, and this one applies both cleared bit and + * allocation hint. + */ +static inline void sbitmap_put(struct sbitmap *sb, unsigned int bitnr) +{ + sbitmap_deferred_clear_bit(sb, bitnr); + + if (likely(sb->alloc_hint && !sb->round_robin && bitnr < sb->depth)) + *this_cpu_ptr(sb->alloc_hint) = bitnr; +} + static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) { return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); @@ -363,7 +375,6 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) { kfree(sbq->ws); - free_percpu(sbq->alloc_hint); sbitmap_free(&sbq->sb); } -- cgit v1.2.3 From cbb9950b41dd9dfb7c2be3429ba09f83b8b1ff98 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Jan 2021 10:33:09 +0800 Subject: scsi: sbitmap: Export sbitmap_weight SCSI's .device_busy will be converted to sbitmap and sbitmap_weight is needed. Export the helper. The only existing user of sbitmap_weight() uses it to find out how many bits are set and not cleared. Align sbitmap_weight() meaning with this usage model. Link: https://lore.kernel.org/r/20210122023317.687987-6-ming.lei@redhat.com Cc: Omar Sandoval Cc: Kashyap Desai Cc: Sumanesh Samanta Cc: Ewan D. Milne Tested-by: Sumanesh Samanta Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- include/linux/sbitmap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 247776fcc02c..c65ba887dcc3 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -341,6 +341,16 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) */ void sbitmap_show(struct sbitmap *sb, struct seq_file *m); + +/** + * sbitmap_weight() - Return how many set and not cleared bits in a &struct + * sbitmap. + * @sb: Bitmap to check. + * + * Return: How many set and not cleared bits set + */ +unsigned int sbitmap_weight(const struct sbitmap *sb); + /** * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct * seq_file. -- cgit v1.2.3 From 2d13b1ea9f4affdaa7af0e0e4a1358d28f80c54f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Jan 2021 10:33:10 +0800 Subject: scsi: sbitmap: Add sbitmap_calculate_shift() helper Move code for calculating default shift into a public helper which can be used by SCSI. Link: https://lore.kernel.org/r/20210122023317.687987-7-ming.lei@redhat.com Cc: Omar Sandoval Cc: Kashyap Desai Cc: Sumanesh Samanta Cc: Ewan D. Milne Tested-by: Sumanesh Samanta Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- include/linux/sbitmap.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index c65ba887dcc3..3087e1f15fdd 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -332,6 +332,24 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } +static inline int sbitmap_calculate_shift(unsigned int depth) +{ + int shift = ilog2(BITS_PER_LONG); + + /* + * If the bitmap is small, shrink the number of bits per word so + * we spread over a few cachelines, at least. If less than 4 + * bits, just forget about it, it's not going to work optimally + * anyway. + */ + if (depth >= 4) { + while ((4U << shift) > depth) + shift--; + } + + return shift; +} + /** * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file. * @sb: Bitmap to show. -- cgit v1.2.3 From d022d18c045fc2ccf92d0f14cf80f98eb0a8e119 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Jan 2021 10:33:11 +0800 Subject: scsi: blk-mq: Add callbacks for storing & retrieving budget token Since SCSI is the only driver which requires dispatch budget move the token from struct request to struct scsi_cmnd. Link: https://lore.kernel.org/r/20210122023317.687987-8-ming.lei@redhat.com Cc: Omar Sandoval Cc: Kashyap Desai Cc: Sumanesh Samanta Cc: Ewan D. Milne Cc: Hannes Reinecke Tested-by: Sumanesh Samanta Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- include/linux/blk-mq.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2c473c9b8990..5fae401f083d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -313,6 +313,15 @@ struct blk_mq_ops { */ void (*put_budget)(struct request_queue *); + /* + * @set_rq_budget_toekn: store rq's budget token + */ + void (*set_rq_budget_token)(struct request *, int); + /* + * @get_rq_budget_toekn: retrieve rq's budget token + */ + int (*get_rq_budget_token)(struct request *); + /** * @timeout: Called on request timeout. */ -- cgit v1.2.3 From 2a5a24aa83382a88c43d18a901fab66e6ffe1199 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Jan 2021 10:33:12 +0800 Subject: scsi: blk-mq: Return budget token from .get_budget callback SCSI uses a global atomic variable to track queue depth for each LUN/request queue. This doesn't scale well when there are lots of CPU cores and the disk is very fast. It has been observed that IOPS is affected a lot by tracking queue depth via sdev->device_busy in the I/O path. Return budget token from .get_budget callback. The budget token can be passed to driver so that we can replace the atomic variable with sbitmap_queue and alleviate the scaling problems that way. Link: https://lore.kernel.org/r/20210122023317.687987-9-ming.lei@redhat.com Cc: Omar Sandoval Cc: Kashyap Desai Cc: Sumanesh Samanta Cc: Ewan D. Milne Tested-by: Sumanesh Samanta Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 5fae401f083d..3bd3ee651143 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -306,12 +306,12 @@ struct blk_mq_ops { * reserved budget. Also we have to handle failure case * of .get_budget for avoiding I/O deadlock. */ - bool (*get_budget)(struct request_queue *); + int (*get_budget)(struct request_queue *); /** * @put_budget: Release the reserved budget. */ - void (*put_budget)(struct request_queue *); + void (*put_budget)(struct request_queue *, int); /* * @set_rq_budget_toekn: store rq's budget token -- cgit v1.2.3 From 7c32e8f8bc33a5f4b113a630857e46634e3e143b Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Wed, 3 Mar 2021 10:18:13 +0000 Subject: bpf: Add PROG_TEST_RUN support for sk_lookup programs Allow to pass sk_lookup programs to PROG_TEST_RUN. User space provides the full bpf_sk_lookup struct as context. Since the context includes a socket pointer that can't be exposed to user space we define that PROG_TEST_RUN returns the cookie of the selected socket or zero in place of the socket pointer. We don't support testing programs that select a reuseport socket, since this would mean running another (unrelated) BPF program from the sk_lookup test handler. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210303101816.36774-3-lmb@cloudflare.com --- include/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4c730863fa77..c931bc97019d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1491,6 +1491,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@ -1692,6 +1695,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, return -ENOTSUPP; } +static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + static inline void bpf_map_put(struct bpf_map *map) { } -- cgit v1.2.3 From 183f47fcaa54a5ffe671d990186d330ac8c63b10 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Feb 2021 18:31:24 +0100 Subject: kcov: Remove kcov include from sched.h and move it to its users. The recent addition of in_serving_softirq() to kconv.h results in compile failure on PREEMPT_RT because it requires task_struct::softirq_disable_cnt. This is not available if kconv.h is included from sched.h. It is not needed to include kconv.h from sched.h. All but the net/ user already include the kconv header file. Move the include of the kconv.h header from sched.h it its users. Additionally include sched.h from kconv.h to ensure that everything task_struct related is available. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Johannes Berg Acked-by: Andrey Konovalov Link: https://lkml.kernel.org/r/20210218173124.iy5iyqv3a4oia4vv@linutronix.de --- include/linux/kcov.h | 1 + include/linux/sched.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 4e3037dc1204..55dc338f6bcd 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -2,6 +2,7 @@ #ifndef _LINUX_KCOV_H #define _LINUX_KCOV_H +#include #include struct task_struct; diff --git a/include/linux/sched.h b/include/linux/sched.h index ef00bb22164c..cf245bc237e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From e7fcd762282332f765af2035a9568fb126fa3c01 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Wed, 3 Mar 2021 11:46:56 +0800 Subject: psi: Add PSI_CPU_FULL state The FULL state doesn't exist for the CPU resource at the system level, but exist at the cgroup level, means all non-idle tasks in a cgroup are delayed on the CPU resource which used by others outside of the cgroup or throttled by the cgroup cpu.max configuration. Co-developed-by: Muchun Song Signed-off-by: Muchun Song Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Johannes Weiner Link: https://lkml.kernel.org/r/20210303034659.91735-2-zhouchengming@bytedance.com --- include/linux/psi_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index b95f3211566a..0a23300d49af 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -50,9 +50,10 @@ enum psi_states { PSI_MEM_SOME, PSI_MEM_FULL, PSI_CPU_SOME, + PSI_CPU_FULL, /* Only per-CPU, to weigh the CPU in the global average: */ PSI_NONIDLE, - NR_PSI_STATES = 6, + NR_PSI_STATES = 7, }; enum psi_aggregators { -- cgit v1.2.3 From 7fae6c8171d20ac55402930ee8ae760cf85dff7b Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Wed, 3 Mar 2021 11:46:57 +0800 Subject: psi: Use ONCPU state tracking machinery to detect reclaim Move the reclaim detection from the timer tick to the task state tracking machinery using the recently added ONCPU state. And we also add task psi_flags changes checking in the psi_task_switch() optimization to update the parents properly. In terms of performance and cost, this ONCPU task state tracking is not cheaper than previous timer tick in aggregate. But the code is simpler and shorter this way, so it's a maintainability win. And Johannes did some testing with perf bench, the performace and cost changes would be acceptable for real workloads. Thanks to Johannes Weiner for pointing out the psi_task_switch() optimization things and the clearer changelog. Co-developed-by: Muchun Song Signed-off-by: Muchun Song Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Johannes Weiner Link: https://lkml.kernel.org/r/20210303034659.91735-3-zhouchengming@bytedance.com --- include/linux/psi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/psi.h b/include/linux/psi.h index 7361023f3fdd..65eb1476ac70 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -20,7 +20,6 @@ void psi_task_change(struct task_struct *task, int clear, int set); void psi_task_switch(struct task_struct *prev, struct task_struct *next, bool sleep); -void psi_memstall_tick(struct task_struct *task, int cpu); void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); -- cgit v1.2.3 From cbe16f35bee6880becca6f20d2ebf6b457148552 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 3 Mar 2021 11:49:15 +1300 Subject: genirq: Add IRQF_NO_AUTOEN for request_irq/nmi() Many drivers don't want interrupts enabled automatically via request_irq(). So they are handling this issue by either way of the below two: (1) irq_set_status_flags(irq, IRQ_NOAUTOEN); request_irq(dev, irq...); (2) request_irq(dev, irq...); disable_irq(irq); The code in the second way is silly and unsafe. In the small time gap between request_irq() and disable_irq(), interrupts can still come. The code in the first way is safe though it's subobtimal. Add a new IRQF_NO_AUTOEN flag which can be handed in by drivers to request_irq() and request_nmi(). It prevents the automatic enabling of the requested interrupt/nmi in the same safe way as #1 above. With that the various usage sites of #1 and #2 above can be simplified and corrected. Signed-off-by: Barry Song Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Cc: dmitry.torokhov@gmail.com Link: https://lore.kernel.org/r/20210302224916.13980-2-song.bao.hua@hisilicon.com --- include/linux/interrupt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 967e25767153..76f1161a441a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -61,6 +61,9 @@ * interrupt handler after suspending interrupts. For system * wakeup devices users need to implement wakeup detection in * their interrupt handlers. + * IRQF_NO_AUTOEN - Don't enable IRQ or NMI automatically when users request it. + * Users will enable it explicitly by enable_irq() or enable_nmi() + * later. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -74,6 +77,7 @@ #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 #define IRQF_COND_SUSPEND 0x00040000 +#define IRQF_NO_AUTOEN 0x00080000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) -- cgit v1.2.3 From 3e31f94752e454bdd0ca4a1d046ee21f80c166c5 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 26 Feb 2021 17:06:58 -0700 Subject: lockdep: Add lockdep_assert_not_held() Some kernel functions must be called without holding a specific lock. Add lockdep_assert_not_held() to be used in these functions to detect incorrect calls while holding a lock. lockdep_assert_not_held() provides the opposite functionality of lockdep_assert_held() which is used to assert calls that require holding a specific lock. Incorporates suggestions from Peter Zijlstra to avoid misfires when lockdep_off() is employed. The need for lockdep_assert_not_held() came up in a discussion on ath10k patch. ath10k_drain_tx() and i915_vma_pin_ww() are examples of functions that can use lockdep_assert_not_held(). Signed-off-by: Shuah Khan Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/linux-wireless/871rdmu9z9.fsf@codeaurora.org/ --- include/linux/lockdep.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 7b7ebf2e28ec..dbd9ea846b36 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -301,8 +301,12 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) -#define lockdep_assert_held(l) do { \ - WARN_ON(debug_locks && !lockdep_is_held(l)); \ +#define lockdep_assert_held(l) do { \ + WARN_ON(debug_locks && lockdep_is_held(l) == 0); \ + } while (0) + +#define lockdep_assert_not_held(l) do { \ + WARN_ON(debug_locks && lockdep_is_held(l) == 1); \ } while (0) #define lockdep_assert_held_write(l) do { \ @@ -393,7 +397,8 @@ extern int lockdep_is_held(const void *); #define lockdep_is_held_type(l, r) (1) #define lockdep_assert_held(l) do { (void)(l); } while (0) -#define lockdep_assert_held_write(l) do { (void)(l); } while (0) +#define lockdep_assert_not_held(l) do { (void)(l); } while (0) +#define lockdep_assert_held_write(l) do { (void)(l); } while (0) #define lockdep_assert_held_read(l) do { (void)(l); } while (0) #define lockdep_assert_held_once(l) do { (void)(l); } while (0) -- cgit v1.2.3 From f8cfa46608f8aa5ca5421ce281ab314129c15411 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 26 Feb 2021 17:06:59 -0700 Subject: lockdep: Add lockdep lock state defines Adds defines for lock state returns from lock_is_held_type() based on Johannes Berg's suggestions as it make it easier to read and maintain the lock states. These are defines and a enum to avoid changes to lock_is_held_type() and lockdep_is_held() return types. Updates to lock_is_held_type() and __lock_is_held() to use the new defines. Signed-off-by: Shuah Khan Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/linux-wireless/871rdmu9z9.fsf@codeaurora.org/ --- include/linux/lockdep.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index dbd9ea846b36..17805aac0e85 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -268,6 +268,11 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, extern void lock_release(struct lockdep_map *lock, unsigned long ip); +/* lock_is_held_type() returns */ +#define LOCK_STATE_UNKNOWN -1 +#define LOCK_STATE_NOT_HELD 0 +#define LOCK_STATE_HELD 1 + /* * Same "read" as for lock_acquire(), except -1 means any. */ @@ -302,11 +307,13 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) #define lockdep_assert_held(l) do { \ - WARN_ON(debug_locks && lockdep_is_held(l) == 0); \ + WARN_ON(debug_locks && \ + lockdep_is_held(l) == LOCK_STATE_NOT_HELD); \ } while (0) #define lockdep_assert_not_held(l) do { \ - WARN_ON(debug_locks && lockdep_is_held(l) == 1); \ + WARN_ON(debug_locks && \ + lockdep_is_held(l) == LOCK_STATE_HELD); \ } while (0) #define lockdep_assert_held_write(l) do { \ -- cgit v1.2.3 From a5398bffc01fe044848c5024e5e867e407f239b8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Nov 2020 11:38:40 -0800 Subject: perf/core: Flush PMU internal buffers for per-CPU events Sometimes the PMU internal buffers have to be flushed for per-CPU events during a context switch, e.g., large PEBS. Otherwise, the perf tool may report samples in locations that do not belong to the process where the samples are processed in, because PEBS does not tag samples with PID/TID. The current code only flush the buffers for a per-task event. It doesn't check a per-CPU event. Add a new event state flag, PERF_ATTACH_SCHED_CB, to indicate that the PMU internal buffers have to be flushed for this event during a context switch. Add sched_cb_entry and perf_sched_cb_usages back to track the PMU/cpuctx which is required to be flushed. Only need to invoke the sched_task() for per-CPU events in this patch. The per-task events have been handled in perf_event_context_sched_in/out already. Fixes: 9c964efa4330 ("perf/x86/intel: Drain the PEBS buffer during context switches") Reported-by: Gabriel Marin Originally-by: Namhyung Kim Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20201130193842.10569-1-kan.liang@linux.intel.com --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fab42cfbd350..3f7f89ea5e51 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -606,6 +606,7 @@ struct swevent_hlist { #define PERF_ATTACH_TASK 0x04 #define PERF_ATTACH_TASK_DATA 0x08 #define PERF_ATTACH_ITRACE 0x10 +#define PERF_ATTACH_SCHED_CB 0x20 struct perf_cgroup; struct perf_buffer; @@ -872,6 +873,7 @@ struct perf_cpu_context { struct list_head cgrp_cpuctx_entry; #endif + struct list_head sched_cb_entry; int sched_cb_usage; int online; -- cgit v1.2.3 From 291c4011dd7ac0cd0cebb727a75ee5a50d16dcf7 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sat, 20 Feb 2021 15:17:10 -0800 Subject: cpumask: Mark functions as pure cpumask_next_and() and cpumask_any_but() are pure, and marking them as such seems to generate different and presumably better code for native_flush_tlb_multi(). Signed-off-by: Nadav Amit Signed-off-by: Ingo Molnar Reviewed-by: Dave Hansen Link: https://lore.kernel.org/r/20210220231712.2475218-8-namit@vmware.com --- include/linux/cpumask.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 383684e30f12..c53364c4296d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -235,7 +235,7 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits); } -unsigned int cpumask_next(int n, const struct cpumask *srcp); +unsigned int __pure cpumask_next(int n, const struct cpumask *srcp); /** * cpumask_next_zero - get the next unset cpu in a cpumask @@ -252,8 +252,8 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); } -int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); -int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); +int __pure cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); +int __pure cpumask_any_but(const struct cpumask *mask, unsigned int cpu); unsigned int cpumask_local_spread(unsigned int i, int node); int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p); -- cgit v1.2.3 From a5aa5ce300597224ec76dacc8e63ba3ad7a18bbd Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sat, 20 Feb 2021 15:17:12 -0800 Subject: smp: Inline on_each_cpu_cond() and on_each_cpu() Simplify the code and avoid having an additional function on the stack by inlining on_each_cpu_cond() and on_each_cpu(). Suggested-by: Peter Zijlstra Signed-off-by: Nadav Amit [ Minor edits. ] Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210220231712.2475218-10-namit@vmware.com --- include/linux/smp.h | 50 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 70c6f6284dcf..84a0b4828f66 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,30 +50,52 @@ extern unsigned int total_cpus; int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, int wait); +void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, + void *info, bool wait, const struct cpumask *mask); + +int smp_call_function_single_async(int cpu, call_single_data_t *csd); + /* * Call a function on all processors */ -void on_each_cpu(smp_call_func_t func, void *info, int wait); +static inline void on_each_cpu(smp_call_func_t func, void *info, int wait) +{ + on_each_cpu_cond_mask(NULL, func, info, wait, cpu_online_mask); +} -/* - * Call a function on processors specified by mask, which might include - * the local one. +/** + * on_each_cpu_mask(): Run a function on processors specified by + * cpumask, which may include the local processor. + * @mask: The set of cpus to run on (only runs on online subset). + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait (atomically) until function has completed + * on other CPUs. + * + * If @wait is true, then returns once @func has returned. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. The + * exception is that it may be used during early boot while + * early_boot_irqs_disabled is set. */ -void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, - void *info, bool wait); +static inline void on_each_cpu_mask(const struct cpumask *mask, + smp_call_func_t func, void *info, bool wait) +{ + on_each_cpu_cond_mask(NULL, func, info, wait, mask); +} /* * Call a function on each processor for which the supplied function * cond_func returns a positive value. This may include the local - * processor. + * processor. May be used during early boot while early_boot_irqs_disabled is + * set. Use local_irq_save/restore() instead of local_irq_disable/enable(). */ -void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func, - void *info, bool wait); - -void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, - void *info, bool wait, const struct cpumask *mask); - -int smp_call_function_single_async(int cpu, call_single_data_t *csd); +static inline void on_each_cpu_cond(smp_cond_func_t cond_func, + smp_call_func_t func, void *info, bool wait) +{ + on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask); +} #ifdef CONFIG_SMP -- cgit v1.2.3 From baf186c4d345f5a105e63df01100936ad622f369 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 6 Mar 2021 11:02:15 +0000 Subject: io_uring: index io_uring->xa by ctx not file We don't use task file notes anymore, and no need left in indexing task->io_uring->xa by file, and replace it with ctx. It's better design-wise, especially since we keep a dangling file, and so have to keep an eye on not dereferencing it. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 7cb7bd0e334c..9761a0ec9f95 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -18,7 +18,7 @@ struct io_uring_task { /* submission side */ struct xarray xa; struct wait_queue_head wait; - struct file *last; + void *last; void *io_wq; struct percpu_counter inflight; atomic_t in_idle; -- cgit v1.2.3 From ae2177cf318d169e349319b24a26881ba0e5248f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 4 Mar 2021 16:08:20 +0200 Subject: mtd: spi-nor: intel-spi: Move platform data header to x86 subfolder In order to group x86 related platform data move intel-spi.h to x86 folder. While at it, remove duplicate inclusion in C file. Signed-off-by: Andy Shevchenko [ta: s/x85/x86] Signed-off-by: Tudor Ambarus Reviewed-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20210304140820.56692-1-andriy.shevchenko@linux.intel.com --- include/linux/mfd/lpc_ich.h | 2 +- include/linux/platform_data/intel-spi.h | 29 ----------------------------- include/linux/platform_data/x86/intel-spi.h | 29 +++++++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 30 deletions(-) delete mode 100644 include/linux/platform_data/intel-spi.h create mode 100644 include/linux/platform_data/x86/intel-spi.h (limited to 'include/linux') diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index 6ddca2bbb3a8..39967a5eca6d 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -8,7 +8,7 @@ #ifndef LPC_ICH_H #define LPC_ICH_H -#include +#include /* GPIO resources */ #define ICH_RES_GPIO 0 diff --git a/include/linux/platform_data/intel-spi.h b/include/linux/platform_data/intel-spi.h deleted file mode 100644 index 7f53a5c6f35e..000000000000 --- a/include/linux/platform_data/intel-spi.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Intel PCH/PCU SPI flash driver. - * - * Copyright (C) 2016, Intel Corporation - * Author: Mika Westerberg - */ - -#ifndef INTEL_SPI_PDATA_H -#define INTEL_SPI_PDATA_H - -enum intel_spi_type { - INTEL_SPI_BYT = 1, - INTEL_SPI_LPT, - INTEL_SPI_BXT, - INTEL_SPI_CNL, -}; - -/** - * struct intel_spi_boardinfo - Board specific data for Intel SPI driver - * @type: Type which this controller is compatible with - * @writeable: The chip is writeable - */ -struct intel_spi_boardinfo { - enum intel_spi_type type; - bool writeable; -}; - -#endif /* INTEL_SPI_PDATA_H */ diff --git a/include/linux/platform_data/x86/intel-spi.h b/include/linux/platform_data/x86/intel-spi.h new file mode 100644 index 000000000000..7f53a5c6f35e --- /dev/null +++ b/include/linux/platform_data/x86/intel-spi.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Intel PCH/PCU SPI flash driver. + * + * Copyright (C) 2016, Intel Corporation + * Author: Mika Westerberg + */ + +#ifndef INTEL_SPI_PDATA_H +#define INTEL_SPI_PDATA_H + +enum intel_spi_type { + INTEL_SPI_BYT = 1, + INTEL_SPI_LPT, + INTEL_SPI_BXT, + INTEL_SPI_CNL, +}; + +/** + * struct intel_spi_boardinfo - Board specific data for Intel SPI driver + * @type: Type which this controller is compatible with + * @writeable: The chip is writeable + */ +struct intel_spi_boardinfo { + enum intel_spi_type type; + bool writeable; +}; + +#endif /* INTEL_SPI_PDATA_H */ -- cgit v1.2.3 From f567d6ef8606fb427636e824c867229ecb5aefab Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Sun, 7 Feb 2021 16:47:40 +0200 Subject: HID: plantronics: Workaround for double volume key presses Plantronics Blackwire 3220 Series (047f:c056) sends HID reports twice for each volume key press. This patch adds a quirk to hid-plantronics for this product ID, which will ignore the second volume key press if it happens within 5 ms from the last one that was handled. The patch was tested on the mentioned model only, it shouldn't affect other models, however, this quirk might be needed for them too. Auto-repeat (when a key is held pressed) is not affected, because the rate is about 3 times per second, which is far less frequent than once in 5 ms. Fixes: 81bb773faed7 ("HID: plantronics: Update to map volume up/down controls") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index ef702b3f56e3..3e33eb14118c 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -262,6 +262,8 @@ struct hid_item { #define HID_CP_SELECTION 0x000c0080 #define HID_CP_MEDIASELECTION 0x000c0087 #define HID_CP_SELECTDISC 0x000c00ba +#define HID_CP_VOLUMEUP 0x000c00e9 +#define HID_CP_VOLUMEDOWN 0x000c00ea #define HID_CP_PLAYBACKSPEED 0x000c00f1 #define HID_CP_PROXIMITY 0x000c0109 #define HID_CP_SPEAKERSYSTEM 0x000c0160 -- cgit v1.2.3 From 03ee318391707e822eb915f4f30fe42c78b9d89b Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Thu, 4 Mar 2021 20:05:24 +0100 Subject: platform/surface: aggregator: Make SSAM_DEFINE_SYNC_REQUEST_x define static functions The SSAM_DEFINE_SYNC_REQUEST_x() macros are intended to reduce boiler-plate code for SSAM request definitions by defining a wrapper function for the specified request. The client device variants of those macros, i.e. SSAM_DEFINE_SYNC_REQUEST_CL_x() in particular rely on the multi-device (MD) variants, e.g.: #define SSAM_DEFINE_SYNC_REQUEST_CL_R(name, rtype, spec...) \ SSAM_DEFINE_SYNC_REQUEST_MD_R(__raw_##name, rtype, spec) \ int name(struct ssam_device *sdev, rtype *ret) \ { \ return __raw_##name(sdev->ctrl, sdev->uid.target, \ sdev->uid.instance, ret); \ } This now creates the problem that it is not possible to declare the generated functions static via static SSAM_DEFINE_SYNC_REQUEST_CL_R(...) as this will only apply to the function defined by the multi-device macro, i.e. SSAM_DEFINE_SYNC_REQUEST_MD_R(). Thus compiling with `-Wmissing-prototypes' rightfully complains that there is a 'static' keyword missing. To solve this, make all SSAM_DEFINE_SYNC_REQUEST_x() macros define static functions. Non-client-device macros are also changed for consistency. In general, we expect those functions to be only used locally in the respective drivers for the corresponding interfaces, so having to define a wrapper function to be able to export this should be the odd case out. Reported-by: kernel test robot Fixes: b78b4982d763 ("platform/surface: Add platform profile driver") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20210304190524.1172197-1-luzmaximilian@gmail.com Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/controller.h | 74 +++++++++++++-------------- include/linux/surface_aggregator/device.h | 31 +++++------ 2 files changed, 53 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h index f4b1ba887384..0806796eabcb 100644 --- a/include/linux/surface_aggregator/controller.h +++ b/include/linux/surface_aggregator/controller.h @@ -344,16 +344,16 @@ struct ssam_request_spec_md { * request has been fully completed. The required transport buffer will be * allocated on the stack. * - * The generated function is defined as ``int name(struct ssam_controller - * *ctrl)``, returning the status of the request, which is zero on success and - * negative on failure. The ``ctrl`` parameter is the controller via which the - * request is being sent. + * The generated function is defined as ``static int name(struct + * ssam_controller *ctrl)``, returning the status of the request, which is + * zero on success and negative on failure. The ``ctrl`` parameter is the + * controller via which the request is being sent. * * Refer to ssam_request_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_N(name, spec...) \ - int name(struct ssam_controller *ctrl) \ + static int name(struct ssam_controller *ctrl) \ { \ struct ssam_request_spec s = (struct ssam_request_spec)spec; \ struct ssam_request rqst; \ @@ -383,17 +383,17 @@ struct ssam_request_spec_md { * returning once the request has been fully completed. The required transport * buffer will be allocated on the stack. * - * The generated function is defined as ``int name(struct ssam_controller - * *ctrl, const atype *arg)``, returning the status of the request, which is - * zero on success and negative on failure. The ``ctrl`` parameter is the - * controller via which the request is sent. The request argument is specified - * via the ``arg`` pointer. + * The generated function is defined as ``static int name(struct + * ssam_controller *ctrl, const atype *arg)``, returning the status of the + * request, which is zero on success and negative on failure. The ``ctrl`` + * parameter is the controller via which the request is sent. The request + * argument is specified via the ``arg`` pointer. * * Refer to ssam_request_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_W(name, atype, spec...) \ - int name(struct ssam_controller *ctrl, const atype *arg) \ + static int name(struct ssam_controller *ctrl, const atype *arg) \ { \ struct ssam_request_spec s = (struct ssam_request_spec)spec; \ struct ssam_request rqst; \ @@ -424,17 +424,17 @@ struct ssam_request_spec_md { * request itself, returning once the request has been fully completed. The * required transport buffer will be allocated on the stack. * - * The generated function is defined as ``int name(struct ssam_controller - * *ctrl, rtype *ret)``, returning the status of the request, which is zero on - * success and negative on failure. The ``ctrl`` parameter is the controller - * via which the request is sent. The request's return value is written to the - * memory pointed to by the ``ret`` parameter. + * The generated function is defined as ``static int name(struct + * ssam_controller *ctrl, rtype *ret)``, returning the status of the request, + * which is zero on success and negative on failure. The ``ctrl`` parameter is + * the controller via which the request is sent. The request's return value is + * written to the memory pointed to by the ``ret`` parameter. * * Refer to ssam_request_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_R(name, rtype, spec...) \ - int name(struct ssam_controller *ctrl, rtype *ret) \ + static int name(struct ssam_controller *ctrl, rtype *ret) \ { \ struct ssam_request_spec s = (struct ssam_request_spec)spec; \ struct ssam_request rqst; \ @@ -483,17 +483,17 @@ struct ssam_request_spec_md { * returning once the request has been fully completed. The required transport * buffer will be allocated on the stack. * - * The generated function is defined as ``int name(struct ssam_controller - * *ctrl, u8 tid, u8 iid)``, returning the status of the request, which is - * zero on success and negative on failure. The ``ctrl`` parameter is the - * controller via which the request is sent, ``tid`` the target ID for the - * request, and ``iid`` the instance ID. + * The generated function is defined as ``static int name(struct + * ssam_controller *ctrl, u8 tid, u8 iid)``, returning the status of the + * request, which is zero on success and negative on failure. The ``ctrl`` + * parameter is the controller via which the request is sent, ``tid`` the + * target ID for the request, and ``iid`` the instance ID. * * Refer to ssam_request_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_MD_N(name, spec...) \ - int name(struct ssam_controller *ctrl, u8 tid, u8 iid) \ + static int name(struct ssam_controller *ctrl, u8 tid, u8 iid) \ { \ struct ssam_request_spec_md s = (struct ssam_request_spec_md)spec; \ struct ssam_request rqst; \ @@ -524,18 +524,18 @@ struct ssam_request_spec_md { * the request itself, returning once the request has been fully completed. * The required transport buffer will be allocated on the stack. * - * The generated function is defined as ``int name(struct ssam_controller - * *ctrl, u8 tid, u8 iid, const atype *arg)``, returning the status of the - * request, which is zero on success and negative on failure. The ``ctrl`` - * parameter is the controller via which the request is sent, ``tid`` the - * target ID for the request, and ``iid`` the instance ID. The request argument - * is specified via the ``arg`` pointer. + * The generated function is defined as ``static int name(struct + * ssam_controller *ctrl, u8 tid, u8 iid, const atype *arg)``, returning the + * status of the request, which is zero on success and negative on failure. + * The ``ctrl`` parameter is the controller via which the request is sent, + * ``tid`` the target ID for the request, and ``iid`` the instance ID. The + * request argument is specified via the ``arg`` pointer. * * Refer to ssam_request_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_MD_W(name, atype, spec...) \ - int name(struct ssam_controller *ctrl, u8 tid, u8 iid, const atype *arg)\ + static int name(struct ssam_controller *ctrl, u8 tid, u8 iid, const atype *arg) \ { \ struct ssam_request_spec_md s = (struct ssam_request_spec_md)spec; \ struct ssam_request rqst; \ @@ -567,18 +567,18 @@ struct ssam_request_spec_md { * execution of the request itself, returning once the request has been fully * completed. The required transport buffer will be allocated on the stack. * - * The generated function is defined as ``int name(struct ssam_controller - * *ctrl, u8 tid, u8 iid, rtype *ret)``, returning the status of the request, - * which is zero on success and negative on failure. The ``ctrl`` parameter is - * the controller via which the request is sent, ``tid`` the target ID for the - * request, and ``iid`` the instance ID. The request's return value is written - * to the memory pointed to by the ``ret`` parameter. + * The generated function is defined as ``static int name(struct + * ssam_controller *ctrl, u8 tid, u8 iid, rtype *ret)``, returning the status + * of the request, which is zero on success and negative on failure. The + * ``ctrl`` parameter is the controller via which the request is sent, ``tid`` + * the target ID for the request, and ``iid`` the instance ID. The request's + * return value is written to the memory pointed to by the ``ret`` parameter. * * Refer to ssam_request_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_MD_R(name, rtype, spec...) \ - int name(struct ssam_controller *ctrl, u8 tid, u8 iid, rtype *ret) \ + static int name(struct ssam_controller *ctrl, u8 tid, u8 iid, rtype *ret) \ { \ struct ssam_request_spec_md s = (struct ssam_request_spec_md)spec; \ struct ssam_request rqst; \ diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index 02f3e06c0a60..4441ad667c3f 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -336,17 +336,18 @@ void ssam_device_driver_unregister(struct ssam_device_driver *d); * request has been fully completed. The required transport buffer will be * allocated on the stack. * - * The generated function is defined as ``int name(struct ssam_device *sdev)``, - * returning the status of the request, which is zero on success and negative - * on failure. The ``sdev`` parameter specifies both the target device of the - * request and by association the controller via which the request is sent. + * The generated function is defined as ``static int name(struct ssam_device + * *sdev)``, returning the status of the request, which is zero on success and + * negative on failure. The ``sdev`` parameter specifies both the target + * device of the request and by association the controller via which the + * request is sent. * * Refer to ssam_request_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_CL_N(name, spec...) \ SSAM_DEFINE_SYNC_REQUEST_MD_N(__raw_##name, spec) \ - int name(struct ssam_device *sdev) \ + static int name(struct ssam_device *sdev) \ { \ return __raw_##name(sdev->ctrl, sdev->uid.target, \ sdev->uid.instance); \ @@ -368,19 +369,19 @@ void ssam_device_driver_unregister(struct ssam_device_driver *d); * itself, returning once the request has been fully completed. The required * transport buffer will be allocated on the stack. * - * The generated function is defined as ``int name(struct ssam_device *sdev, - * const atype *arg)``, returning the status of the request, which is zero on - * success and negative on failure. The ``sdev`` parameter specifies both the - * target device of the request and by association the controller via which - * the request is sent. The request's argument is specified via the ``arg`` - * pointer. + * The generated function is defined as ``static int name(struct ssam_device + * *sdev, const atype *arg)``, returning the status of the request, which is + * zero on success and negative on failure. The ``sdev`` parameter specifies + * both the target device of the request and by association the controller via + * which the request is sent. The request's argument is specified via the + * ``arg`` pointer. * * Refer to ssam_request_sync_onstack() for more details on the behavior of * the generated function. */ #define SSAM_DEFINE_SYNC_REQUEST_CL_W(name, atype, spec...) \ SSAM_DEFINE_SYNC_REQUEST_MD_W(__raw_##name, atype, spec) \ - int name(struct ssam_device *sdev, const atype *arg) \ + static int name(struct ssam_device *sdev, const atype *arg) \ { \ return __raw_##name(sdev->ctrl, sdev->uid.target, \ sdev->uid.instance, arg); \ @@ -402,8 +403,8 @@ void ssam_device_driver_unregister(struct ssam_device_driver *d); * itself, returning once the request has been fully completed. The required * transport buffer will be allocated on the stack. * - * The generated function is defined as ``int name(struct ssam_device *sdev, - * rtype *ret)``, returning the status of the request, which is zero on + * The generated function is defined as ``static int name(struct ssam_device + * *sdev, rtype *ret)``, returning the status of the request, which is zero on * success and negative on failure. The ``sdev`` parameter specifies both the * target device of the request and by association the controller via which * the request is sent. The request's return value is written to the memory @@ -414,7 +415,7 @@ void ssam_device_driver_unregister(struct ssam_device_driver *d); */ #define SSAM_DEFINE_SYNC_REQUEST_CL_R(name, rtype, spec...) \ SSAM_DEFINE_SYNC_REQUEST_MD_R(__raw_##name, rtype, spec) \ - int name(struct ssam_device *sdev, rtype *ret) \ + static int name(struct ssam_device *sdev, rtype *ret) \ { \ return __raw_##name(sdev->ctrl, sdev->uid.target, \ sdev->uid.instance, ret); \ -- cgit v1.2.3 From 2b329f5694aec86107931584413c7d2ebc6b548d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 1 Mar 2021 17:04:04 +0100 Subject: platform/x86: wmi: Make remove callback return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver core ignores the return value of struct bus_type::remove() (and so wmi_dev_remove()) because there is only little that can be done. To simplify the quest to make this function return void, let struct wmi_driver::remove() return void, too. All implementers of this callback return 0 already and this way it should be obvious to driver authors that returning an error code is a bad idea. Signed-off-by: Uwe Kleine-König Reviewed-by: Pali Rohár Link: https://lore.kernel.org/r/20210301160404.1677064-1-u.kleine-koenig@pengutronix.de Signed-off-by: Hans de Goede --- include/linux/wmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 8ef7e7faea1e..2cb3913c1f50 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -37,7 +37,7 @@ struct wmi_driver { const struct wmi_device_id *id_table; int (*probe)(struct wmi_device *wdev, const void *context); - int (*remove)(struct wmi_device *wdev); + void (*remove)(struct wmi_device *wdev); void (*notify)(struct wmi_device *device, union acpi_object *data); long (*filter_callback)(struct wmi_device *wdev, unsigned int cmd, struct wmi_ioctl_buffer *arg); -- cgit v1.2.3 From e831e400f73b0e7aa4629200c2ef71e068262414 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:17 +0100 Subject: printk: kmsg_dump: remove unused fields struct kmsg_dumper still contains some fields that were used to iterate the old ringbuffer. They are no longer used. Remove them and update the struct documentation. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-5-john.ogness@linutronix.de --- include/linux/kmsg_dump.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 3378bcbe585e..ae38035f1dca 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -36,6 +36,9 @@ enum kmsg_dump_reason { * through the record iterator * @max_reason: filter for highest reason number that should be dumped * @registered: Flag that specifies if this is already registered + * @active: Flag that specifies if this is currently dumping + * @cur_seq: Points to the oldest message to dump + * @next_seq: Points after the newest message to dump */ struct kmsg_dumper { struct list_head list; @@ -45,8 +48,6 @@ struct kmsg_dumper { bool registered; /* private state of the kmsg iterator */ - u32 cur_idx; - u32 next_idx; u64 cur_seq; u64 next_seq; }; -- cgit v1.2.3 From 726b5097701a8d46f5354be780e1a11fc4ca1187 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:18 +0100 Subject: printk: refactor kmsg_dump_get_buffer() kmsg_dump_get_buffer() requires nearly the same logic as syslog_print_all(), but uses different variable names and does not make use of the ringbuffer loop macros. Modify kmsg_dump_get_buffer() so that the implementation is as similar to syslog_print_all() as possible. A follow-up commit will move this common logic into a separate helper function. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-6-john.ogness@linutronix.de --- include/linux/kmsg_dump.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index ae38035f1dca..070c994ff19f 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -62,7 +62,7 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, char *line, size_t size, size_t *len); bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, - char *buf, size_t size, size_t *len); + char *buf, size_t size, size_t *len_out); void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); -- cgit v1.2.3 From 5f6c7648e556f41a3064bb6dceb9e102c50b618d Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:24 +0100 Subject: printk: kmsg_dumper: remove @active field All 6 kmsg_dumpers do not benefit from the @active flag: (provide their own synchronization) - arch/powerpc/kernel/nvram_64.c - arch/um/kernel/kmsg_dump.c - drivers/mtd/mtdoops.c - fs/pstore/platform.c (only dump on KMSG_DUMP_PANIC, which does not require synchronization) - arch/powerpc/platforms/powernv/opal-kmsg.c - drivers/hv/vmbus_drv.c The other 2 kmsg_dump users also do not rely on @active: (hard-code @active to always be true) - arch/powerpc/xmon/xmon.c - kernel/debug/kdb/kdb_main.c Therefore, @active can be removed. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-12-john.ogness@linutronix.de --- include/linux/kmsg_dump.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 070c994ff19f..84eaa2090efa 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -36,7 +36,6 @@ enum kmsg_dump_reason { * through the record iterator * @max_reason: filter for highest reason number that should be dumped * @registered: Flag that specifies if this is already registered - * @active: Flag that specifies if this is currently dumping * @cur_seq: Points to the oldest message to dump * @next_seq: Points after the newest message to dump */ @@ -44,7 +43,6 @@ struct kmsg_dumper { struct list_head list; void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); enum kmsg_dump_reason max_reason; - bool active; bool registered; /* private state of the kmsg iterator */ -- cgit v1.2.3 From f9f3f02db98bbe678a8e57fe9432b196174744a3 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:25 +0100 Subject: printk: introduce a kmsg_dump iterator Rather than storing the iterator information in the registered kmsg_dumper structure, create a separate iterator structure. The kmsg_dump_iter structure can reside on the stack of the caller, thus allowing lockless use of the kmsg_dump functions. Update code that accesses the kernel logs using the kmsg_dumper structure to use the new kmsg_dump_iter structure. For kmsg_dumpers, this also means adding a call to kmsg_dump_rewind() to initialize the iterator. All this is in preparation for removal of @logbuf_lock. Signed-off-by: John Ogness Reviewed-by: Kees Cook # pstore Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-13-john.ogness@linutronix.de --- include/linux/kmsg_dump.h | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 84eaa2090efa..36c8c57e1051 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -29,6 +29,16 @@ enum kmsg_dump_reason { KMSG_DUMP_MAX }; +/** + * struct kmsg_dump_iter - iterator for retrieving kernel messages + * @cur_seq: Points to the oldest message to dump + * @next_seq: Points after the newest message to dump + */ +struct kmsg_dump_iter { + u64 cur_seq; + u64 next_seq; +}; + /** * struct kmsg_dumper - kernel crash message dumper structure * @list: Entry in the dumper list (private) @@ -36,35 +46,29 @@ enum kmsg_dump_reason { * through the record iterator * @max_reason: filter for highest reason number that should be dumped * @registered: Flag that specifies if this is already registered - * @cur_seq: Points to the oldest message to dump - * @next_seq: Points after the newest message to dump */ struct kmsg_dumper { struct list_head list; void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); enum kmsg_dump_reason max_reason; bool registered; - - /* private state of the kmsg iterator */ - u64 cur_seq; - u64 next_seq; }; #ifdef CONFIG_PRINTK void kmsg_dump(enum kmsg_dump_reason reason); -bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, +bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len); -bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, +bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len); -bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, +bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, char *buf, size_t size, size_t *len_out); -void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); +void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter); -void kmsg_dump_rewind(struct kmsg_dumper *dumper); +void kmsg_dump_rewind(struct kmsg_dump_iter *iter); int kmsg_dump_register(struct kmsg_dumper *dumper); @@ -76,30 +80,30 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } -static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, +static inline bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, const char *line, size_t size, size_t *len) { return false; } -static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, +static inline bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, const char *line, size_t size, size_t *len) { return false; } -static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, +static inline bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, char *buf, size_t size, size_t *len) { return false; } -static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +static inline void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter) { } -static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper) +static inline void kmsg_dump_rewind(struct kmsg_dump_iter *iter) { } -- cgit v1.2.3 From a4f987653241db9fa1f99531b430cebb83f1eae1 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 3 Mar 2021 11:15:27 +0100 Subject: printk: kmsg_dump: remove _nolock() variants kmsg_dump_rewind() and kmsg_dump_get_line() are lockless, so there is no need for _nolock() variants. Remove these functions and switch all callers of the _nolock() variants. The functions without _nolock() were chosen because they are already exported to kernel modules. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210303101528.29901-15-john.ogness@linutronix.de --- include/linux/kmsg_dump.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 36c8c57e1051..906521c2329c 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -57,17 +57,12 @@ struct kmsg_dumper { #ifdef CONFIG_PRINTK void kmsg_dump(enum kmsg_dump_reason reason); -bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, - char *line, size_t size, size_t *len); - bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len); bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, char *buf, size_t size, size_t *len_out); -void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter); - void kmsg_dump_rewind(struct kmsg_dump_iter *iter); int kmsg_dump_register(struct kmsg_dumper *dumper); @@ -80,13 +75,6 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } -static inline bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, - bool syslog, const char *line, - size_t size, size_t *len) -{ - return false; -} - static inline bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, const char *line, size_t size, size_t *len) { @@ -99,10 +87,6 @@ static inline bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog return false; } -static inline void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter) -{ -} - static inline void kmsg_dump_rewind(struct kmsg_dump_iter *iter) { } -- cgit v1.2.3 From 62d5247d239d4b48762192a251c647d7c997616a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Feb 2021 18:33:18 +0200 Subject: gpiolib: acpi: Add ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER quirk On some systems the ACPI tables has wrong pin number and instead of having a relative one it provides an absolute one in the global GPIO number space. Add ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER quirk to cope with such cases. Fixes: ba8c90c61847 ("gpio: pca953x: Override IRQ for one of the expanders on Galileo Gen 2") Depends-on: 0ea683931adb ("gpio: dwapb: Convert driver to using the GPIO-lib-based IRQ-chip") Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Acked-by: Linus Walleij --- include/linux/gpio/consumer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index ef49307611d2..c73b25bc9213 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -674,6 +674,8 @@ struct acpi_gpio_mapping { * get GpioIo type explicitly, this quirk may be used. */ #define ACPI_GPIO_QUIRK_ONLY_GPIOIO BIT(1) +/* Use given pin as an absolute GPIO number in the system */ +#define ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER BIT(2) unsigned int quirks; }; -- cgit v1.2.3 From 809390219fb9c2421239afe5c9eb862d73978ba0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Feb 2021 18:33:19 +0200 Subject: gpiolib: acpi: Allow to find GpioInt() resource by name and index Currently only search by index is supported. However, in some cases we might need to pass the quirks to the acpi_dev_gpio_irq_get(). For this, split out acpi_dev_gpio_irq_get_by() and replace acpi_dev_gpio_irq_get() by calling above with NULL for name parameter. Fixes: ba8c90c61847 ("gpio: pca953x: Override IRQ for one of the expanders on Galileo Gen 2") Depends-on: 0ea683931adb ("gpio: dwapb: Convert driver to using the GPIO-lib-based IRQ-chip") Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Acked-by: Linus Walleij --- include/linux/acpi.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9f432411e988..fcdaab723916 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1079,19 +1079,25 @@ void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const c #if defined(CONFIG_ACPI) && defined(CONFIG_GPIOLIB) bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); -int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index); +int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index); #else static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { return false; } -static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) +static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, + const char *name, int index) { return -ENXIO; } #endif +static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) +{ + return acpi_dev_gpio_irq_get_by(adev, NULL, index); +} + /* Device properties */ #ifdef CONFIG_ACPI -- cgit v1.2.3 From 6e3e2c4362e41a2f18e3f7a5ad81bd2f49a47b85 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 1 Mar 2021 20:37:10 -0500 Subject: new helper: inode_wrong_type() inode_wrong_type(inode, mode) returns true if setting inode->i_mode to given value would've changed the inode type. We have enough of those checks open-coded to make a helper worthwhile. Signed-off-by: Al Viro --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ec8f3ddf4a6a..9e0d76a41229 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2884,6 +2884,11 @@ static inline bool execute_ok(struct inode *inode) return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); } +static inline bool inode_wrong_type(const struct inode *inode, umode_t mode) +{ + return (inode->i_mode ^ mode) & S_IFMT; +} + static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) -- cgit v1.2.3 From d0f1088b31db2d03497a74ca67755df5515f8ff4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 8 Mar 2020 09:16:37 -0400 Subject: coredump: don't bother with do_truncate() have dump_skip() just remember how much needs to be skipped, leave actual seeks/writing zeroes to the next dump_emit() or the end of coredump output, whichever comes first. And instead of playing with do_truncate() in the end, just write one NUL at the end of the last gap (if any). Signed-off-by: Al Viro --- include/linux/binfmts.h | 1 + include/linux/coredump.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 0abd93efc181..049cf9421d83 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -86,6 +86,7 @@ struct coredump_params { unsigned long mm_flags; loff_t written; loff_t pos; + loff_t to_skip; }; /* diff --git a/include/linux/coredump.h b/include/linux/coredump.h index e58e8c207782..247aae4e69c8 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -18,10 +18,10 @@ struct core_vma_metadata { * functions to write out all the necessary info. */ struct coredump_params; -extern int dump_skip(struct coredump_params *cprm, size_t nr); +extern void dump_skip_to(struct coredump_params *cprm, unsigned long to); +extern void dump_skip(struct coredump_params *cprm, size_t nr); extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); -extern void dump_truncate(struct coredump_params *cprm); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, -- cgit v1.2.3 From a64b89088bb1413bb84424f0b16a4d1f9bb0e947 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Jan 2021 20:51:48 -0500 Subject: coredump.h: move CONFIG_COREDUMP-only stuff inside the ifdef Signed-off-by: Al Viro --- include/linux/coredump.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 247aae4e69c8..78fcd776b185 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -7,12 +7,17 @@ #include #include +#ifdef CONFIG_COREDUMP struct core_vma_metadata { unsigned long start, end; unsigned long flags; unsigned long dump_size; }; +extern int core_uses_pid; +extern char core_pattern[]; +extern unsigned int core_pipe_limit; + /* * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. @@ -27,14 +32,9 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start, int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, struct core_vma_metadata **vma_meta, size_t *vma_data_size_ptr); -#ifdef CONFIG_COREDUMP extern void do_coredump(const kernel_siginfo_t *siginfo); #else static inline void do_coredump(const kernel_siginfo_t *siginfo) {} #endif -extern int core_uses_pid; -extern char core_pattern[]; -extern unsigned int core_pipe_limit; - #endif /* _LINUX_COREDUMP_H */ -- cgit v1.2.3 From 4c324548f09fec413b4ee589174dabacfe17d953 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Mar 2021 19:41:44 +0100 Subject: ACPI: utils: Introduce acpi_evaluation_failure_warn() Quite a few users of ACPI objects want to log a warning message if the evaluation fails which is a repeating pattern, so introduce a helper function for that purpose and convert some code where it is open-coded to using it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9f432411e988..35aa70defc57 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1027,9 +1027,14 @@ static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} __printf(3, 4) void acpi_handle_printk(const char *level, acpi_handle handle, const char *fmt, ...); +void acpi_evaluation_failure_warn(acpi_handle handle, const char *name, + acpi_status status); #else /* !CONFIG_ACPI */ static inline __printf(3, 4) void acpi_handle_printk(const char *level, void *handle, const char *fmt, ...) {} +static inline void acpi_evaluation_failure_warn(acpi_handle handle, + const char *name, + acpi_status status) {} #endif /* !CONFIG_ACPI */ #if defined(CONFIG_ACPI) && defined(CONFIG_DYNAMIC_DEBUG) -- cgit v1.2.3 From 9336a5f64b54d2913fb5daa1ac0280ff36f1c5ed Mon Sep 17 00:00:00 2001 From: Lakshmi Ramasubramanian Date: Sun, 21 Feb 2021 09:49:18 -0800 Subject: kexec: Move ELF fields to struct kimage ELF related fields elf_headers, elf_headers_sz, and elf_load_addr are defined in architecture specific 'struct kimage_arch' for x86, powerpc, and arm64. The name of these fields are different in these architectures that makes it hard to have a common code for setting up the device tree for kexec system call. Move the ELF fields to 'struct kimage' defined in include/linux/kexec.h so common code can use it. Suggested-by: Rob Herring Reported-by: kernel test robot Signed-off-by: Lakshmi Ramasubramanian Reviewed-by: Thiago Jung Bauermann Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20210221174930.27324-2-nramas@linux.microsoft.com --- include/linux/kexec.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 8a7aa1d7e0e3..0bfab392367f 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -305,6 +305,11 @@ struct kimage { /* Virtual address of IMA measurement buffer for kexec syscall */ void *ima_buffer; #endif + + /* Core ELF header buffer */ + void *elf_headers; + unsigned long elf_headers_sz; + unsigned long elf_load_addr; }; /* kexec interface functions */ -- cgit v1.2.3 From b30be4dc733e5067b56def359b0823f1e54ded8c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sun, 21 Feb 2021 09:49:22 -0800 Subject: of: Add a common kexec FDT setup function Both arm64 and powerpc do essentially the same FDT /chosen setup for kexec. The differences are either omissions that arm64 should have or additional properties that will be ignored. The setup code can be combined and shared by both powerpc and arm64. The differences relative to the arm64 version: - If /chosen doesn't exist, it will be created (should never happen). - Any old dtb and initrd reserved memory will be released. - The new initrd and elfcorehdr are marked reserved. - "linux,booted-from-kexec" is set. The differences relative to the powerpc version: - "kaslr-seed" and "rng-seed" may be set. - "linux,elfcorehdr" is set. - Any existing "linux,usable-memory-range" is removed. Combine the code for setting up the /chosen node in the FDT and updating the memory reservation for kexec, for powerpc and arm64, in of_kexec_alloc_and_setup_fdt() and move it to "drivers/of/kexec.c". Signed-off-by: Rob Herring Signed-off-by: Lakshmi Ramasubramanian Reviewed-by: Thiago Jung Bauermann Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20210221174930.27324-6-nramas@linux.microsoft.com --- include/linux/of.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 4b27c9a27df3..d66c915df910 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -560,6 +560,11 @@ int of_map_id(struct device_node *np, u32 id, phys_addr_t of_dma_get_max_cpu_address(struct device_node *np); +struct kimage; +void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, + unsigned long initrd_load_addr, + unsigned long initrd_len, + const char *cmdline, size_t extra_fdt_size); #else /* CONFIG_OF */ static inline void of_core_init(void) -- cgit v1.2.3 From 0c605158be32104bb85cbd12fb575e6f1e17d3e7 Mon Sep 17 00:00:00 2001 From: Lakshmi Ramasubramanian Date: Sun, 21 Feb 2021 09:49:25 -0800 Subject: powerpc: Move ima buffer fields to struct kimage The fields ima_buffer_addr and ima_buffer_size in "struct kimage_arch" for powerpc are used to carry forward the IMA measurement list across kexec system call. These fields are not architecture specific, but are currently limited to powerpc. arch_ima_add_kexec_buffer() defined in "arch/powerpc/kexec/ima.c" sets ima_buffer_addr and ima_buffer_size for the kexec system call. This function does not have architecture specific code, but is currently limited to powerpc. Move ima_buffer_addr and ima_buffer_size to "struct kimage". Set ima_buffer_addr and ima_buffer_size in ima_add_kexec_buffer() in security/integrity/ima/ima_kexec.c. Co-developed-by: Prakhar Srivastava Signed-off-by: Prakhar Srivastava Signed-off-by: Lakshmi Ramasubramanian Suggested-by: Will Deacon Reviewed-by: Thiago Jung Bauermann Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20210221174930.27324-9-nramas@linux.microsoft.com --- include/linux/kexec.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 0bfab392367f..0c994ae37729 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -304,6 +304,9 @@ struct kimage { #ifdef CONFIG_IMA_KEXEC /* Virtual address of IMA measurement buffer for kexec syscall */ void *ima_buffer; + + phys_addr_t ima_buffer_addr; + size_t ima_buffer_size; #endif /* Core ELF header buffer */ -- cgit v1.2.3 From fee3ff99bc67604fba77f19da0106f3ec52b1956 Mon Sep 17 00:00:00 2001 From: Lakshmi Ramasubramanian Date: Sun, 21 Feb 2021 09:49:27 -0800 Subject: powerpc: Move arch independent ima kexec functions to drivers/of/kexec.c The functions defined in "arch/powerpc/kexec/ima.c" handle setting up and freeing the resources required to carry over the IMA measurement list from the current kernel to the next kernel across kexec system call. These functions do not have architecture specific code, but are currently limited to powerpc. Move remove_ima_buffer() and setup_ima_buffer() calls into of_kexec_alloc_and_setup_fdt() defined in "drivers/of/kexec.c". Move the remaining architecture independent functions from "arch/powerpc/kexec/ima.c" to "drivers/of/kexec.c". Delete "arch/powerpc/kexec/ima.c" and "arch/powerpc/include/asm/ima.h". Remove references to the deleted files and functions in powerpc and in ima. Co-developed-by: Prakhar Srivastava Signed-off-by: Prakhar Srivastava Signed-off-by: Lakshmi Ramasubramanian Reviewed-by: Thiago Jung Bauermann Tested-by: Thiago Jung Bauermann Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20210221174930.27324-11-nramas@linux.microsoft.com --- include/linux/of.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index d66c915df910..e9209ef44cc0 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -565,6 +565,8 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline, size_t extra_fdt_size); +int ima_get_kexec_buffer(void **addr, size_t *size); +int ima_free_kexec_buffer(void); #else /* CONFIG_OF */ static inline void of_core_init(void) -- cgit v1.2.3 From 69dd4503a7e6bae3389b8e028e5768008be8f2d7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Feb 2021 15:36:07 +0100 Subject: irqdomain: Remove debugfs_file from struct irq_domain There's no need to keep around a dentry pointer to a simple file that debugfs itself can look up when we need to remove it from the system. So simplify the code by deleting the variable and cleaning up the logic around the debugfs file. Cc: Marc Zyngier Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/YCvYV53ZdzQSWY6w@kroah.com --- include/linux/irqdomain.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 42d196805f58..33cacc8af26d 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -150,7 +150,6 @@ struct irq_domain_chip_generic; * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains - * @debugfs_file: dentry for the domain debugfs file * * Revmap data, used internally by irq_domain * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that @@ -174,9 +173,6 @@ struct irq_domain { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif -#ifdef CONFIG_GENERIC_IRQ_DEBUGFS - struct dentry *debugfs_file; -#endif /* reverse map data. The linear map gets appended to the irq_domain */ irq_hw_number_t hwirq_max; -- cgit v1.2.3 From 5bb1369d4bea078dd1298dfc2c6ce781d9e34dde Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Sat, 16 Jan 2021 00:11:45 +0900 Subject: rculist: Replace reference to atomic_ops.rst The hlist_nulls_for_each_entry_rcu() docbook header references the atomic_ops.rst file, which was removed in commit f0400a77ebdc ("atomic: Delete obsolete documentation"). This commit therefore substitutes a section in memory-barriers.txt discussing the use of barrier() in loops. Cc: Peter Zijlstra Signed-off-by: Akira Yokosawa Signed-off-by: Paul E. McKenney --- include/linux/rculist_nulls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index ff3e94779e73..d8afdb8784c1 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -161,7 +161,7 @@ static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n) * * The barrier() is needed to make sure compiler doesn't cache first element [1], * as this loop can be restarted [2] - * [1] Documentation/core-api/atomic_ops.rst around line 114 + * [1] Documentation/memory-barriers.txt around line 1533 * [2] Documentation/RCU/rculist_nulls.rst around line 146 */ #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ -- cgit v1.2.3 From e75956bd00cf4246067c6aee7751faf313233435 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 08:22:02 +0100 Subject: rcu: Fix kfree_rcu() docbook errors After commit 5130b8fd0690 ("rcu: Introduce kfree_rcu() single-argument macro"), kernel-doc now emits two warnings: ./include/linux/rcupdate.h:884: warning: Excess function parameter 'ptr' description in 'kfree_rcu' ./include/linux/rcupdate.h:884: warning: Excess function parameter 'rhf' description in 'kfree_rcu' This commit added some macro magic in order to call two different versions of kfree_rcu(), the first having just one argument and the second having two arguments. That makes it difficult to document the kfree_rcu() arguments in the docboook header. In order to make clearer that this macro accepts optional arguments, this commit uses macro concatenation so that this macro changes from: #define kfree_rcu kvfree_rcu to: #define kfree_rcu(ptr, rhf...) kvfree_rcu(ptr, ## rhf) That not only helps kernel-doc understand the macro arguments, but also provides a better C definition that makes clearer that the first argument is mandatory and the second one is optional. Fixes: 5130b8fd0690 ("rcu: Introduce kfree_rcu() single-argument macro") Tested-by: Uladzislau Rezki (Sony) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index bd04f722714f..5cc6deaa5df2 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -881,7 +881,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * The BUILD_BUG_ON check must not involve any function calls, hence the * checks are done in macros here. */ -#define kfree_rcu kvfree_rcu +#define kfree_rcu(ptr, rhf...) kvfree_rcu(ptr, ## rhf) /** * kvfree_rcu() - kvfree an object after a grace period. -- cgit v1.2.3 From 5bb1bb353cfe343fc3c84faf06f72ba309fde541 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 7 Jan 2021 13:46:11 -0800 Subject: mm: Don't build mm_dump_obj() on CONFIG_PRINTK=n kernels The mem_dump_obj() functionality adds a few hundred bytes, which is a small price to pay. Except on kernels built with CONFIG_PRINTK=n, in which mem_dump_obj() messages will be suppressed. This commit therefore makes mem_dump_obj() be a static inline empty function on kernels built with CONFIG_PRINTK=n and excludes all of its support functions as well. This avoids kernel bloat on systems that cannot use mem_dump_obj(). Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Suggested-by: Andrew Morton Signed-off-by: Paul E. McKenney --- include/linux/mm.h | 4 ++++ include/linux/slab.h | 2 ++ include/linux/vmalloc.h | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 77e64e3eac80..89fca443e6f1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3135,7 +3135,11 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping, extern int sysctl_nr_trim_pages; +#ifdef CONFIG_PRINTK void mem_dump_obj(void *object); +#else +static inline void mem_dump_obj(void *object) {} +#endif #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 7ae604076767..0c97d788762c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -186,8 +186,10 @@ void kfree(const void *); void kfree_sensitive(const void *); size_t __ksize(const void *); size_t ksize(const void *); +#ifdef CONFIG_PRINTK bool kmem_valid_obj(void *object); void kmem_dump_obj(void *object); +#endif #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR void __check_heap_object(const void *ptr, unsigned long n, struct page *page, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index df92211cf771..3de7be6dd17c 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -241,7 +241,7 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) int register_vmap_purge_notifier(struct notifier_block *nb); int unregister_vmap_purge_notifier(struct notifier_block *nb); -#ifdef CONFIG_MMU +#if defined(CONFIG_MMU) && defined(CONFIG_PRINTK) bool vmalloc_dump_obj(void *object); #else static inline bool vmalloc_dump_obj(void *object) { return false; } -- cgit v1.2.3 From bd0ccc4afca2d6ae0029cae35c4f1d2e2ade7579 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 15 Jan 2021 18:09:53 +0100 Subject: kcsan: Add missing license and copyright headers Adds missing license and/or copyright headers for KCSAN source files. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 6 ++++++ include/linux/kcsan.h | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index cf14840609ce..9fd0ad80fef6 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -1,4 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * KCSAN access checks and modifiers. These can be used to explicitly check + * uninstrumented accesses, or change KCSAN checking behaviour of accesses. + * + * Copyright (C) 2019, Google LLC. + */ #ifndef _LINUX_KCSAN_CHECKS_H #define _LINUX_KCSAN_CHECKS_H diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index 53340d8789f9..fc266ecb2a4d 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -1,4 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * The Kernel Concurrency Sanitizer (KCSAN) infrastructure. Public interface and + * data structures to set up runtime. See kcsan-checks.h for explicit checks and + * modifiers. For more info please see Documentation/dev-tools/kcsan.rst. + * + * Copyright (C) 2019, Google LLC. + */ #ifndef _LINUX_KCSAN_H #define _LINUX_KCSAN_H -- cgit v1.2.3 From 1019d7923d9d4cc878a1a85d4fc2d6619cfe1a6a Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 7 Mar 2021 22:25:28 -0500 Subject: atm: fix a typo in the struct description phy_data means private PHY data not date Signed-off-by: Tong Zhang Signed-off-by: David S. Miller --- include/linux/atmdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 60cd25c0461b..9b02961d65ee 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -151,7 +151,7 @@ struct atm_dev { const char *type; /* device type name */ int number; /* device index */ void *dev_data; /* per-device data */ - void *phy_data; /* private PHY date */ + void *phy_data; /* private PHY data */ unsigned long flags; /* device flags (ATM_DF_*) */ struct list_head local; /* local ATM addresses */ struct list_head lecs; /* LECS ATM addresses learned via ILMI */ -- cgit v1.2.3 From 1224451bb6f938023dd7fa4e7ba43bfb185bc9e3 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Mar 2021 14:30:37 +0100 Subject: PM / devfreq: Register devfreq as a cooling device on demand Currently the default behavior is to manually having the devfreq backend to register themselves as a devfreq cooling device. Instead of adding the code in the drivers for the thermal cooling device registering, let's provide a flag in the devfreq's profile to tell the common devfreq code to register the newly created devfreq as a cooling device. Suggested-by: Chanwoo Choi Signed-off-by: Daniel Lezcano Reviewed-by: Steven Price Reviewed-by: Lukasz Luba Signed-off-by: Chanwoo Choi --- include/linux/devfreq.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 26ea0850be9b..142474b4af96 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -38,6 +38,7 @@ enum devfreq_timer { struct devfreq; struct devfreq_governor; +struct thermal_cooling_device; /** * struct devfreq_dev_status - Data given from devfreq user device to @@ -98,11 +99,15 @@ struct devfreq_dev_status { * @freq_table: Optional list of frequencies to support statistics * and freq_table must be generated in ascending order. * @max_state: The size of freq_table. + * + * @is_cooling_device: A self-explanatory boolean giving the device a + * cooling effect property. */ struct devfreq_dev_profile { unsigned long initial_freq; unsigned int polling_ms; enum devfreq_timer timer; + bool is_cooling_device; int (*target)(struct device *dev, unsigned long *freq, u32 flags); int (*get_dev_status)(struct device *dev, @@ -156,6 +161,7 @@ struct devfreq_stats { * @suspend_count: suspend requests counter for a device. * @stats: Statistics of devfreq device behavior * @transition_notifier_list: list head of DEVFREQ_TRANSITION_NOTIFIER notifier + * @cdev: Cooling device pointer if the devfreq has cooling property * @nb_min: Notifier block for DEV_PM_QOS_MIN_FREQUENCY * @nb_max: Notifier block for DEV_PM_QOS_MAX_FREQUENCY * @@ -198,6 +204,9 @@ struct devfreq { struct srcu_notifier_head transition_notifier_list; + /* Pointer to the cooling device if used for thermal mitigation */ + struct thermal_cooling_device *cdev; + struct notifier_block nb_min; struct notifier_block nb_max; }; -- cgit v1.2.3 From f38a1644832792ece8c63c9a5d9cd7122c62bf64 Mon Sep 17 00:00:00 2001 From: Ray Chi Date: Mon, 8 Mar 2021 21:31:46 +0800 Subject: power: supply: core: provide function stubs if CONFIG_POWER_SUPPLY=n Fix build error when CONFIG_POWER_SUPPLY is not enabled. The build error occurs in mips (cavium_octeon_defconfig). mips-linux-gnu-ld: drivers/usb/dwc3/core.o: in function `dwc3_remove': drivers/usb/dwc3/core.c:1657: undefined reference to `power_supply_put' mips-linux-gnu-ld: drivers/usb/dwc3/core.o: in function `dwc3_get_properties': drivers/usb/dwc3/core.c:1270: undefined reference to `power_supply_get_by_name' mips-linux-gnu-ld: drivers/usb/dwc3/core.o: in function `dwc3_probe': drivers/usb/dwc3/core.c:1632: undefined reference to `power_supply_put' Fixes: 59fa3def35de ("usb: dwc3: add a power supply for current control") Reported-by: Naresh Kamboju Signed-off-by: Ray Chi Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 81a55e974feb..6e776be5bfa0 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -381,8 +381,14 @@ struct power_supply_battery_info { extern struct atomic_notifier_head power_supply_notifier; extern int power_supply_reg_notifier(struct notifier_block *nb); extern void power_supply_unreg_notifier(struct notifier_block *nb); +#if IS_ENABLED(CONFIG_POWER_SUPPLY) extern struct power_supply *power_supply_get_by_name(const char *name); extern void power_supply_put(struct power_supply *psy); +#else +static inline void power_supply_put(struct power_supply *psy) {} +static inline struct power_supply *power_supply_get_by_name(const char *name) +{ return NULL; } +#endif #ifdef CONFIG_OF extern struct power_supply *power_supply_get_by_phandle(struct device_node *np, const char *property); -- cgit v1.2.3 From 05a68ce5fa51a83c360381630f823545c5757aa2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 9 Mar 2021 10:50:28 -0800 Subject: bpf: Don't do bpf_cgroup_storage_set() for kuprobe/tp programs For kuprobe and tracepoint bpf programs, kernel calls trace_call_bpf() which calls BPF_PROG_RUN_ARRAY_CHECK() to run the program array. Currently, BPF_PROG_RUN_ARRAY_CHECK() also calls bpf_cgroup_storage_set() to set percpu cgroup local storage with NULL value. This is due to Commit 394e40a29788 ("bpf: extend bpf_prog_array to store pointers to the cgroup storage") which modified __BPF_PROG_RUN_ARRAY() to call bpf_cgroup_storage_set() and this macro is also used by BPF_PROG_RUN_ARRAY_CHECK(). kuprobe and tracepoint programs are not allowed to call bpf_get_local_storage() helper hence does not access percpu cgroup local storage. Let us change BPF_PROG_RUN_ARRAY_CHECK() not to modify percpu cgroup local storage. The issue is observed when I tried to debug [1] where percpu data is overwritten due to preempt_disable -> migration_disable change. This patch does not completely fix the above issue, which will be addressed separately, e.g., multiple cgroup prog runs may preempt each other. But it does fix any potential issue caused by tracing program overwriting percpu cgroup storage: - in a busy system, a tracing program is to run between bpf_cgroup_storage_set() and the cgroup prog run. - a kprobe program is triggered by a helper in cgroup prog before bpf_get_local_storage() is called. [1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@mail.gmail.com/T Fixes: 394e40a29788 ("bpf: extend bpf_prog_array to store pointers to the cgroup storage") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Roman Gushchin Link: https://lore.kernel.org/bpf/20210309185028.3763817-1-yhs@fb.com --- include/linux/bpf.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cccaef1088ea..d7e0f479a5b0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1093,7 +1093,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, _ret; \ }) -#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ +#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage) \ ({ \ struct bpf_prog_array_item *_item; \ struct bpf_prog *_prog; \ @@ -1106,7 +1106,8 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, goto _out; \ _item = &_array->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ + if (set_cg_storage) \ + bpf_cgroup_storage_set(_item->cgroup_storage); \ _ret &= func(_prog, ctx); \ _item++; \ } \ @@ -1153,10 +1154,10 @@ _out: \ }) #define BPF_PROG_RUN_ARRAY(array, ctx, func) \ - __BPF_PROG_RUN_ARRAY(array, ctx, func, false) + __BPF_PROG_RUN_ARRAY(array, ctx, func, false, true) #define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ - __BPF_PROG_RUN_ARRAY(array, ctx, func, true) + __BPF_PROG_RUN_ARRAY(array, ctx, func, true, false) #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); -- cgit v1.2.3 From e6a4750ffe9d701c4d55212b14b615e63571d235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 8 Mar 2021 12:29:06 +0100 Subject: bpf, xdp: Make bpf_redirect_map() a map operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the bpf_redirect_map() implementation dispatches to the correct map-lookup function via a switch-statement. To avoid the dispatching, this change adds bpf_redirect_map() as a map operation. Each map provides its bpf_redirect_map() version, and correct function is automatically selected by the BPF verifier. A nice side-effect of the code movement is that the map lookup functions are now local to the map implementation files, which removes one additional function call. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20210308112907.559576-2-bjorn.topel@gmail.com --- include/linux/bpf.h | 26 ++++++-------------------- include/linux/filter.h | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c931bc97019d..a25730eaa148 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -118,6 +118,9 @@ struct bpf_map_ops { void *owner, u32 size); struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); + /* Misc helpers.*/ + int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags); + /* map_meta_equal must be implemented for maps that can be * used as an inner map. It is a runtime check to ensure * an inner map can be inserted to an outer map. @@ -1450,9 +1453,9 @@ struct btf *bpf_get_btf_vmlinux(void); /* Map specifics */ struct xdp_buff; struct sk_buff; +struct bpf_dtab_netdev; +struct bpf_cpu_map_entry; -struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); -struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); void __dev_flush(void); int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -1462,7 +1465,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog); bool dev_map_can_have_prog(struct bpf_map *map); -struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_flush(void); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -1593,17 +1595,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } -static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, - u32 key) -{ - return NULL; -} - -static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map, - u32 key) -{ - return NULL; -} static inline bool dev_map_can_have_prog(struct bpf_map *map) { return false; @@ -1615,6 +1606,7 @@ static inline void __dev_flush(void) struct xdp_buff; struct bpf_dtab_netdev; +struct bpf_cpu_map_entry; static inline int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, @@ -1639,12 +1631,6 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, return 0; } -static inline -struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) -{ - return NULL; -} - static inline void __cpu_map_flush(void) { } diff --git a/include/linux/filter.h b/include/linux/filter.h index 3b00fc906ccd..008691fd3b58 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1472,4 +1472,31 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, } #endif /* IS_ENABLED(CONFIG_IPV6) */ +static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags, + void *lookup_elem(struct bpf_map *map, u32 key)) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + + /* Lower bits of the flags are used as return code on lookup failure */ + if (unlikely(flags > XDP_TX)) + return XDP_ABORTED; + + ri->tgt_value = lookup_elem(map, ifindex); + if (unlikely(!ri->tgt_value)) { + /* If the lookup fails we want to clear out the state in the + * redirect_info struct completely, so that if an eBPF program + * performs multiple lookups, the last one always takes + * precedence. + */ + WRITE_ONCE(ri->map, NULL); + return flags; + } + + ri->flags = flags; + ri->tgt_index = ifindex; + WRITE_ONCE(ri->map, map); + + return XDP_REDIRECT; +} + #endif /* __LINUX_FILTER_H__ */ -- cgit v1.2.3 From ee75aef23afe6e88497151c127c13ed69f41aaa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 8 Mar 2021 12:29:07 +0100 Subject: bpf, xdp: Restructure redirect actions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XDP_REDIRECT implementations for maps and non-maps are fairly similar, but obviously need to take different code paths depending on if the target is using a map or not. Today, the redirect targets for XDP either uses a map, or is based on ifindex. Here, the map type and id are added to bpf_redirect_info, instead of the actual map. Map type, map item/ifindex, and the map_id (if any) is passed to xdp_do_redirect(). For ifindex-based redirect, used by the bpf_redirect() XDP BFP helper, a special map type/id are used. Map type of UNSPEC together with map id equal to INT_MAX has the special meaning of an ifindex based redirect. Note that valid map ids are 1 inclusive, INT_MAX exclusive ([1,INT_MAX[). In addition to making the code easier to follow, using explicit type and id in bpf_redirect_info has a slight positive performance impact by avoiding a pointer indirection for the map type lookup, and instead use the cacheline for bpf_redirect_info. Since the actual map is not passed via bpf_redirect_info anymore, the map lookup is only done in the BPF helper. This means that the bpf_clear_redirect_map() function can be removed. The actual map item is RCU protected. The bpf_redirect_info flags member is not used by XDP, and not read/written any more. The map member is only written to when required/used, and not unconditionally. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Reviewed-by: Maciej Fijalkowski Acked-by: Jesper Dangaard Brouer Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20210308112907.559576-3-bjorn.topel@gmail.com --- include/linux/filter.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 008691fd3b58..b2b85b2cad8e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -646,7 +646,8 @@ struct bpf_redirect_info { u32 flags; u32 tgt_index; void *tgt_value; - struct bpf_map *map; + u32 map_id; + enum bpf_map_type map_type; u32 kern_flags; struct bpf_nh_params nh; }; @@ -1488,13 +1489,14 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind * performs multiple lookups, the last one always takes * precedence. */ - WRITE_ONCE(ri->map, NULL); + ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */ + ri->map_type = BPF_MAP_TYPE_UNSPEC; return flags; } - ri->flags = flags; ri->tgt_index = ifindex; - WRITE_ONCE(ri->map, map); + ri->map_id = map->id; + ri->map_type = map->map_type; return XDP_REDIRECT; } -- cgit v1.2.3 From 924a9bc362a5223cd448ca08c3dde21235adc310 Mon Sep 17 00:00:00 2001 From: Balazs Nemeth Date: Tue, 9 Mar 2021 12:31:00 +0100 Subject: net: check if protocol extracted by virtio_net_hdr_set_proto is correct For gso packets, virtio_net_hdr_set_proto sets the protocol (if it isn't set) based on the type in the virtio net hdr, but the skb could contain anything since it could come from packet_snd through a raw socket. If there is a mismatch between what virtio_net_hdr_set_proto sets and the actual protocol, then the skb could be handled incorrectly later on. An example where this poses an issue is with the subsequent call to skb_flow_dissect_flow_keys_basic which relies on skb->protocol being set correctly. A specially crafted packet could fool skb_flow_dissect_flow_keys_basic preventing EINVAL to be returned. Avoid blindly trusting the information provided by the virtio net header by checking that the protocol in the packet actually matches the protocol set by virtio_net_hdr_set_proto. Note that since the protocol is only checked if skb->dev implements header_ops->parse_protocol, packets from devices without the implementation are not checked at this stage. Fixes: 9274124f023b ("net: stricter validation of untrusted gso packets") Signed-off-by: Balazs Nemeth Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index e8a924eeea3d..6b5fcfa1e555 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -79,8 +79,13 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (gso_type && skb->network_header) { struct flow_keys_basic keys; - if (!skb->protocol) + if (!skb->protocol) { + __be16 protocol = dev_parse_header_protocol(skb); + virtio_net_hdr_set_proto(skb, hdr); + if (protocol && protocol != skb->protocol) + return -EINVAL; + } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, NULL, 0, 0, 0, -- cgit v1.2.3 From eec73529a9321616ed13cf732cd21a17eb1a2836 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 10 Mar 2021 08:16:40 +0530 Subject: arch_topology: Rename freq_scale as arch_freq_scale Rename freq_scale to a less generic name, as it will get exported soon for modules. Since x86 already names its own implementation of this as arch_freq_scale, lets stick to that. Suggested-by: Will Deacon Signed-off-by: Viresh Kumar --- include/linux/arch_topology.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 0f6cd6b73a61..583af517f123 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -23,11 +23,11 @@ static inline unsigned long topology_get_cpu_scale(int cpu) void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); -DECLARE_PER_CPU(unsigned long, freq_scale); +DECLARE_PER_CPU(unsigned long, arch_freq_scale); static inline unsigned long topology_get_freq_scale(int cpu) { - return per_cpu(freq_scale, cpu); + return per_cpu(arch_freq_scale, cpu); } void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, -- cgit v1.2.3 From 01e055c120a46e78650b5f903088badbbdaae9ad Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 10 Mar 2021 08:21:04 +0530 Subject: arch_topology: Allow multiple entities to provide sched_freq_tick() callback This patch attempts to make it generic enough so other parts of the kernel can also provide their own implementation of scale_freq_tick() callback, which is called by the scheduler periodically to update the per-cpu arch_freq_scale variable. The implementations now need to provide 'struct scale_freq_data' for the CPUs for which they have hardware counters available, and a callback gets registered for each possible CPU in a per-cpu variable. The arch specific (or ARM AMU) counters are updated to adapt to this and they take the highest priority if they are available, i.e. they will be used instead of CPPC based counters for example. The special code to rebuild the sched domains, in case invariance status change for the system, is moved out of arm64 specific code and is added to arch_topology.c. Note that this also defines SCALE_FREQ_SOURCE_CPUFREQ but doesn't use it and it is added to show that cpufreq is also acts as source of information for FIE and will be used by default if no other counters are supported for a platform. Reviewed-by: Ionela Voinescu Tested-by: Ionela Voinescu Acked-by: Will Deacon # for arm64 Tested-by: Vincent Guittot Signed-off-by: Viresh Kumar --- include/linux/arch_topology.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 583af517f123..11e555cfaecb 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -34,7 +34,19 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, unsigned long max_freq); bool topology_scale_freq_invariant(void); -bool arch_freq_counters_available(const struct cpumask *cpus); +enum scale_freq_source { + SCALE_FREQ_SOURCE_CPUFREQ = 0, + SCALE_FREQ_SOURCE_ARCH, +}; + +struct scale_freq_data { + enum scale_freq_source source; + void (*set_freq_scale)(void); +}; + +void topology_scale_freq_tick(void); +void topology_set_scale_freq_source(struct scale_freq_data *data, const struct cpumask *cpus); +void topology_clear_scale_freq_source(enum scale_freq_source source, const struct cpumask *cpus); DECLARE_PER_CPU(unsigned long, thermal_pressure); -- cgit v1.2.3 From 51f24030358bdeeb9e75a38618dd029c5a53beeb Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Sun, 21 Feb 2021 21:43:38 +0800 Subject: cpu/hotplug: Fix build error of using {add,remove}_cpu() with !CONFIG_SMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 279dcf693ac7 ("virt: acrn: Introduce an interface for Service VM to control vCPU") introduced {add,remove}_cpu() usage and it hit below error with !CONFIG_SMP: ../drivers/virt/acrn/hsm.c: In function ‘remove_cpu_store’: ../drivers/virt/acrn/hsm.c:389:3: error: implicit declaration of function ‘remove_cpu’; [-Werror=implicit-function-declaration] remove_cpu(cpu); ../drivers/virt/acrn/hsm.c:402:2: error: implicit declaration of function ‘add_cpu’; [-Werror=implicit-function-declaration] add_cpu(cpu); Add add_cpu() function prototypes with !CONFIG_SMP and remove_cpu() with !CONFIG_HOTPLUG_CPU for such usage. Fixes: 279dcf693ac7 ("virt: acrn: Introduce an interface for Service VM to control vCPU") Cc: Stephen Rothwell Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Qais Yousef Reported-by: Randy Dunlap Reviewed-by: Qais Yousef Acked-by: Randy Dunlap # build-tested Signed-off-by: Shuo Liu Link: https://lore.kernel.org/r/20210221134339.57851-1-shuo.a.liu@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 3aaa0687e8df..94a578a96202 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -108,6 +108,8 @@ static inline void cpu_maps_update_done(void) { } +static inline int add_cpu(unsigned int cpu) { return 0;} + #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; @@ -137,6 +139,7 @@ static inline int cpus_read_trylock(void) { return true; } static inline void lockdep_assert_cpus_held(void) { } static inline void cpu_hotplug_disable(void) { } static inline void cpu_hotplug_enable(void) { } +static inline int remove_cpu(unsigned int cpu) { return -EPERM; } static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } #endif /* !CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From 00025161b2d931f97d72418114abf73bfb51ae4c Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 2 Mar 2021 07:21:33 +0100 Subject: PCI: remove synclink entries from pci_ids The drivers were removed in a1f714b44e34 (tty: Remove redundant synclink driver) and 3d608a591b2b (tty: Remove redundant synclinkmp driver). So remove also the PCI ID entries. Cc: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20210302062214.29627-3-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a76ccb697bef..8a18517696c1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2065,8 +2065,6 @@ #define PCI_DEVICE_ID_EXAR_XR17V358 0x0358 #define PCI_VENDOR_ID_MICROGATE 0x13c0 -#define PCI_DEVICE_ID_MICROGATE_USC 0x0010 -#define PCI_DEVICE_ID_MICROGATE_SCA 0x0030 #define PCI_VENDOR_ID_3WARE 0x13C1 #define PCI_DEVICE_ID_3WARE_1000 0x1000 -- cgit v1.2.3 From f76edd8f7ce06cdff2fe5b6b39a49644c684a161 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 2 Mar 2021 07:21:35 +0100 Subject: tty: cyclades, remove this orphan The Cyclades driver was orphaned by commit d459883e6c54 (MAINTAINERS: remove two dead e-mail) 13 years ago. Noone stepped up to take care of them and to fix all the issues the driver has. On the top of that, there is no way to obtain the firmware for Z cards from the vendor as cyclades.com ceased to exist. So it's time to drop the driver with all its traces. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20210302062214.29627-5-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/cyclades.h | 364 ----------------------------------------------- include/linux/pci_ids.h | 8 -- 2 files changed, 372 deletions(-) delete mode 100644 include/linux/cyclades.h (limited to 'include/linux') diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h deleted file mode 100644 index 05ee0f19448a..000000000000 --- a/include/linux/cyclades.h +++ /dev/null @@ -1,364 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* $Revision: 3.0 $$Date: 1998/11/02 14:20:59 $ - * linux/include/linux/cyclades.h - * - * This file was initially written by - * Randolph Bentson and is maintained by - * Ivan Passos . - * - * This file contains the general definitions for the cyclades.c driver - *$Log: cyclades.h,v $ - *Revision 3.1 2002/01/29 11:36:16 henrique - *added throttle field on struct cyclades_port to indicate whether the - *port is throttled or not - * - *Revision 3.1 2000/04/19 18:52:52 ivan - *converted address fields to unsigned long and added fields for physical - *addresses on cyclades_card structure; - * - *Revision 3.0 1998/11/02 14:20:59 ivan - *added nports field on cyclades_card structure; - * - *Revision 2.5 1998/08/03 16:57:01 ivan - *added cyclades_idle_stats structure; - * - *Revision 2.4 1998/06/01 12:09:53 ivan - *removed closing_wait2 from cyclades_port structure; - * - *Revision 2.3 1998/03/16 18:01:12 ivan - *changes in the cyclades_port structure to get it closer to the - *standard serial port structure; - *added constants for new ioctls; - * - *Revision 2.2 1998/02/17 16:50:00 ivan - *changes in the cyclades_port structure (addition of shutdown_wait and - *chip_rev variables); - *added constants for new ioctls and for CD1400 rev. numbers. - * - *Revision 2.1 1997/10/24 16:03:00 ivan - *added rflow (which allows enabling the CD1400 special flow control - *feature) and rtsdtr_inv (which allows DTR/RTS pin inversion) to - *cyclades_port structure; - *added Alpha support - * - *Revision 2.0 1997/06/30 10:30:00 ivan - *added some new doorbell command constants related to IOCTLW and - *UART error signaling - * - *Revision 1.8 1997/06/03 15:30:00 ivan - *added constant ZFIRM_HLT - *added constant CyPCI_Ze_win ( = 2 * Cy_PCI_Zwin) - * - *Revision 1.7 1997/03/26 10:30:00 daniel - *new entries at the end of cyclades_port struct to reallocate - *variables illegally allocated within card memory. - * - *Revision 1.6 1996/09/09 18:35:30 bentson - *fold in changes for Cyclom-Z -- including structures for - *communicating with board as well modest changes to original - *structures to support new features. - * - *Revision 1.5 1995/11/13 21:13:31 bentson - *changes suggested by Michael Chastain - *to support use of this file in non-kernel applications - * - * - */ -#ifndef _LINUX_CYCLADES_H -#define _LINUX_CYCLADES_H - -#include - - -/* Per card data structure */ -struct cyclades_card { - void __iomem *base_addr; - union { - void __iomem *p9050; - struct RUNTIME_9060 __iomem *p9060; - } ctl_addr; - struct BOARD_CTRL __iomem *board_ctrl; /* cyz specific */ - int irq; - unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */ - unsigned int first_line; /* minor number of first channel on card */ - unsigned int nports; /* Number of ports in the card */ - int bus_index; /* address shift - 0 for ISA, 1 for PCI */ - int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ - u32 hw_ver; - spinlock_t card_lock; - struct cyclades_port *ports; -}; - -/*************************************** - * Memory access functions/macros * - * (required to support Alpha systems) * - ***************************************/ - -#define cy_writeb(port,val) do { writeb((val), (port)); mb(); } while (0) -#define cy_writew(port,val) do { writew((val), (port)); mb(); } while (0) -#define cy_writel(port,val) do { writel((val), (port)); mb(); } while (0) - -/* - * Statistics counters - */ -struct cyclades_icount { - __u32 cts, dsr, rng, dcd, tx, rx; - __u32 frame, parity, overrun, brk; - __u32 buf_overrun; -}; - -/* - * This is our internal structure for each serial port's state. - * - * Many fields are paralleled by the structure used by the serial_struct - * structure. - * - * For definitions of the flags field, see tty.h - */ - -struct cyclades_port { - int magic; - struct tty_port port; - struct cyclades_card *card; - union { - struct { - void __iomem *base_addr; - } cyy; - struct { - struct CH_CTRL __iomem *ch_ctrl; - struct BUF_CTRL __iomem *buf_ctrl; - } cyz; - } u; - int line; - int flags; /* defined in tty.h */ - int type; /* UART type */ - int read_status_mask; - int ignore_status_mask; - int timeout; - int xmit_fifo_size; - int cor1,cor2,cor3,cor4,cor5; - int tbpr,tco,rbpr,rco; - int baud; - int rflow; - int rtsdtr_inv; - int chip_rev; - int custom_divisor; - u8 x_char; /* to be pushed out ASAP */ - int breakon; - int breakoff; - int xmit_head; - int xmit_tail; - int xmit_cnt; - int default_threshold; - int default_timeout; - unsigned long rflush_count; - struct cyclades_monitor mon; - struct cyclades_idle_stats idle_stats; - struct cyclades_icount icount; - struct completion shutdown_wait; - int throttle; -#ifdef CONFIG_CYZ_INTR - struct timer_list rx_full_timer; -#endif -}; - -#define CLOSING_WAIT_DELAY 30*HZ -#define CY_CLOSING_WAIT_NONE ASYNC_CLOSING_WAIT_NONE -#define CY_CLOSING_WAIT_INF ASYNC_CLOSING_WAIT_INF - - -#define CyMAX_CHIPS_PER_CARD 8 -#define CyMAX_CHAR_FIFO 12 -#define CyPORTS_PER_CHIP 4 -#define CD1400_MAX_SPEED 115200 - -#define CyISA_Ywin 0x2000 - -#define CyPCI_Ywin 0x4000 -#define CyPCI_Yctl 0x80 -#define CyPCI_Zctl CTRL_WINDOW_SIZE -#define CyPCI_Zwin 0x80000 -#define CyPCI_Ze_win (2 * CyPCI_Zwin) - -#define PCI_DEVICE_ID_MASK 0x06 - -/**** CD1400 registers ****/ - -#define CD1400_REV_G 0x46 -#define CD1400_REV_J 0x48 - -#define CyRegSize 0x0400 -#define Cy_HwReset 0x1400 -#define Cy_ClrIntr 0x1800 -#define Cy_EpldRev 0x1e00 - -/* Global Registers */ - -#define CyGFRCR (0x40*2) -#define CyRevE (44) -#define CyCAR (0x68*2) -#define CyCHAN_0 (0x00) -#define CyCHAN_1 (0x01) -#define CyCHAN_2 (0x02) -#define CyCHAN_3 (0x03) -#define CyGCR (0x4B*2) -#define CyCH0_SERIAL (0x00) -#define CyCH0_PARALLEL (0x80) -#define CySVRR (0x67*2) -#define CySRModem (0x04) -#define CySRTransmit (0x02) -#define CySRReceive (0x01) -#define CyRICR (0x44*2) -#define CyTICR (0x45*2) -#define CyMICR (0x46*2) -#define CyICR0 (0x00) -#define CyICR1 (0x01) -#define CyICR2 (0x02) -#define CyICR3 (0x03) -#define CyRIR (0x6B*2) -#define CyTIR (0x6A*2) -#define CyMIR (0x69*2) -#define CyIRDirEq (0x80) -#define CyIRBusy (0x40) -#define CyIRUnfair (0x20) -#define CyIRContext (0x1C) -#define CyIRChannel (0x03) -#define CyPPR (0x7E*2) -#define CyCLOCK_20_1MS (0x27) -#define CyCLOCK_25_1MS (0x31) -#define CyCLOCK_25_5MS (0xf4) -#define CyCLOCK_60_1MS (0x75) -#define CyCLOCK_60_2MS (0xea) - -/* Virtual Registers */ - -#define CyRIVR (0x43*2) -#define CyTIVR (0x42*2) -#define CyMIVR (0x41*2) -#define CyIVRMask (0x07) -#define CyIVRRxEx (0x07) -#define CyIVRRxOK (0x03) -#define CyIVRTxOK (0x02) -#define CyIVRMdmOK (0x01) -#define CyTDR (0x63*2) -#define CyRDSR (0x62*2) -#define CyTIMEOUT (0x80) -#define CySPECHAR (0x70) -#define CyBREAK (0x08) -#define CyPARITY (0x04) -#define CyFRAME (0x02) -#define CyOVERRUN (0x01) -#define CyMISR (0x4C*2) -/* see CyMCOR_ and CyMSVR_ for bits*/ -#define CyEOSRR (0x60*2) - -/* Channel Registers */ - -#define CyLIVR (0x18*2) -#define CyMscsr (0x01) -#define CyTdsr (0x02) -#define CyRgdsr (0x03) -#define CyRedsr (0x07) -#define CyCCR (0x05*2) -/* Format 1 */ -#define CyCHAN_RESET (0x80) -#define CyCHIP_RESET (0x81) -#define CyFlushTransFIFO (0x82) -/* Format 2 */ -#define CyCOR_CHANGE (0x40) -#define CyCOR1ch (0x02) -#define CyCOR2ch (0x04) -#define CyCOR3ch (0x08) -/* Format 3 */ -#define CySEND_SPEC_1 (0x21) -#define CySEND_SPEC_2 (0x22) -#define CySEND_SPEC_3 (0x23) -#define CySEND_SPEC_4 (0x24) -/* Format 4 */ -#define CyCHAN_CTL (0x10) -#define CyDIS_RCVR (0x01) -#define CyENB_RCVR (0x02) -#define CyDIS_XMTR (0x04) -#define CyENB_XMTR (0x08) -#define CySRER (0x06*2) -#define CyMdmCh (0x80) -#define CyRxData (0x10) -#define CyTxRdy (0x04) -#define CyTxMpty (0x02) -#define CyNNDT (0x01) -#define CyCOR1 (0x08*2) -#define CyPARITY_NONE (0x00) -#define CyPARITY_0 (0x20) -#define CyPARITY_1 (0xA0) -#define CyPARITY_E (0x40) -#define CyPARITY_O (0xC0) -#define Cy_1_STOP (0x00) -#define Cy_1_5_STOP (0x04) -#define Cy_2_STOP (0x08) -#define Cy_5_BITS (0x00) -#define Cy_6_BITS (0x01) -#define Cy_7_BITS (0x02) -#define Cy_8_BITS (0x03) -#define CyCOR2 (0x09*2) -#define CyIXM (0x80) -#define CyTxIBE (0x40) -#define CyETC (0x20) -#define CyAUTO_TXFL (0x60) -#define CyLLM (0x10) -#define CyRLM (0x08) -#define CyRtsAO (0x04) -#define CyCtsAE (0x02) -#define CyDsrAE (0x01) -#define CyCOR3 (0x0A*2) -#define CySPL_CH_DRANGE (0x80) /* special character detect range */ -#define CySPL_CH_DET1 (0x40) /* enable special character detection - on SCHR4-SCHR3 */ -#define CyFL_CTRL_TRNSP (0x20) /* Flow Control Transparency */ -#define CySPL_CH_DET2 (0x10) /* Enable special character detection - on SCHR2-SCHR1 */ -#define CyREC_FIFO (0x0F) /* Receive FIFO threshold */ -#define CyCOR4 (0x1E*2) -#define CyCOR5 (0x1F*2) -#define CyCCSR (0x0B*2) -#define CyRxEN (0x80) -#define CyRxFloff (0x40) -#define CyRxFlon (0x20) -#define CyTxEN (0x08) -#define CyTxFloff (0x04) -#define CyTxFlon (0x02) -#define CyRDCR (0x0E*2) -#define CySCHR1 (0x1A*2) -#define CySCHR2 (0x1B*2) -#define CySCHR3 (0x1C*2) -#define CySCHR4 (0x1D*2) -#define CySCRL (0x22*2) -#define CySCRH (0x23*2) -#define CyLNC (0x24*2) -#define CyMCOR1 (0x15*2) -#define CyMCOR2 (0x16*2) -#define CyRTPR (0x21*2) -#define CyMSVR1 (0x6C*2) -#define CyMSVR2 (0x6D*2) -#define CyANY_DELTA (0xF0) -#define CyDSR (0x80) -#define CyCTS (0x40) -#define CyRI (0x20) -#define CyDCD (0x10) -#define CyDTR (0x02) -#define CyRTS (0x01) -#define CyPVSR (0x6F*2) -#define CyRBPR (0x78*2) -#define CyRCOR (0x7C*2) -#define CyTBPR (0x72*2) -#define CyTCOR (0x76*2) - -/* Custom Registers */ - -#define CyPLX_VER (0x3400) -#define PLX_9050 0x0b -#define PLX_9060 0x0c -#define PLX_9080 0x0d - -/***************************************************************************/ - -#endif /* _LINUX_CYCLADES_H */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 8a18517696c1..056d2074f07a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1711,14 +1711,6 @@ #define PCI_DEVICE_ID_CRP16INTF 0x0903 #define PCI_VENDOR_ID_CYCLADES 0x120e -#define PCI_DEVICE_ID_CYCLOM_Y_Lo 0x0100 -#define PCI_DEVICE_ID_CYCLOM_Y_Hi 0x0101 -#define PCI_DEVICE_ID_CYCLOM_4Y_Lo 0x0102 -#define PCI_DEVICE_ID_CYCLOM_4Y_Hi 0x0103 -#define PCI_DEVICE_ID_CYCLOM_8Y_Lo 0x0104 -#define PCI_DEVICE_ID_CYCLOM_8Y_Hi 0x0105 -#define PCI_DEVICE_ID_CYCLOM_Z_Lo 0x0200 -#define PCI_DEVICE_ID_CYCLOM_Z_Hi 0x0201 #define PCI_DEVICE_ID_PC300_RX_2 0x0300 #define PCI_DEVICE_ID_PC300_RX_1 0x0301 #define PCI_DEVICE_ID_PC300_TE_2 0x0310 -- cgit v1.2.3 From 67b1544a55c94b62f68488d5fcbc93cca293dc32 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 2 Mar 2021 07:21:36 +0100 Subject: tty: isicom, remove this orphan The Isicom driver was orphaned by commit d86b3001a1a6 (MAINTAINERS: orphan isicom) 10 years ago. Noone stepped up to take care of them and to fix all the issues the driver has. So it's time to drop the driver with all its traces. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20210302062214.29627-6-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/isicom.h | 85 -------------------------------------------------- 1 file changed, 85 deletions(-) delete mode 100644 include/linux/isicom.h (limited to 'include/linux') diff --git a/include/linux/isicom.h b/include/linux/isicom.h deleted file mode 100644 index 7de6822d7b1a..000000000000 --- a/include/linux/isicom.h +++ /dev/null @@ -1,85 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_ISICOM_H -#define _LINUX_ISICOM_H - -#define YES 1 -#define NO 0 - -/* - * ISICOM Driver definitions ... - * - */ - -#define ISICOM_NAME "ISICom" - -/* - * PCI definitions - */ - -#define DEVID_COUNT 9 -#define VENDOR_ID 0x10b5 - -/* - * These are now officially allocated numbers - */ - -#define ISICOM_NMAJOR 112 /* normal */ -#define ISICOM_CMAJOR 113 /* callout */ -#define ISICOM_MAGIC (('M' << 8) | 'T') - -#define WAKEUP_CHARS 256 /* hard coded for now */ -#define TX_SIZE 254 - -#define BOARD_COUNT 4 -#define PORT_COUNT (BOARD_COUNT*16) - -/* character sizes */ - -#define ISICOM_CS5 0x0000 -#define ISICOM_CS6 0x0001 -#define ISICOM_CS7 0x0002 -#define ISICOM_CS8 0x0003 - -/* stop bits */ - -#define ISICOM_1SB 0x0000 -#define ISICOM_2SB 0x0004 - -/* parity */ - -#define ISICOM_NOPAR 0x0000 -#define ISICOM_ODPAR 0x0008 -#define ISICOM_EVPAR 0x0018 - -/* flow control */ - -#define ISICOM_CTSRTS 0x03 -#define ISICOM_INITIATE_XONXOFF 0x04 -#define ISICOM_RESPOND_XONXOFF 0x08 - -#define BOARD(line) (((line) >> 4) & 0x3) - - /* isi kill queue bitmap */ - -#define ISICOM_KILLTX 0x01 -#define ISICOM_KILLRX 0x02 - - /* isi_board status bitmap */ - -#define FIRMWARE_LOADED 0x0001 -#define BOARD_ACTIVE 0x0002 -#define BOARD_INIT 0x0004 - - /* isi_port status bitmap */ - -#define ISI_CTS 0x1000 -#define ISI_DSR 0x2000 -#define ISI_RI 0x4000 -#define ISI_DCD 0x8000 -#define ISI_DTR 0x0100 -#define ISI_RTS 0x0200 - - -#define ISI_TXOK 0x0001 - -#endif /* ISICOM_H */ -- cgit v1.2.3 From 3b00b6af7a5bd7fd7e5189ccaad0e0cfb7dc7785 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 2 Mar 2021 07:21:37 +0100 Subject: tty: rocket, remove the driver While the driver is still marked as maintained in MAINTAINERS, Comtrol does not really care about this ancient driver. They are still manufacturing serial devices, but those are controlled only by out-of-tree drivers. Comtrol didn't answer my pings, so this driver is apparently unmaintained. Aside from that, the driver was untouched for years, only whole-tree changes happened during the past years. The driver needs much more care, so drop it for now. If someone steps up to reintroduce it, they need to clean it up first. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20210302062214.29627-7-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 056d2074f07a..4c3fa5293d76 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1688,27 +1688,6 @@ #define PCI_VENDOR_ID_MICROSEMI 0x11f8 #define PCI_VENDOR_ID_RP 0x11fe -#define PCI_DEVICE_ID_RP32INTF 0x0001 -#define PCI_DEVICE_ID_RP8INTF 0x0002 -#define PCI_DEVICE_ID_RP16INTF 0x0003 -#define PCI_DEVICE_ID_RP4QUAD 0x0004 -#define PCI_DEVICE_ID_RP8OCTA 0x0005 -#define PCI_DEVICE_ID_RP8J 0x0006 -#define PCI_DEVICE_ID_RP4J 0x0007 -#define PCI_DEVICE_ID_RP8SNI 0x0008 -#define PCI_DEVICE_ID_RP16SNI 0x0009 -#define PCI_DEVICE_ID_RPP4 0x000A -#define PCI_DEVICE_ID_RPP8 0x000B -#define PCI_DEVICE_ID_RP4M 0x000D -#define PCI_DEVICE_ID_RP2_232 0x000E -#define PCI_DEVICE_ID_RP2_422 0x000F -#define PCI_DEVICE_ID_URP32INTF 0x0801 -#define PCI_DEVICE_ID_URP8INTF 0x0802 -#define PCI_DEVICE_ID_URP16INTF 0x0803 -#define PCI_DEVICE_ID_URP8OCTA 0x0805 -#define PCI_DEVICE_ID_UPCI_RM3_8PORT 0x080C -#define PCI_DEVICE_ID_UPCI_RM3_4PORT 0x080D -#define PCI_DEVICE_ID_CRP16INTF 0x0903 #define PCI_VENDOR_ID_CYCLADES 0x120e #define PCI_DEVICE_ID_PC300_RX_2 0x0300 -- cgit v1.2.3 From 981b22b8777df7de070be1803f6d7ed4f634a43c Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 2 Mar 2021 07:21:38 +0100 Subject: tty: remove TTY_LDISC_MAGIC First, it is never checked. Second, use of it as a debugging aid is at least questionable. With the current tools, I don't think anyone used this kind of thing for debugging purposes for years. On the top of that, e.g. serdev does not set this field of tty_ldisc_ops at all. So get rid of this legacy. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20210302062214.29627-8-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_ldisc.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 572a07976116..31284b55bd4f 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -173,7 +173,6 @@ extern int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, struct tty_ldisc_ops { - int magic; char *name; int num; int flags; @@ -218,8 +217,6 @@ struct tty_ldisc { struct tty_struct *tty; }; -#define TTY_LDISC_MAGIC 0x5403 - #define LDISC_FLAG_DEFINED 0x00000001 #define MODULE_ALIAS_LDISC(ldisc) \ -- cgit v1.2.3 From a872ab4d6d191cca1ce84b945e394bd6a8d84dd9 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 2 Mar 2021 07:22:05 +0100 Subject: tty: let tty_unregister_driver return void Now that noone checks the return value, switch the return type of tty_unregister_driver to void. We can do that as we always return zero. Generally, drivers are not allowed to call tty_unregister_driver while there are open devices. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20210302062214.29627-35-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 95fc2f100f12..51f56e5ec955 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -482,7 +482,7 @@ extern void stop_tty(struct tty_struct *tty); extern void __start_tty(struct tty_struct *tty); extern void start_tty(struct tty_struct *tty); extern int tty_register_driver(struct tty_driver *driver); -extern int tty_unregister_driver(struct tty_driver *driver); +extern void tty_unregister_driver(struct tty_driver *driver); extern struct device *tty_register_device(struct tty_driver *driver, unsigned index, struct device *dev); extern struct device *tty_register_device_attr(struct tty_driver *driver, -- cgit v1.2.3 From fcbba344907afe26da487f1ed0b0e285c06a547b Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 5 Mar 2021 06:38:59 +0900 Subject: tty: serial: samsung_tty: Add support for Apple UARTs Apple SoCs are a distant descendant of Samsung designs and use yet another variant of their UART style, with different interrupt handling. In particular, this variant has the following differences with existing ones: * It includes a built-in interrupt controller with different registers, using only a single platform IRQ * Internal interrupt sources are treated as edge-triggered, even though the IRQ output is level-triggered. This chiefly affects the TX IRQ path: the driver can no longer rely on the TX buffer empty IRQ immediately firing after TX is enabled, but instead must prime the FIFO with data directly. Signed-off-by: Hector Martin Reviewed-by: Krzysztof Kozlowski Tested-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20210304213902.83903-25-marcan@marcan.st Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_s3c.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h index ca2c5393dc6b..f6c3323fc4c5 100644 --- a/include/linux/serial_s3c.h +++ b/include/linux/serial_s3c.h @@ -246,6 +246,22 @@ S5PV210_UFCON_TXTRIG4 | \ S5PV210_UFCON_RXTRIG4) +#define APPLE_S5L_UCON_RXTO_ENA 9 +#define APPLE_S5L_UCON_RXTHRESH_ENA 12 +#define APPLE_S5L_UCON_TXTHRESH_ENA 13 +#define APPLE_S5L_UCON_RXTO_ENA_MSK (1 << APPLE_S5L_UCON_RXTO_ENA) +#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK (1 << APPLE_S5L_UCON_RXTHRESH_ENA) +#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK (1 << APPLE_S5L_UCON_TXTHRESH_ENA) + +#define APPLE_S5L_UCON_DEFAULT (S3C2410_UCON_TXIRQMODE | \ + S3C2410_UCON_RXIRQMODE | \ + S3C2410_UCON_RXFIFO_TOI) + +#define APPLE_S5L_UTRSTAT_RXTHRESH (1<<4) +#define APPLE_S5L_UTRSTAT_TXTHRESH (1<<5) +#define APPLE_S5L_UTRSTAT_RXTO (1<<9) +#define APPLE_S5L_UTRSTAT_ALL_FLAGS (0x3f0) + #ifndef __ASSEMBLY__ #include -- cgit v1.2.3 From 92d1e87e627a6e5c9c5111adb95b6eae8df124c7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 26 Feb 2021 11:23:56 +0100 Subject: USB: remove usb_bus_type from usb.h We have 2 forward declarations of usb_bus_type, one in the system-wide usb.h and the other in the "USB core only header file". This variable is not exported from the USB core, so remove the declaration from usb.h as it does not need to be there. Cc: Thomas Zimmermann Cc: linux-usb@vger.kernel.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/20210226102356.716746-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 7d72c4e0713c..f15fd0fa95bd 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1257,8 +1257,6 @@ struct usb_device_driver { #define to_usb_device_driver(d) container_of(d, struct usb_device_driver, \ drvwrap.driver) -extern struct bus_type usb_bus_type; - /** * struct usb_class_driver - identifies a USB driver that wants to use the USB major number * @name: the usb class device name for this driver. Will show up in sysfs. -- cgit v1.2.3 From fb95c7cf5600b7b74412f27dfb39a1e13fd8a90d Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Mon, 8 Mar 2021 10:52:05 +0800 Subject: usb: common: add function to get interval expressed in us unit Add a new function to convert bInterval into the time expressed in 1us unit. Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/25c8a09b055f716c1e5bf11fea72c3418f844482.1615170625.git.chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ch9.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index abdd310c77f0..74debc824645 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -90,6 +90,9 @@ extern enum usb_ssp_rate usb_get_maximum_ssp_rate(struct device *dev); */ extern const char *usb_state_string(enum usb_device_state state); +unsigned int usb_decode_interval(const struct usb_endpoint_descriptor *epd, + enum usb_device_speed speed); + #ifdef CONFIG_TRACING /** * usb_decode_ctrl - Returns human readable representation of control request. -- cgit v1.2.3 From 365038f24b3e9d2b7c9e499f03f432040e28a35c Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Mon, 8 Mar 2021 10:52:07 +0800 Subject: usb: common: move function's kerneldoc next to its definition Following a general rule, add the kerneldoc for a function next to it's definition, but not next to its declaration in a header file. Suggested-by: Alan Stern Suggested-by: Greg Kroah-Hartman Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/c4d2e010ae2bf67cdfa0b55e6d1deb9339d9d3dc.1615170625.git.chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ch9.h | 61 ------------------------------------------------- 1 file changed, 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 74debc824645..1cffa34740b0 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -45,76 +45,15 @@ enum usb_ssp_rate { USB_SSP_GEN_2x2, }; -/** - * usb_ep_type_string() - Returns human readable-name of the endpoint type. - * @ep_type: The endpoint type to return human-readable name for. If it's not - * any of the types: USB_ENDPOINT_XFER_{CONTROL, ISOC, BULK, INT}, - * usually got by usb_endpoint_type(), the string 'unknown' will be returned. - */ extern const char *usb_ep_type_string(int ep_type); - -/** - * usb_speed_string() - Returns human readable-name of the speed. - * @speed: The speed to return human-readable name for. If it's not - * any of the speeds defined in usb_device_speed enum, string for - * USB_SPEED_UNKNOWN will be returned. - */ extern const char *usb_speed_string(enum usb_device_speed speed); - -/** - * usb_get_maximum_speed - Get maximum requested speed for a given USB - * controller. - * @dev: Pointer to the given USB controller device - * - * The function gets the maximum speed string from property "maximum-speed", - * and returns the corresponding enum usb_device_speed. - */ extern enum usb_device_speed usb_get_maximum_speed(struct device *dev); - -/** - * usb_get_maximum_ssp_rate - Get the signaling rate generation and lane count - * of a SuperSpeed Plus capable device. - * @dev: Pointer to the given USB controller device - * - * If the string from "maximum-speed" property is super-speed-plus-genXxY where - * 'X' is the generation number and 'Y' is the number of lanes, then this - * function returns the corresponding enum usb_ssp_rate. - */ extern enum usb_ssp_rate usb_get_maximum_ssp_rate(struct device *dev); - -/** - * usb_state_string - Returns human readable name for the state. - * @state: The state to return a human-readable name for. If it's not - * any of the states devices in usb_device_state_string enum, - * the string UNKNOWN will be returned. - */ extern const char *usb_state_string(enum usb_device_state state); - unsigned int usb_decode_interval(const struct usb_endpoint_descriptor *epd, enum usb_device_speed speed); #ifdef CONFIG_TRACING -/** - * usb_decode_ctrl - Returns human readable representation of control request. - * @str: buffer to return a human-readable representation of control request. - * This buffer should have about 200 bytes. - * @size: size of str buffer. - * @bRequestType: matches the USB bmRequestType field - * @bRequest: matches the USB bRequest field - * @wValue: matches the USB wValue field (CPU byte order) - * @wIndex: matches the USB wIndex field (CPU byte order) - * @wLength: matches the USB wLength field (CPU byte order) - * - * Function returns decoded, formatted and human-readable description of - * control request packet. - * - * The usage scenario for this is for tracepoints, so function as a return - * use the same value as in parameters. This approach allows to use this - * function in TP_printk - * - * Important: wValue, wIndex, wLength parameters before invoking this function - * should be processed by le16_to_cpu macro. - */ extern const char *usb_decode_ctrl(char *str, size_t size, __u8 bRequestType, __u8 bRequest, __u16 wValue, __u16 wIndex, __u16 wLength); -- cgit v1.2.3 From 60a35ba9141f06b67150ce3544bc595d049b0d83 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 3 Mar 2021 23:32:20 -0300 Subject: usb: usb-mx2: Remove unused file i.MX21 support has been dropped, so remove such unused file. Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20210304023220.2362407-1-festevam@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/usb-mx2.h | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 include/linux/platform_data/usb-mx2.h (limited to 'include/linux') diff --git a/include/linux/platform_data/usb-mx2.h b/include/linux/platform_data/usb-mx2.h deleted file mode 100644 index 97a670f3d8fb..000000000000 --- a/include/linux/platform_data/usb-mx2.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009 Martin Fuzzey - */ - -#ifndef __ASM_ARCH_MX21_USBH -#define __ASM_ARCH_MX21_USBH - -enum mx21_usbh_xcvr { - /* Values below as used by hardware (HWMODE register) */ - MX21_USBXCVR_TXDIF_RXDIF = 0, - MX21_USBXCVR_TXDIF_RXSE = 1, - MX21_USBXCVR_TXSE_RXDIF = 2, - MX21_USBXCVR_TXSE_RXSE = 3, -}; - -struct mx21_usbh_platform_data { - enum mx21_usbh_xcvr host_xcvr; /* tranceiver mode host 1,2 ports */ - enum mx21_usbh_xcvr otg_xcvr; /* tranceiver mode otg (as host) port */ - u16 enable_host1:1, - enable_host2:1, - enable_otg_host:1, /* enable "OTG" port (as host) */ - host1_xcverless:1, /* traceiverless host1 port */ - host1_txenoe:1, /* output enable host1 transmit enable */ - otg_ext_xcvr:1, /* external tranceiver for OTG port */ - unused:10; -}; - -#endif /* __ASM_ARCH_MX21_USBH */ -- cgit v1.2.3 From d5b0e0677bfd5efd17c5bbb00156931f0d41cb85 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Mar 2021 09:38:12 +0100 Subject: u64_stats,lockdep: Fix u64_stats_init() vs lockdep Jakub reported that: static struct net_device *rtl8139_init_board(struct pci_dev *pdev) { ... u64_stats_init(&tp->rx_stats.syncp); u64_stats_init(&tp->tx_stats.syncp); ... } results in lockdep getting confused between the RX and TX stats lock. This is because u64_stats_init() is an inline calling seqcount_init(), which is a macro using a static variable to generate a lockdep class. By wrapping that in an inline, we negate the effect of the macro and fold the static key variable, hence the confusion. Fix by also making u64_stats_init() a macro for the case where it matters, leaving the other case an inline for argument validation etc. Reported-by: Jakub Kicinski Debugged-by: "Ahmed S. Darwish" Signed-off-by: Peter Zijlstra (Intel) Tested-by: "Erhard F." Link: https://lkml.kernel.org/r/YEXicy6+9MksdLZh@hirez.programming.kicks-ass.net --- include/linux/u64_stats_sync.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index c6abb79501b3..e81856c0ba13 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -115,12 +115,13 @@ static inline void u64_stats_inc(u64_stats_t *p) } #endif +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) +#else static inline void u64_stats_init(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - seqcount_init(&syncp->seq); -#endif } +#endif static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { -- cgit v1.2.3 From 4817a52b306136c8b2b2271d8770401441e4cf79 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Mar 2021 15:21:18 +0100 Subject: seqlock,lockdep: Fix seqcount_latch_init() seqcount_init() must be a macro in order to preserve the static variable that is used for the lockdep key. Don't then wrap it in an inline function, which destroys that. Luckily there aren't many users of this function, but fix it before it becomes a problem. Fixes: 80793c3471d9 ("seqlock: Introduce seqcount_latch_t") Reported-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YEeFEbNUVkZaXDp4@hirez.programming.kicks-ass.net --- include/linux/seqlock.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 2f7bb92b4c9e..f61e34fbaaea 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -664,10 +664,7 @@ typedef struct { * seqcount_latch_init() - runtime initializer for seqcount_latch_t * @s: Pointer to the seqcount_latch_t instance */ -static inline void seqcount_latch_init(seqcount_latch_t *s) -{ - seqcount_init(&s->seqcount); -} +#define seqcount_latch_init(s) seqcount_init(&(s)->seqcount) /** * raw_read_seqcount_latch() - pick even/odd latch data copy -- cgit v1.2.3 From d15dfd31384ba3cb93150e5f87661a76fa419f74 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 9 Mar 2021 12:26:01 +0000 Subject: arm64: mte: Map hotplugged memory as Normal Tagged In a system supporting MTE, the linear map must allow reading/writing allocation tags by setting the memory type as Normal Tagged. Currently, this is only handled for memory present at boot. Hotplugged memory uses Normal non-Tagged memory. Introduce pgprot_mhp() for hotplugged memory and use it in add_memory_resource(). The arm64 code maps pgprot_mhp() to pgprot_tagged(). Note that ZONE_DEVICE memory should not be mapped as Tagged and therefore setting the memory type in arch_add_memory() is not feasible. Signed-off-by: Catalin Marinas Fixes: 0178dc761368 ("arm64: mte: Use Normal Tagged attributes for the linear map") Reported-by: Patrick Daly Tested-by: Patrick Daly Link: https://lore.kernel.org/r/1614745263-27827-1-git-send-email-pdaly@codeaurora.org Cc: # 5.10.x Cc: Will Deacon Cc: Andrew Morton Cc: Vincenzo Frascino Cc: David Hildenbrand Reviewed-by: David Hildenbrand Reviewed-by: Vincenzo Frascino Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20210309122601.5543-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- include/linux/pgtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index cdfc4e9f253e..5e772392a379 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -904,6 +904,10 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, #define pgprot_device pgprot_noncached #endif +#ifndef pgprot_mhp +#define pgprot_mhp(prot) (prot) +#endif + #ifdef CONFIG_MMU #ifndef pgprot_modify #define pgprot_modify pgprot_modify -- cgit v1.2.3 From b1b3ced389795d2671e88dd3e9e07a48dc9632fc Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 10 Mar 2021 10:06:05 +0200 Subject: mfd: Support ROHM BD9576MUF and BD9573MUF Add core support for ROHM BD9576MUF and BD9573MUF PMICs which are mainly used to power the R-Car series processors. Signed-off-by: Matti Vaittinen Signed-off-by: Lee Jones --- include/linux/mfd/rohm-bd957x.h | 59 ++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/rohm-generic.h | 2 ++ 2 files changed, 61 insertions(+) create mode 100644 include/linux/mfd/rohm-bd957x.h (limited to 'include/linux') diff --git a/include/linux/mfd/rohm-bd957x.h b/include/linux/mfd/rohm-bd957x.h new file mode 100644 index 000000000000..a631abb2c101 --- /dev/null +++ b/include/linux/mfd/rohm-bd957x.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (C) 2021 ROHM Semiconductors */ + +#ifndef __LINUX_MFD_BD957X_H__ +#define __LINUX_MFD_BD957X_H__ + +enum { + BD957X_VD50, + BD957X_VD18, + BD957X_VDDDR, + BD957X_VD10, + BD957X_VOUTL1, + BD957X_VOUTS1, +}; + +#define BD957X_REG_SMRB_ASSERT 0x15 +#define BD957X_REG_PMIC_INTERNAL_STAT 0x20 +#define BD957X_REG_INT_THERM_STAT 0x23 +#define BD957X_REG_INT_THERM_MASK 0x24 +#define BD957X_REG_INT_OVP_STAT 0x25 +#define BD957X_REG_INT_SCP_STAT 0x26 +#define BD957X_REG_INT_OCP_STAT 0x27 +#define BD957X_REG_INT_OVD_STAT 0x28 +#define BD957X_REG_INT_UVD_STAT 0x29 +#define BD957X_REG_INT_UVP_STAT 0x2a +#define BD957X_REG_INT_SYS_STAT 0x2b +#define BD957X_REG_INT_SYS_MASK 0x2c +#define BD957X_REG_INT_MAIN_STAT 0x30 +#define BD957X_REG_INT_MAIN_MASK 0x31 + +#define BD957X_REG_WDT_CONF 0x16 + +#define BD957X_REG_POW_TRIGGER1 0x41 +#define BD957X_REG_POW_TRIGGER2 0x42 +#define BD957X_REG_POW_TRIGGER3 0x43 +#define BD957X_REG_POW_TRIGGER4 0x44 +#define BD957X_REG_POW_TRIGGERL1 0x45 +#define BD957X_REG_POW_TRIGGERS1 0x46 + +#define BD957X_REGULATOR_EN_MASK 0xff +#define BD957X_REGULATOR_DIS_VAL 0xff + +#define BD957X_VSEL_REG_MASK 0xff + +#define BD957X_MASK_VOUT1_TUNE 0x87 +#define BD957X_MASK_VOUT2_TUNE 0x87 +#define BD957X_MASK_VOUT3_TUNE 0x1f +#define BD957X_MASK_VOUT4_TUNE 0x1f +#define BD957X_MASK_VOUTL1_TUNE 0x87 + +#define BD957X_REG_VOUT1_TUNE 0x50 +#define BD957X_REG_VOUT2_TUNE 0x53 +#define BD957X_REG_VOUT3_TUNE 0x56 +#define BD957X_REG_VOUT4_TUNE 0x59 +#define BD957X_REG_VOUTL1_TUNE 0x5c + +#define BD957X_MAX_REGISTER 0x61 + +#endif diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index 66f673c35303..ac6787464004 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -14,6 +14,8 @@ enum rohm_chip_type { ROHM_CHIP_TYPE_BD71828, ROHM_CHIP_TYPE_BD9571, ROHM_CHIP_TYPE_BD9574, + ROHM_CHIP_TYPE_BD9576, + ROHM_CHIP_TYPE_BD9573, ROHM_CHIP_TYPE_AMOUNT }; -- cgit v1.2.3 From 0e9692607f94ecc59aedc0ecfd2348124c743412 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 10 Mar 2021 10:08:02 +0200 Subject: mfd: bd9576: Add IRQ support BD9573 and BD9576 support set of "protection" interrupts for "fatal" issues. Those lead to SOC reset as PMIC shuts the power outputs. Thus there is no relevant IRQ handling for them. Few "detection" interrupts were added to the BD9576 with the idea that SOC could take some recovery-action before error gets unrecoverable. Unfortunately the BD9576 interrupt logic was not re-evaluated. IRQs are not designed to be properly acknowleged - and IRQ line is kept active for whole duration of error condition (in comparison to informing only about state change). For above reason, do not consider missing IRQ as error. Signed-off-by: Matti Vaittinen Signed-off-by: Lee Jones --- include/linux/mfd/rohm-bd957x.h | 62 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rohm-bd957x.h b/include/linux/mfd/rohm-bd957x.h index a631abb2c101..ddb396ff2da5 100644 --- a/include/linux/mfd/rohm-bd957x.h +++ b/include/linux/mfd/rohm-bd957x.h @@ -13,6 +13,55 @@ enum { BD957X_VOUTS1, }; +/* + * The BD9576 has own IRQ 'blocks' for: + * - I2C/thermal, + * - Over voltage protection + * - Short-circuit protection + * - Over current protection + * - Over voltage detection + * - Under voltage detection + * - Under voltage protection + * - 'system interrupt'. + * + * Each of the blocks have a status register giving more accurate IRQ source + * information - for example which of the regulators have over-voltage. + * + * On top of this, there is "main IRQ" status register where each bit indicates + * which of sub-blocks have active IRQs. Fine. That would fit regmap-irq main + * status handling. Except that: + * - Only some sub-IRQs can be masked. + * - The IRQ informs us about fault-condition, not when fault state changes. + * The IRQ line it is kept asserted until the detected condition is acked + * AND cleared in HW. This is annoying for IRQs like the one informing high + * temperature because if IRQ is not disabled it keeps the CPU in IRQ + * handling loop. + * + * For now we do just use the main-IRQ register as source for our IRQ + * information and bind the regmap-irq to this. We leave fine-grained sub-IRQ + * register handling to handlers in sub-devices. The regulator driver shall + * read which regulators are source for problem - or if the detected error is + * regulator temperature error. The sub-drivers do also handle masking of "sub- + * IRQs" if this is supported/needed. + * + * To overcome the problem with HW keeping IRQ asserted we do call + * disable_irq_nosync() from sub-device handler and add a delayed work to + * re-enable IRQ roughly 1 second later. This should keep our CPU out of + * busy-loop. + */ +#define IRQS_SILENT_MS 1000 + +enum { + BD9576_INT_THERM, + BD9576_INT_OVP, + BD9576_INT_SCP, + BD9576_INT_OCP, + BD9576_INT_OVD, + BD9576_INT_UVD, + BD9576_INT_UVP, + BD9576_INT_SYS, +}; + #define BD957X_REG_SMRB_ASSERT 0x15 #define BD957X_REG_PMIC_INTERNAL_STAT 0x20 #define BD957X_REG_INT_THERM_STAT 0x23 @@ -28,6 +77,19 @@ enum { #define BD957X_REG_INT_MAIN_STAT 0x30 #define BD957X_REG_INT_MAIN_MASK 0x31 +#define UVD_IRQ_VALID_MASK 0x6F +#define OVD_IRQ_VALID_MASK 0x2F + +#define BD957X_MASK_INT_MAIN_THERM BIT(0) +#define BD957X_MASK_INT_MAIN_OVP BIT(1) +#define BD957X_MASK_INT_MAIN_SCP BIT(2) +#define BD957X_MASK_INT_MAIN_OCP BIT(3) +#define BD957X_MASK_INT_MAIN_OVD BIT(4) +#define BD957X_MASK_INT_MAIN_UVD BIT(5) +#define BD957X_MASK_INT_MAIN_UVP BIT(6) +#define BD957X_MASK_INT_MAIN_SYS BIT(7) +#define BD957X_MASK_INT_ALL 0xff + #define BD957X_REG_WDT_CONF 0x16 #define BD957X_REG_POW_TRIGGER1 0x41 -- cgit v1.2.3 From 42fc191d60e6d5fd8c52e7afa8bccdc912947ce4 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 10 Mar 2021 10:11:44 +0200 Subject: mfd: bd9576: Add safety limit/monitoring registers ROHM BD9576 contains safety features like over/under voltage detection, over curren detection and over temperature detection. Add the configuration register information. Signed-off-by: Matti Vaittinen Signed-off-by: Lee Jones --- include/linux/mfd/rohm-bd957x.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rohm-bd957x.h b/include/linux/mfd/rohm-bd957x.h index ddb396ff2da5..acc920b64f75 100644 --- a/include/linux/mfd/rohm-bd957x.h +++ b/include/linux/mfd/rohm-bd957x.h @@ -116,6 +116,25 @@ enum { #define BD957X_REG_VOUT4_TUNE 0x59 #define BD957X_REG_VOUTL1_TUNE 0x5c +#define BD9576_REG_VOUT1_OVD 0x51 +#define BD9576_REG_VOUT1_UVD 0x52 +#define BD9576_REG_VOUT2_OVD 0x54 +#define BD9576_REG_VOUT2_UVD 0x55 +#define BD9576_REG_VOUT3_OVD 0x57 +#define BD9576_REG_VOUT3_UVD 0x58 +#define BD9576_REG_VOUT4_OVD 0x5a +#define BD9576_REG_VOUT4_UVD 0x5b +#define BD9576_REG_VOUTL1_OVD 0x5d +#define BD9576_REG_VOUTL1_UVD 0x5e + +#define BD9576_MASK_XVD 0x7f + +#define BD9576_REG_VOUT1S_OCW 0x5f +#define BD9576_REG_VOUT1S_OCP 0x60 + +#define BD9576_MASK_VOUT1S_OCW 0x3f +#define BD9576_MASK_VOUT1S_OCP 0x3f + #define BD957X_MAX_REGISTER 0x61 #endif -- cgit v1.2.3 From eceae583930666a69ab805eee8e81f9699bf6930 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sun, 24 Jan 2021 22:41:23 +0100 Subject: mfd: Add base driver for Netronix embedded controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Netronix embedded controller is a microcontroller found in some e-book readers designed by the original design manufacturer Netronix, Inc. It contains RTC, battery monitoring, system power management, and PWM functionality. This driver implements register access and version detection. Third-party hardware documentation is available at: https://github.com/neuschaefer/linux/wiki/Netronix-MSP430-embedded-controller The EC supports interrupts, but the driver doesn't make use of them so far. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Lee Jones --- include/linux/mfd/ntxec.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/linux/mfd/ntxec.h (limited to 'include/linux') diff --git a/include/linux/mfd/ntxec.h b/include/linux/mfd/ntxec.h new file mode 100644 index 000000000000..361204d125f1 --- /dev/null +++ b/include/linux/mfd/ntxec.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2020 Jonathan Neuschäfer + * + * Register access and version information for the Netronix embedded + * controller. + */ + +#ifndef NTXEC_H +#define NTXEC_H + +#include + +struct device; +struct regmap; + +struct ntxec { + struct device *dev; + struct regmap *regmap; +}; + +/* + * Some registers, such as the battery status register (0x41), are in + * big-endian, but others only have eight significant bits, which are in the + * first byte transmitted over I2C (the MSB of the big-endian value). + * This convenience function converts an 8-bit value to 16-bit for use in the + * second kind of register. + */ +static inline __be16 ntxec_reg8(u8 value) +{ + return value << 8; +} + +/* Known firmware versions */ +#define NTXEC_VERSION_KOBO_AURA 0xd726 /* found in Kobo Aura */ + +#endif -- cgit v1.2.3 From f7cb7fe34db9f32e8b1c13ecc823112480b875f8 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 26 Jan 2021 11:55:59 +0200 Subject: mfd: Add MFD driver for ATC260x PMICs Add initial support for the Actions Semi ATC260x PMICs which integrates Audio Codec, Power management, Clock generation and GPIO controller blocks. For the moment this driver only supports Regulator, Poweroff and Onkey functionalities for the ATC2603C and ATC2609A chip variants. Since the PMICs can be accessed using both I2C and SPI buses, the following driver structure has been adopted: -----> atc260x-core.c (Implements core functionalities) / ATC260x --------> atc260x-i2c.c (Implements I2C interface) \ -----> atc260x-spi.c (Implements SPI interface - TODO) Co-developed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam Signed-off-by: Cristian Ciocaltea Signed-off-by: Lee Jones --- include/linux/mfd/atc260x/atc2603c.h | 281 ++++++++++++++++++++++++++++++++ include/linux/mfd/atc260x/atc2609a.h | 308 +++++++++++++++++++++++++++++++++++ include/linux/mfd/atc260x/core.h | 58 +++++++ 3 files changed, 647 insertions(+) create mode 100644 include/linux/mfd/atc260x/atc2603c.h create mode 100644 include/linux/mfd/atc260x/atc2609a.h create mode 100644 include/linux/mfd/atc260x/core.h (limited to 'include/linux') diff --git a/include/linux/mfd/atc260x/atc2603c.h b/include/linux/mfd/atc260x/atc2603c.h new file mode 100644 index 000000000000..07ac640ef3e1 --- /dev/null +++ b/include/linux/mfd/atc260x/atc2603c.h @@ -0,0 +1,281 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * ATC2603C PMIC register definitions + * + * Copyright (C) 2020 Cristian Ciocaltea + */ + +#ifndef __LINUX_MFD_ATC260X_ATC2603C_H +#define __LINUX_MFD_ATC260X_ATC2603C_H + +enum atc2603c_irq_def { + ATC2603C_IRQ_AUDIO = 0, + ATC2603C_IRQ_OV, + ATC2603C_IRQ_OC, + ATC2603C_IRQ_OT, + ATC2603C_IRQ_UV, + ATC2603C_IRQ_ALARM, + ATC2603C_IRQ_ONOFF, + ATC2603C_IRQ_SGPIO, + ATC2603C_IRQ_IR, + ATC2603C_IRQ_REMCON, + ATC2603C_IRQ_POWER_IN, +}; + +/* PMU Registers */ +#define ATC2603C_PMU_SYS_CTL0 0x00 +#define ATC2603C_PMU_SYS_CTL1 0x01 +#define ATC2603C_PMU_SYS_CTL2 0x02 +#define ATC2603C_PMU_SYS_CTL3 0x03 +#define ATC2603C_PMU_SYS_CTL4 0x04 +#define ATC2603C_PMU_SYS_CTL5 0x05 +#define ATC2603C_PMU_SYS_CTL6 0x06 +#define ATC2603C_PMU_SYS_CTL7 0x07 +#define ATC2603C_PMU_SYS_CTL8 0x08 +#define ATC2603C_PMU_SYS_CTL9 0x09 +#define ATC2603C_PMU_BAT_CTL0 0x0A +#define ATC2603C_PMU_BAT_CTL1 0x0B +#define ATC2603C_PMU_VBUS_CTL0 0x0C +#define ATC2603C_PMU_VBUS_CTL1 0x0D +#define ATC2603C_PMU_WALL_CTL0 0x0E +#define ATC2603C_PMU_WALL_CTL1 0x0F +#define ATC2603C_PMU_SYS_PENDING 0x10 +#define ATC2603C_PMU_DC1_CTL0 0x11 +#define ATC2603C_PMU_DC1_CTL1 0x12 // Undocumented +#define ATC2603C_PMU_DC1_CTL2 0x13 // Undocumented +#define ATC2603C_PMU_DC2_CTL0 0x14 +#define ATC2603C_PMU_DC2_CTL1 0x15 // Undocumented +#define ATC2603C_PMU_DC2_CTL2 0x16 // Undocumented +#define ATC2603C_PMU_DC3_CTL0 0x17 +#define ATC2603C_PMU_DC3_CTL1 0x18 // Undocumented +#define ATC2603C_PMU_DC3_CTL2 0x19 // Undocumented +#define ATC2603C_PMU_DC4_CTL0 0x1A // Undocumented +#define ATC2603C_PMU_DC4_CTL1 0x1B // Undocumented +#define ATC2603C_PMU_DC5_CTL0 0x1C // Undocumented +#define ATC2603C_PMU_DC5_CTL1 0x1D // Undocumented +#define ATC2603C_PMU_LDO1_CTL 0x1E +#define ATC2603C_PMU_LDO2_CTL 0x1F +#define ATC2603C_PMU_LDO3_CTL 0x20 +#define ATC2603C_PMU_LDO4_CTL 0x21 // Undocumented +#define ATC2603C_PMU_LDO5_CTL 0x22 +#define ATC2603C_PMU_LDO6_CTL 0x23 +#define ATC2603C_PMU_LDO7_CTL 0x24 +#define ATC2603C_PMU_LDO8_CTL 0x25 // Undocumented +#define ATC2603C_PMU_LDO9_CTL 0x26 // Undocumented +#define ATC2603C_PMU_LDO10_CTL 0x27 // Undocumented +#define ATC2603C_PMU_LDO11_CTL 0x28 +#define ATC2603C_PMU_SWITCH_CTL 0x29 +#define ATC2603C_PMU_OV_CTL0 0x2A +#define ATC2603C_PMU_OV_CTL1 0x2B +#define ATC2603C_PMU_OV_STATUS 0x2C +#define ATC2603C_PMU_OV_EN 0x2D +#define ATC2603C_PMU_OV_INT_EN 0x2E +#define ATC2603C_PMU_OC_CTL 0x2F +#define ATC2603C_PMU_OC_STATUS 0x30 +#define ATC2603C_PMU_OC_EN 0x31 +#define ATC2603C_PMU_OC_INT_EN 0x32 +#define ATC2603C_PMU_UV_CTL0 0x33 +#define ATC2603C_PMU_UV_CTL1 0x34 +#define ATC2603C_PMU_UV_STATUS 0x35 +#define ATC2603C_PMU_UV_EN 0x36 +#define ATC2603C_PMU_UV_INT_EN 0x37 +#define ATC2603C_PMU_OT_CTL 0x38 +#define ATC2603C_PMU_CHARGER_CTL0 0x39 +#define ATC2603C_PMU_CHARGER_CTL1 0x3A +#define ATC2603C_PMU_CHARGER_CTL2 0x3B +#define ATC2603C_PMU_BAKCHARGER_CTL 0x3C // Undocumented +#define ATC2603C_PMU_APDS_CTL 0x3D +#define ATC2603C_PMU_AUXADC_CTL0 0x3E +#define ATC2603C_PMU_AUXADC_CTL1 0x3F +#define ATC2603C_PMU_BATVADC 0x40 +#define ATC2603C_PMU_BATIADC 0x41 +#define ATC2603C_PMU_WALLVADC 0x42 +#define ATC2603C_PMU_WALLIADC 0x43 +#define ATC2603C_PMU_VBUSVADC 0x44 +#define ATC2603C_PMU_VBUSIADC 0x45 +#define ATC2603C_PMU_SYSPWRADC 0x46 +#define ATC2603C_PMU_REMCONADC 0x47 +#define ATC2603C_PMU_SVCCADC 0x48 +#define ATC2603C_PMU_CHGIADC 0x49 +#define ATC2603C_PMU_IREFADC 0x4A +#define ATC2603C_PMU_BAKBATADC 0x4B +#define ATC2603C_PMU_ICTEMPADC 0x4C +#define ATC2603C_PMU_AUXADC0 0x4D +#define ATC2603C_PMU_AUXADC1 0x4E +#define ATC2603C_PMU_AUXADC2 0x4F +#define ATC2603C_PMU_ICMADC 0x50 +#define ATC2603C_PMU_BDG_CTL 0x51 // Undocumented +#define ATC2603C_RTC_CTL 0x52 +#define ATC2603C_RTC_MSALM 0x53 +#define ATC2603C_RTC_HALM 0x54 +#define ATC2603C_RTC_YMDALM 0x55 +#define ATC2603C_RTC_MS 0x56 +#define ATC2603C_RTC_H 0x57 +#define ATC2603C_RTC_DC 0x58 +#define ATC2603C_RTC_YMD 0x59 +#define ATC2603C_EFUSE_DAT 0x5A // Undocumented +#define ATC2603C_EFUSECRTL1 0x5B // Undocumented +#define ATC2603C_EFUSECRTL2 0x5C // Undocumented +#define ATC2603C_PMU_FW_USE0 0x5D // Undocumented +#define ATC2603C_PMU_FW_USE1 0x5E // Undocumented +#define ATC2603C_PMU_FW_USE2 0x5F // Undocumented +#define ATC2603C_PMU_FW_USE3 0x60 // Undocumented +#define ATC2603C_PMU_FW_USE4 0x61 // Undocumented +#define ATC2603C_PMU_ABNORMAL_STATUS 0x62 +#define ATC2603C_PMU_WALL_APDS_CTL 0x63 +#define ATC2603C_PMU_REMCON_CTL0 0x64 +#define ATC2603C_PMU_REMCON_CTL1 0x65 +#define ATC2603C_PMU_MUX_CTL0 0x66 +#define ATC2603C_PMU_SGPIO_CTL0 0x67 +#define ATC2603C_PMU_SGPIO_CTL1 0x68 +#define ATC2603C_PMU_SGPIO_CTL2 0x69 +#define ATC2603C_PMU_SGPIO_CTL3 0x6A +#define ATC2603C_PMU_SGPIO_CTL4 0x6B +#define ATC2603C_PWMCLK_CTL 0x6C +#define ATC2603C_PWM0_CTL 0x6D +#define ATC2603C_PWM1_CTL 0x6E +#define ATC2603C_PMU_ADC_DBG0 0x70 +#define ATC2603C_PMU_ADC_DBG1 0x71 +#define ATC2603C_PMU_ADC_DBG2 0x72 +#define ATC2603C_PMU_ADC_DBG3 0x73 +#define ATC2603C_PMU_ADC_DBG4 0x74 +#define ATC2603C_IRC_CTL 0x80 +#define ATC2603C_IRC_STAT 0x81 +#define ATC2603C_IRC_CC 0x82 +#define ATC2603C_IRC_KDC 0x83 +#define ATC2603C_IRC_WK 0x84 +#define ATC2603C_IRC_RCC 0x85 +#define ATC2603C_IRC_FILTER 0x86 + +/* AUDIO_OUT Registers */ +#define ATC2603C_AUDIOINOUT_CTL 0xA0 +#define ATC2603C_AUDIO_DEBUGOUTCTL 0xA1 +#define ATC2603C_DAC_DIGITALCTL 0xA2 +#define ATC2603C_DAC_VOLUMECTL0 0xA3 +#define ATC2603C_DAC_ANALOG0 0xA4 +#define ATC2603C_DAC_ANALOG1 0xA5 +#define ATC2603C_DAC_ANALOG2 0xA6 +#define ATC2603C_DAC_ANALOG3 0xA7 + +/* AUDIO_IN Registers */ +#define ATC2603C_ADC_DIGITALCTL 0xA8 +#define ATC2603C_ADC_HPFCTL 0xA9 +#define ATC2603C_ADC_CTL 0xAA +#define ATC2603C_AGC_CTL0 0xAB +#define ATC2603C_AGC_CTL1 0xAC // Undocumented +#define ATC2603C_AGC_CTL2 0xAD +#define ATC2603C_ADC_ANALOG0 0xAE +#define ATC2603C_ADC_ANALOG1 0xAF + +/* PCM_IF Registers */ +#define ATC2603C_PCM0_CTL 0xB0 // Undocumented +#define ATC2603C_PCM1_CTL 0xB1 // Undocumented +#define ATC2603C_PCM2_CTL 0xB2 // Undocumented +#define ATC2603C_PCMIF_CTL 0xB3 // Undocumented + +/* CMU_CONTROL Registers */ +#define ATC2603C_CMU_DEVRST 0xC1 // Undocumented + +/* INTS Registers */ +#define ATC2603C_INTS_PD 0xC8 +#define ATC2603C_INTS_MSK 0xC9 + +/* MFP Registers */ +#define ATC2603C_MFP_CTL 0xD0 +#define ATC2603C_PAD_VSEL 0xD1 // Undocumented +#define ATC2603C_GPIO_OUTEN 0xD2 +#define ATC2603C_GPIO_INEN 0xD3 +#define ATC2603C_GPIO_DAT 0xD4 +#define ATC2603C_PAD_DRV 0xD5 +#define ATC2603C_PAD_EN 0xD6 +#define ATC2603C_DEBUG_SEL 0xD7 // Undocumented +#define ATC2603C_DEBUG_IE 0xD8 // Undocumented +#define ATC2603C_DEBUG_OE 0xD9 // Undocumented +#define ATC2603C_BIST_START 0x0A // Undocumented +#define ATC2603C_BIST_RESULT 0x0B // Undocumented +#define ATC2603C_CHIP_VER 0xDC + +/* TWSI Registers */ +#define ATC2603C_SADDR 0xFF + +/* PMU_SYS_CTL0 Register Mask Bits */ +#define ATC2603C_PMU_SYS_CTL0_IR_WK_EN BIT(5) +#define ATC2603C_PMU_SYS_CTL0_RESET_WK_EN BIT(6) +#define ATC2603C_PMU_SYS_CTL0_HDSW_WK_EN BIT(7) +#define ATC2603C_PMU_SYS_CTL0_ALARM_WK_EN BIT(8) +#define ATC2603C_PMU_SYS_CTL0_REM_CON_WK_EN BIT(9) +#define ATC2603C_PMU_SYS_CTL0_RESTART_EN BIT(10) +#define ATC2603C_PMU_SYS_CTL0_SGPIOIRQ_WK_EN BIT(11) +#define ATC2603C_PMU_SYS_CTL0_ONOFF_SHORT_WK_EN BIT(12) +#define ATC2603C_PMU_SYS_CTL0_ONOFF_LONG_WK_EN BIT(13) +#define ATC2603C_PMU_SYS_CTL0_WALL_WK_EN BIT(14) +#define ATC2603C_PMU_SYS_CTL0_USB_WK_EN BIT(15) +#define ATC2603C_PMU_SYS_CTL0_WK_ALL (GENMASK(15, 5) & (~BIT(10))) + +/* PMU_SYS_CTL1 Register Mask Bits */ +#define ATC2603C_PMU_SYS_CTL1_EN_S1 BIT(0) +#define ATC2603C_PMU_SYS_CTL1_LB_S4_EN BIT(2) +#define ATC2603C_PMU_SYS_CTL1_LB_S4 GENMASK(4, 3) +#define ATC2603C_PMU_SYS_CTL1_LB_S4_3_1V BIT(4) +#define ATC2603C_PMU_SYS_CTL1_IR_WK_FLAG BIT(5) +#define ATC2603C_PMU_SYS_CTL1_RESET_WK_FLAG BIT(6) +#define ATC2603C_PMU_SYS_CTL1_HDSW_WK_FLAG BIT(7) +#define ATC2603C_PMU_SYS_CTL1_ALARM_WK_FLAG BIT(8) +#define ATC2603C_PMU_SYS_CTL1_REM_CON_WK_FLAG BIT(9) +#define ATC2603C_PMU_SYS_CTL1_ONOFF_PRESS_RESET_IRQ_PD BIT(10) +#define ATC2603C_PMU_SYS_CTL1_SGPIOIRQ_WK_FLAG BIT(11) +#define ATC2603C_PMU_SYS_CTL1_ONOFF_SHORT_WK_FLAG BIT(12) +#define ATC2603C_PMU_SYS_CTL1_ONOFF_LONG_WK_FLAG BIT(13) +#define ATC2603C_PMU_SYS_CTL1_WALL_WK_FLAG BIT(14) +#define ATC2603C_PMU_SYS_CTL1_USB_WK_FLAG BIT(15) + +/* PMU_SYS_CTL2 Register Mask Bits */ +#define ATC2603C_PMU_SYS_CTL2_PMU_A_EN BIT(0) +#define ATC2603C_PMU_SYS_CTL2_ONOFF_PRESS_INT_EN BIT(1) +#define ATC2603C_PMU_SYS_CTL2_ONOFF_PRESS_PD BIT(2) +#define ATC2603C_PMU_SYS_CTL2_S2TIMER GENMASK(5, 3) +#define ATC2603C_PMU_SYS_CTL2_S2_TIMER_EN BIT(6) +#define ATC2603C_PMU_SYS_CTL2_ONOFF_RESET_TIME_SEL GENMASK(8, 7) +#define ATC2603C_PMU_SYS_CTL2_ONOFF_PRESS_RESET_EN BIT(9) +#define ATC2603C_PMU_SYS_CTL2_ONOFF_PRESS_TIME GENMASK(11, 10) +#define ATC2603C_PMU_SYS_CTL2_ONOFF_INT_EN BIT(12) +#define ATC2603C_PMU_SYS_CTL2_ONOFF_LONG_PRESS BIT(13) +#define ATC2603C_PMU_SYS_CTL2_ONOFF_SHORT_PRESS BIT(14) +#define ATC2603C_PMU_SYS_CTL2_ONOFF_PRESS BIT(15) + +/* PMU_SYS_CTL3 Register Mask Bits */ +#define ATC2603C_PMU_SYS_CTL3_S2S3TOS1_TIMER GENMASK(8, 7) +#define ATC2603C_PMU_SYS_CTL3_S2S3TOS1_TIMER_EN BIT(9) +#define ATC2603C_PMU_SYS_CTL3_S3_TIMER GENMASK(12, 10) +#define ATC2603C_PMU_SYS_CTL3_S3_TIMER_EN BIT(13) +#define ATC2603C_PMU_SYS_CTL3_EN_S3 BIT(14) +#define ATC2603C_PMU_SYS_CTL3_EN_S2 BIT(15) + +/* PMU_SYS_CTL5 Register Mask Bits */ +#define ATC2603C_PMU_SYS_CTL5_WALLWKDTEN BIT(7) +#define ATC2603C_PMU_SYS_CTL5_VBUSWKDTEN BIT(8) +#define ATC2603C_PMU_SYS_CTL5_REMCON_DECT_EN BIT(9) +#define ATC2603C_PMU_SYS_CTL5_ONOFF_8S_SEL BIT(10) + +/* INTS_MSK Register Mask Bits */ +#define ATC2603C_INTS_MSK_AUDIO BIT(0) +#define ATC2603C_INTS_MSK_OV BIT(1) +#define ATC2603C_INTS_MSK_OC BIT(2) +#define ATC2603C_INTS_MSK_OT BIT(3) +#define ATC2603C_INTS_MSK_UV BIT(4) +#define ATC2603C_INTS_MSK_ALARM BIT(5) +#define ATC2603C_INTS_MSK_ONOFF BIT(6) +#define ATC2603C_INTS_MSK_SGPIO BIT(7) +#define ATC2603C_INTS_MSK_IR BIT(8) +#define ATC2603C_INTS_MSK_REMCON BIT(9) +#define ATC2603C_INTS_MSK_POWERIN BIT(10) + +/* CMU_DEVRST Register Mask Bits */ +#define ATC2603C_CMU_DEVRST_MFP BIT(1) +#define ATC2603C_CMU_DEVRST_INTS BIT(2) +#define ATC2603C_CMU_DEVRST_AUDIO BIT(4) + +/* PAD_EN Register Mask Bits */ +#define ATC2603C_PAD_EN_EXTIRQ BIT(0) + +#endif /* __LINUX_MFD_ATC260X_ATC2603C_H */ diff --git a/include/linux/mfd/atc260x/atc2609a.h b/include/linux/mfd/atc260x/atc2609a.h new file mode 100644 index 000000000000..b957d7bd73e9 --- /dev/null +++ b/include/linux/mfd/atc260x/atc2609a.h @@ -0,0 +1,308 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * ATC2609A PMIC register definitions + * + * Copyright (C) 2019 Manivannan Sadhasivam + */ + +#ifndef __LINUX_MFD_ATC260X_ATC2609A_H +#define __LINUX_MFD_ATC260X_ATC2609A_H + +enum atc2609a_irq_def { + ATC2609A_IRQ_AUDIO = 0, + ATC2609A_IRQ_OV, + ATC2609A_IRQ_OC, + ATC2609A_IRQ_OT, + ATC2609A_IRQ_UV, + ATC2609A_IRQ_ALARM, + ATC2609A_IRQ_ONOFF, + ATC2609A_IRQ_WKUP, + ATC2609A_IRQ_IR, + ATC2609A_IRQ_REMCON, + ATC2609A_IRQ_POWER_IN, +}; + +/* PMU Registers */ +#define ATC2609A_PMU_SYS_CTL0 0x00 +#define ATC2609A_PMU_SYS_CTL1 0x01 +#define ATC2609A_PMU_SYS_CTL2 0x02 +#define ATC2609A_PMU_SYS_CTL3 0x03 +#define ATC2609A_PMU_SYS_CTL4 0x04 +#define ATC2609A_PMU_SYS_CTL5 0x05 +#define ATC2609A_PMU_SYS_CTL6 0x06 +#define ATC2609A_PMU_SYS_CTL7 0x07 +#define ATC2609A_PMU_SYS_CTL8 0x08 +#define ATC2609A_PMU_SYS_CTL9 0x09 +#define ATC2609A_PMU_BAT_CTL0 0x0A +#define ATC2609A_PMU_BAT_CTL1 0x0B +#define ATC2609A_PMU_VBUS_CTL0 0x0C +#define ATC2609A_PMU_VBUS_CTL1 0x0D +#define ATC2609A_PMU_WALL_CTL0 0x0E +#define ATC2609A_PMU_WALL_CTL1 0x0F +#define ATC2609A_PMU_SYS_PENDING 0x10 +#define ATC2609A_PMU_APDS_CTL0 0x11 +#define ATC2609A_PMU_APDS_CTL1 0x12 +#define ATC2609A_PMU_APDS_CTL2 0x13 +#define ATC2609A_PMU_CHARGER_CTL 0x14 +#define ATC2609A_PMU_BAKCHARGER_CTL 0x15 +#define ATC2609A_PMU_SWCHG_CTL0 0x16 +#define ATC2609A_PMU_SWCHG_CTL1 0x17 +#define ATC2609A_PMU_SWCHG_CTL2 0x18 +#define ATC2609A_PMU_SWCHG_CTL3 0x19 +#define ATC2609A_PMU_SWCHG_CTL4 0x1A +#define ATC2609A_PMU_DC_OSC 0x1B +#define ATC2609A_PMU_DC0_CTL0 0x1C +#define ATC2609A_PMU_DC0_CTL1 0x1D +#define ATC2609A_PMU_DC0_CTL2 0x1E +#define ATC2609A_PMU_DC0_CTL3 0x1F +#define ATC2609A_PMU_DC0_CTL4 0x20 +#define ATC2609A_PMU_DC0_CTL5 0x21 +#define ATC2609A_PMU_DC0_CTL6 0x22 +#define ATC2609A_PMU_DC1_CTL0 0x23 +#define ATC2609A_PMU_DC1_CTL1 0x24 +#define ATC2609A_PMU_DC1_CTL2 0x25 +#define ATC2609A_PMU_DC1_CTL3 0x26 +#define ATC2609A_PMU_DC1_CTL4 0x27 +#define ATC2609A_PMU_DC1_CTL5 0x28 +#define ATC2609A_PMU_DC1_CTL6 0x29 +#define ATC2609A_PMU_DC2_CTL0 0x2A +#define ATC2609A_PMU_DC2_CTL1 0x2B +#define ATC2609A_PMU_DC2_CTL2 0x2C +#define ATC2609A_PMU_DC2_CTL3 0x2D +#define ATC2609A_PMU_DC2_CTL4 0x2E +#define ATC2609A_PMU_DC2_CTL5 0x2F +#define ATC2609A_PMU_DC2_CTL6 0x30 +#define ATC2609A_PMU_DC3_CTL0 0x31 +#define ATC2609A_PMU_DC3_CTL1 0x32 +#define ATC2609A_PMU_DC3_CTL2 0x33 +#define ATC2609A_PMU_DC3_CTL3 0x34 +#define ATC2609A_PMU_DC3_CTL4 0x35 +#define ATC2609A_PMU_DC3_CTL5 0x36 +#define ATC2609A_PMU_DC3_CTL6 0x37 +#define ATC2609A_PMU_DC_ZR 0x38 +#define ATC2609A_PMU_LDO0_CTL0 0x39 +#define ATC2609A_PMU_LDO0_CTL1 0x3A +#define ATC2609A_PMU_LDO1_CTL0 0x3B +#define ATC2609A_PMU_LDO1_CTL1 0x3C +#define ATC2609A_PMU_LDO2_CTL0 0x3D +#define ATC2609A_PMU_LDO2_CTL1 0x3E +#define ATC2609A_PMU_LDO3_CTL0 0x3F +#define ATC2609A_PMU_LDO3_CTL1 0x40 +#define ATC2609A_PMU_LDO4_CTL0 0x41 +#define ATC2609A_PMU_LDO4_CTL1 0x42 +#define ATC2609A_PMU_LDO5_CTL0 0x43 +#define ATC2609A_PMU_LDO5_CTL1 0x44 +#define ATC2609A_PMU_LDO6_CTL0 0x45 +#define ATC2609A_PMU_LDO6_CTL1 0x46 +#define ATC2609A_PMU_LDO7_CTL0 0x47 +#define ATC2609A_PMU_LDO7_CTL1 0x48 +#define ATC2609A_PMU_LDO8_CTL0 0x49 +#define ATC2609A_PMU_LDO8_CTL1 0x4A +#define ATC2609A_PMU_LDO9_CTL 0x4B +#define ATC2609A_PMU_OV_INT_EN 0x4C +#define ATC2609A_PMU_OV_STATUS 0x4D +#define ATC2609A_PMU_UV_INT_EN 0x4E +#define ATC2609A_PMU_UV_STATUS 0x4F +#define ATC2609A_PMU_OC_INT_EN 0x50 +#define ATC2609A_PMU_OC_STATUS 0x51 +#define ATC2609A_PMU_OT_CTL 0x52 +#define ATC2609A_PMU_CM_CTL0 0x53 +#define ATC2609A_PMU_FW_USE0 0x54 +#define ATC2609A_PMU_FW_USE1 0x55 +#define ATC2609A_PMU_ADC12B_I 0x56 +#define ATC2609A_PMU_ADC12B_V 0x57 +#define ATC2609A_PMU_ADC12B_DUMMY 0x58 +#define ATC2609A_PMU_AUXADC_CTL0 0x59 +#define ATC2609A_PMU_AUXADC_CTL1 0x5A +#define ATC2609A_PMU_BATVADC 0x5B +#define ATC2609A_PMU_BATIADC 0x5C +#define ATC2609A_PMU_WALLVADC 0x5D +#define ATC2609A_PMU_WALLIADC 0x5E +#define ATC2609A_PMU_VBUSVADC 0x5F +#define ATC2609A_PMU_VBUSIADC 0x60 +#define ATC2609A_PMU_SYSPWRADC 0x61 +#define ATC2609A_PMU_REMCONADC 0x62 +#define ATC2609A_PMU_SVCCADC 0x63 +#define ATC2609A_PMU_CHGIADC 0x64 +#define ATC2609A_PMU_IREFADC 0x65 +#define ATC2609A_PMU_BAKBATADC 0x66 +#define ATC2609A_PMU_ICTEMPADC 0x67 +#define ATC2609A_PMU_AUXADC0 0x68 +#define ATC2609A_PMU_AUXADC1 0x69 +#define ATC2609A_PMU_AUXADC2 0x6A +#define ATC2609A_PMU_AUXADC3 0x6B +#define ATC2609A_PMU_ICTEMPADC_ADJ 0x6C +#define ATC2609A_PMU_BDG_CTL 0x6D +#define ATC2609A_RTC_CTL 0x6E +#define ATC2609A_RTC_MSALM 0x6F +#define ATC2609A_RTC_HALM 0x70 +#define ATC2609A_RTC_YMDALM 0x71 +#define ATC2609A_RTC_MS 0x72 +#define ATC2609A_RTC_H 0x73 +#define ATC2609A_RTC_DC 0x74 +#define ATC2609A_RTC_YMD 0x75 +#define ATC2609A_EFUSE_DAT 0x76 +#define ATC2609A_EFUSECRTL1 0x77 +#define ATC2609A_EFUSECRTL2 0x78 +#define ATC2609A_PMU_DC4_CTL0 0x79 +#define ATC2609A_PMU_DC4_CTL1 0x7A +#define ATC2609A_PMU_DC4_CTL2 0x7B +#define ATC2609A_PMU_DC4_CTL3 0x7C +#define ATC2609A_PMU_DC4_CTL4 0x7D +#define ATC2609A_PMU_DC4_CTL5 0x7E +#define ATC2609A_PMU_DC4_CTL6 0x7F +#define ATC2609A_PMU_PWR_STATUS 0x80 +#define ATC2609A_PMU_S2_PWR 0x81 +#define ATC2609A_CLMT_CTL0 0x82 +#define ATC2609A_CLMT_DATA0 0x83 +#define ATC2609A_CLMT_DATA1 0x84 +#define ATC2609A_CLMT_DATA2 0x85 +#define ATC2609A_CLMT_DATA3 0x86 +#define ATC2609A_CLMT_ADD0 0x87 +#define ATC2609A_CLMT_ADD1 0x88 +#define ATC2609A_CLMT_OCV_TABLE 0x89 +#define ATC2609A_CLMT_R_TABLE 0x8A +#define ATC2609A_PMU_PWRON_CTL0 0x8D +#define ATC2609A_PMU_PWRON_CTL1 0x8E +#define ATC2609A_PMU_PWRON_CTL2 0x8F +#define ATC2609A_IRC_CTL 0x90 +#define ATC2609A_IRC_STAT 0x91 +#define ATC2609A_IRC_CC 0x92 +#define ATC2609A_IRC_KDC 0x93 +#define ATC2609A_IRC_WK 0x94 +#define ATC2609A_IRC_RCC 0x95 + +/* AUDIO_OUT Registers */ +#define ATC2609A_AUDIOINOUT_CTL 0xA0 +#define ATC2609A_AUDIO_DEBUGOUTCTL 0xA1 +#define ATC2609A_DAC_DIGITALCTL 0xA2 +#define ATC2609A_DAC_VOLUMECTL0 0xA3 +#define ATC2609A_DAC_ANALOG0 0xA4 +#define ATC2609A_DAC_ANALOG1 0xA5 +#define ATC2609A_DAC_ANALOG2 0xA6 +#define ATC2609A_DAC_ANALOG3 0xA7 + +/* AUDIO_IN Registers */ +#define ATC2609A_ADC_DIGITALCTL 0xA8 +#define ATC2609A_ADC_HPFCTL 0xA9 +#define ATC2609A_ADC_CTL 0xAA +#define ATC2609A_AGC_CTL0 0xAB +#define ATC2609A_AGC_CTL1 0xAC +#define ATC2609A_AGC_CTL2 0xAD +#define ATC2609A_ADC_ANALOG0 0xAE +#define ATC2609A_ADC_ANALOG1 0xAF + +/* PCM_IF Registers */ +#define ATC2609A_PCM0_CTL 0xB0 +#define ATC2609A_PCM1_CTL 0xB1 +#define ATC2609A_PCM2_CTL 0xB2 +#define ATC2609A_PCMIF_CTL 0xB3 + +/* CMU_CONTROL Registers */ +#define ATC2609A_CMU_DEVRST 0xC1 + +/* INTS Registers */ +#define ATC2609A_INTS_PD 0xC8 +#define ATC2609A_INTS_MSK 0xC9 + +/* MFP Registers */ +#define ATC2609A_MFP_CTL 0xD0 +#define ATC2609A_PAD_VSEL 0xD1 +#define ATC2609A_GPIO_OUTEN 0xD2 +#define ATC2609A_GPIO_INEN 0xD3 +#define ATC2609A_GPIO_DAT 0xD4 +#define ATC2609A_PAD_DRV 0xD5 +#define ATC2609A_PAD_EN 0xD6 +#define ATC2609A_DEBUG_SEL 0xD7 +#define ATC2609A_DEBUG_IE 0xD8 +#define ATC2609A_DEBUG_OE 0xD9 +#define ATC2609A_CHIP_VER 0xDC + +/* PWSI Registers */ +#define ATC2609A_PWSI_CTL 0xF0 +#define ATC2609A_PWSI_STATUS 0xF1 + +/* TWSI Registers */ +#define ATC2609A_SADDR 0xFF + +/* PMU_SYS_CTL0 Register Mask Bits */ +#define ATC2609A_PMU_SYS_CTL0_IR_WK_EN BIT(5) +#define ATC2609A_PMU_SYS_CTL0_RESET_WK_EN BIT(6) +#define ATC2609A_PMU_SYS_CTL0_HDSW_WK_EN BIT(7) +#define ATC2609A_PMU_SYS_CTL0_ALARM_WK_EN BIT(8) +#define ATC2609A_PMU_SYS_CTL0_REM_CON_WK_EN BIT(9) +#define ATC2609A_PMU_SYS_CTL0_RESTART_EN BIT(10) +#define ATC2609A_PMU_SYS_CTL0_WKIRQ_WK_EN BIT(11) +#define ATC2609A_PMU_SYS_CTL0_ONOFF_SHORT_WK_EN BIT(12) +#define ATC2609A_PMU_SYS_CTL0_ONOFF_LONG_WK_EN BIT(13) +#define ATC2609A_PMU_SYS_CTL0_WALL_WK_EN BIT(14) +#define ATC2609A_PMU_SYS_CTL0_USB_WK_EN BIT(15) +#define ATC2609A_PMU_SYS_CTL0_WK_ALL (GENMASK(15, 5) & (~BIT(10))) + +/* PMU_SYS_CTL1 Register Mask Bits */ +#define ATC2609A_PMU_SYS_CTL1_EN_S1 BIT(0) +#define ATC2609A_PMU_SYS_CTL1_LB_S4_EN BIT(2) +#define ATC2609A_PMU_SYS_CTL1_LB_S4 GENMASK(4, 3) +#define ATC2609A_PMU_SYS_CTL1_LB_S4_3_1V BIT(4) +#define ATC2609A_PMU_SYS_CTL1_IR_WK_FLAG BIT(5) +#define ATC2609A_PMU_SYS_CTL1_RESET_WK_FLAG BIT(6) +#define ATC2609A_PMU_SYS_CTL1_HDSW_WK_FLAG BIT(7) +#define ATC2609A_PMU_SYS_CTL1_ALARM_WK_FLAG BIT(8) +#define ATC2609A_PMU_SYS_CTL1_REM_CON_WK_FLAG BIT(9) +#define ATC2609A_PMU_SYS_CTL1_RESTART_WK_FLAG BIT(10) +#define ATC2609A_PMU_SYS_CTL1_WKIRQ_WK_FLAG BIT(11) +#define ATC2609A_PMU_SYS_CTL1_ONOFF_SHORT_WK_FLAG BIT(12) +#define ATC2609A_PMU_SYS_CTL1_ONOFF_LONG_WK_FLAG BIT(13) +#define ATC2609A_PMU_SYS_CTL1_WALL_WK_FLAG BIT(14) +#define ATC2609A_PMU_SYS_CTL1_USB_WK_FLAG BIT(15) + +/* PMU_SYS_CTL2 Register Mask Bits */ +#define ATC2609A_PMU_SYS_CTL2_PMU_A_EN BIT(0) +#define ATC2609A_PMU_SYS_CTL2_ONOFF_PRESS_INT_EN BIT(1) +#define ATC2609A_PMU_SYS_CTL2_ONOFF_PRESS_PD BIT(2) +#define ATC2609A_PMU_SYS_CTL2_S2TIMER GENMASK(5, 3) +#define ATC2609A_PMU_SYS_CTL2_S2_TIMER_EN BIT(6) +#define ATC2609A_PMU_SYS_CTL2_ONOFF_RESET_TIME_SEL GENMASK(8, 7) +#define ATC2609A_PMU_SYS_CTL2_ONOFF_RESET_EN BIT(9) +#define ATC2609A_PMU_SYS_CTL2_ONOFF_PRESS_TIME GENMASK(11, 10) +#define ATC2609A_PMU_SYS_CTL2_ONOFF_LSP_INT_EN BIT(12) +#define ATC2609A_PMU_SYS_CTL2_ONOFF_LONG_PRESS BIT(13) +#define ATC2609A_PMU_SYS_CTL2_ONOFF_SHORT_PRESS BIT(14) +#define ATC2609A_PMU_SYS_CTL2_ONOFF_PRESS BIT(15) + +/* PMU_SYS_CTL3 Register Mask Bits */ +#define ATC2609A_PMU_SYS_CTL3_S2S3TOS1_TIMER GENMASK(8, 7) +#define ATC2609A_PMU_SYS_CTL3_S2S3TOS1_TIMER_EN BIT(9) +#define ATC2609A_PMU_SYS_CTL3_S3_TIMER GENMASK(12, 10) +#define ATC2609A_PMU_SYS_CTL3_S3_TIMER_EN BIT(13) +#define ATC2609A_PMU_SYS_CTL3_EN_S3 BIT(14) +#define ATC2609A_PMU_SYS_CTL3_EN_S2 BIT(15) + +/* PMU_SYS_CTL5 Register Mask Bits */ +#define ATC2609A_PMU_SYS_CTL5_WALLWKDTEN BIT(7) +#define ATC2609A_PMU_SYS_CTL5_VBUSWKDTEN BIT(8) +#define ATC2609A_PMU_SYS_CTL5_REMCON_DECT_EN BIT(9) +#define ATC2609A_PMU_SYS_CTL5_ONOFF_8S_SEL BIT(10) + +/* INTS_MSK Register Mask Bits */ +#define ATC2609A_INTS_MSK_AUDIO BIT(0) +#define ATC2609A_INTS_MSK_OV BIT(1) +#define ATC2609A_INTS_MSK_OC BIT(2) +#define ATC2609A_INTS_MSK_OT BIT(3) +#define ATC2609A_INTS_MSK_UV BIT(4) +#define ATC2609A_INTS_MSK_ALARM BIT(5) +#define ATC2609A_INTS_MSK_ONOFF BIT(6) +#define ATC2609A_INTS_MSK_WKUP BIT(7) +#define ATC2609A_INTS_MSK_IR BIT(8) +#define ATC2609A_INTS_MSK_REMCON BIT(9) +#define ATC2609A_INTS_MSK_POWERIN BIT(10) + +/* CMU_DEVRST Register Mask Bits */ +#define ATC2609A_CMU_DEVRST_AUDIO BIT(0) +#define ATC2609A_CMU_DEVRST_MFP BIT(1) +#define ATC2609A_CMU_DEVRST_INTS BIT(2) + +/* PAD_EN Register Mask Bits */ +#define ATC2609A_PAD_EN_EXTIRQ BIT(0) + +#endif /* __LINUX_MFD_ATC260X_ATC2609A_H */ diff --git a/include/linux/mfd/atc260x/core.h b/include/linux/mfd/atc260x/core.h new file mode 100644 index 000000000000..777b6c345d44 --- /dev/null +++ b/include/linux/mfd/atc260x/core.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Core MFD defines for ATC260x PMICs + * + * Copyright (C) 2019 Manivannan Sadhasivam + * Copyright (C) 2020 Cristian Ciocaltea + */ + +#ifndef __LINUX_MFD_ATC260X_CORE_H +#define __LINUX_MFD_ATC260X_CORE_H + +#include +#include + +enum atc260x_type { + ATC2603A = 0, + ATC2603C, + ATC2609A, +}; + +enum atc260x_ver { + ATC260X_A = 0, + ATC260X_B, + ATC260X_C, + ATC260X_D, + ATC260X_E, + ATC260X_F, + ATC260X_G, + ATC260X_H, +}; + +struct atc260x { + struct device *dev; + + struct regmap *regmap; + const struct regmap_irq_chip *regmap_irq_chip; + struct regmap_irq_chip_data *irq_data; + + struct mutex *regmap_mutex; /* mutex for custom regmap locking */ + + const struct mfd_cell *cells; + int nr_cells; + int irq; + + enum atc260x_type ic_type; + enum atc260x_ver ic_ver; + const char *type_name; + unsigned int rev_reg; + + const struct atc260x_init_regs *init_regs; /* regs for device init */ +}; + +struct regmap_config; + +int atc260x_match_device(struct atc260x *atc260x, struct regmap_config *regmap_cfg); +int atc260x_device_probe(struct atc260x *atc260x); + +#endif /* __LINUX_MFD_ATC260X_CORE_H */ -- cgit v1.2.3 From 98b94b6e38ca0c4eeb29949c656f6a315000c23e Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 22 Feb 2021 12:52:20 +0100 Subject: regulator: pca9450: Clear PRESET_EN bit to fix BUCK1/2/3 voltage setting The driver uses the DVS registers PCA9450_REG_BUCKxOUT_DVS0 to set the voltage for the buck regulators 1, 2 and 3. This has no effect as the PRESET_EN bit is set by default and therefore the preset values are used instead, which are set to 850 mV. To fix this we clear the PRESET_EN bit at time of initialization. Fixes: 0935ff5f1f0a ("regulator: pca9450: add pca9450 pmic driver") Cc: Signed-off-by: Frieder Schrempf Link: https://lore.kernel.org/r/20210222115229.166620-1-frieder.schrempf@kontron.de Signed-off-by: Mark Brown --- include/linux/regulator/pca9450.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h index ccdb5320a240..71902f41c919 100644 --- a/include/linux/regulator/pca9450.h +++ b/include/linux/regulator/pca9450.h @@ -147,6 +147,9 @@ enum { #define BUCK6_FPWM 0x04 #define BUCK6_ENMODE_MASK 0x03 +/* PCA9450_REG_BUCK123_PRESET_EN bit */ +#define BUCK123_PRESET_EN 0x80 + /* PCA9450_BUCK1OUT_DVS0 bits */ #define BUCK1OUT_DVS0_MASK 0x7F #define BUCK1OUT_DVS0_DEFAULT 0x14 -- cgit v1.2.3 From a0521f70aee008f8b574f13ebdf0010f2f90db62 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 3 Mar 2021 11:16:40 +0200 Subject: parport: Introduce module_parport_driver() helper macro Introduce module_parport_driver() helper macro to reduce boilerplate in the existing and new code. Signed-off-by: Andy Shevchenko Acked-by: Sudip Mukherjee Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20210303091642.23929-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/parport.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/parport.h b/include/linux/parport.h index f981f794c850..1c16ffb8b908 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -332,9 +332,19 @@ int __must_check __parport_register_driver(struct parport_driver *, __parport_register_driver(driver, THIS_MODULE, KBUILD_MODNAME) /* Unregister a high-level driver. */ -extern void parport_unregister_driver (struct parport_driver *); void parport_unregister_driver(struct parport_driver *); +/** + * module_parport_driver() - Helper macro for registering a modular parport driver + * @__parport_driver: struct parport_driver to be used + * + * Helper macro for parport drivers which do not do anything special in module + * init and exit. This eliminates a lot of boilerplate. Each module may only + * use this macro once, and calling it replaces module_init() and module_exit(). + */ +#define module_parport_driver(__parport_driver) \ + module_driver(__parport_driver, parport_register_driver, parport_unregister_driver) + /* If parport_register_driver doesn't fit your needs, perhaps * parport_find_xxx does. */ extern struct parport *parport_find_number (int); -- cgit v1.2.3 From 2a92c90f2ecca4475d6050f2f938a1755a8954cc Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 1 Mar 2021 17:30:12 +0300 Subject: software node: Fix device_add_software_node() The function device_add_software_node() was meant to register the node supplied to it, but only if that node wasn't already registered. Right now the function attempts to always register the node. That will cause a failure with nodes that are already registered. Fixing that by incrementing the reference count of the nodes that have already been registered, and only registering the new nodes. Also, clarifying the behaviour in the function documentation. Fixes: e68d0119e328 ("software node: Introduce device_add_software_node()") Signed-off-by: Heikki Krogerus Reviewed-by: Andy Shevchenko Tested-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index dafccfce0262..dd4687b56239 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -488,7 +488,7 @@ fwnode_create_software_node(const struct property_entry *properties, const struct fwnode_handle *parent); void fwnode_remove_software_node(struct fwnode_handle *fwnode); -int device_add_software_node(struct device *dev, const struct software_node *swnode); +int device_add_software_node(struct device *dev, const struct software_node *node); void device_remove_software_node(struct device *dev); int device_create_managed_software_node(struct device *dev, -- cgit v1.2.3 From 4d5f52838d11981c4d76cdc4e73230120de1ac85 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Tue, 9 Mar 2021 10:46:35 -0800 Subject: bus: mhi: Make firmware image optional for controller Some controllers can opt to not have MHI download a firmware image to have the device bootup and can find the device in a pass through execution environment, ready to go. Thus, MHI controllers for those devices do not need fw_image defined. Make it optional to accommodate different bootup modes. Suggested-by: Loic Poulain Signed-off-by: Bhaumik Bhatt Reviewed-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1615315595-37750-1-git-send-email-bbhatt@codeaurora.org Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d26acc8b21cd..8f5bf409f663 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -296,7 +296,7 @@ struct mhi_controller_config { * @wake_db: MHI WAKE doorbell register address * @iova_start: IOMMU starting address for data (required) * @iova_stop: IOMMU stop address for data (required) - * @fw_image: Firmware image name for normal booting (required) + * @fw_image: Firmware image name for normal booting (optional) * @edl_image: Firmware image name for emergency download mode (optional) * @rddm_size: RAM dump size that host should allocate for debugging purpose * @sbl_size: SBL image size downloaded through BHIe (optional) -- cgit v1.2.3 From b18adee4ce4443399963826b5d28d9e63d40740c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 9 Mar 2021 19:41:25 +0000 Subject: stacktrace: Move documentation for arch_stack_walk_reliable() to header Currently arch_stack_walk_reliable() is documented with an identical comment in both x86 and S/390 implementations which is a bit redundant. Move this to the header and convert to kerneldoc while we're at it. Signed-off-by: Mark Brown Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Acked-by: Vasily Gorbik Acked-by: Randy Dunlap Link: https://lkml.kernel.org/r/20210309194125.652-1-broonie@kernel.org --- include/linux/stacktrace.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 50e2df30b0aa..9edecb494e9e 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -52,8 +52,27 @@ typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr); */ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs); + +/** + * arch_stack_walk_reliable - Architecture specific function to walk the + * stack reliably + * + * @consume_entry: Callback which is invoked by the architecture code for + * each entry. + * @cookie: Caller supplied pointer which is handed back to + * @consume_entry + * @task: Pointer to a task struct, can be NULL + * + * This function returns an error if it detects any unreliable + * features of the stack. Otherwise it guarantees that the stack + * trace is reliable. + * + * If the task is not 'current', the caller *must* ensure the task is + * inactive and its stack is pinned. + */ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task); + void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, const struct pt_regs *regs); -- cgit v1.2.3 From 4806f1e2fee84c053cb68cd5be5817170bf0aab6 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 3 Mar 2021 14:36:16 +0200 Subject: net/mlx5: Set QP timestamp mode to default QPs which don't care from timestamp mode, should set the ts_format to default, otherwise the QP creation could be failed if the timestamp mode is not supported. Fixes: 2fe8d4b87802 ("RDMA/mlx5: Fail QP creation if the device can not support the CQE TS") Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- include/linux/mlx5/qp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index d75ef8aa8fac..b7deb790f257 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -547,4 +547,11 @@ static inline const char *mlx5_qp_state_str(int state) } } +static inline int mlx5_get_qp_default_ts(struct mlx5_core_dev *dev) +{ + return !MLX5_CAP_ROCE(dev, qp_ts_format) ? + MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING : + MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT; +} + #endif /* MLX5_QP_H */ -- cgit v1.2.3 From 0bb3262c0248d44aea3be31076f44beb82a7b120 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 9 Mar 2021 17:51:35 -0800 Subject: net: socket: use BIT() for MSG_* The bit mask for MSG_* seems a little confused here. Replace it with BIT() to make it clear to understand. Signed-off-by: Menglong Dong Signed-off-by: David S. Miller --- include/linux/socket.h | 71 ++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 385894b4a8bb..e88859f38cd0 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -283,42 +283,45 @@ struct ucred { Added those for 1003.1g not all are supported yet */ -#define MSG_OOB 1 -#define MSG_PEEK 2 -#define MSG_DONTROUTE 4 -#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */ -#define MSG_CTRUNC 8 -#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */ -#define MSG_TRUNC 0x20 -#define MSG_DONTWAIT 0x40 /* Nonblocking io */ -#define MSG_EOR 0x80 /* End of record */ -#define MSG_WAITALL 0x100 /* Wait for a full request */ -#define MSG_FIN 0x200 -#define MSG_SYN 0x400 -#define MSG_CONFIRM 0x800 /* Confirm path validity */ -#define MSG_RST 0x1000 -#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */ -#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ -#define MSG_MORE 0x8000 /* Sender will send more */ -#define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */ -#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */ -#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ -#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */ -#define MSG_EOF MSG_FIN -#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */ -#define MSG_SENDPAGE_DECRYPTED 0x100000 /* sendpage() internal : page may carry - * plain text and require encryption - */ - -#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ -#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ -#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file - descriptor received through - SCM_RIGHTS */ +#define MSG_OOB BIT(0) +#define MSG_PEEK BIT(1) +#define MSG_DONTROUTE BIT(2) +#define MSG_TRYHARD BIT(2) /* Synonym for MSG_DONTROUTE for DECnet */ +#define MSG_CTRUNC BIT(3) +#define MSG_PROBE BIT(4) /* Do not send. Only probe path f.e. for MTU */ +#define MSG_TRUNC BIT(5) +#define MSG_DONTWAIT BIT(6) /* Nonblocking io */ +#define MSG_EOR BIT(7) /* End of record */ +#define MSG_WAITALL BIT(8) /* Wait for a full request */ +#define MSG_FIN BIT(9) +#define MSG_SYN BIT(10) +#define MSG_CONFIRM BIT(11) /* Confirm path validity */ +#define MSG_RST BIT(12) +#define MSG_ERRQUEUE BIT(13) /* Fetch message from error queue */ +#define MSG_NOSIGNAL BIT(14) /* Do not generate SIGPIPE */ +#define MSG_MORE BIT(15) /* Sender will send more */ +#define MSG_WAITFORONE BIT(16) /* recvmmsg(): block until 1+ packets avail */ +#define MSG_SENDPAGE_NOPOLICY BIT(16) /* sendpage() internal : do no apply policy */ +#define MSG_SENDPAGE_NOTLAST BIT(17) /* sendpage() internal : not the last page */ +#define MSG_BATCH BIT(18) /* sendmmsg(): more messages coming */ +#define MSG_EOF MSG_FIN +#define MSG_NO_SHARED_FRAGS BIT(19) /* sendpage() internal : page frags + * are not shared + */ +#define MSG_SENDPAGE_DECRYPTED BIT(20) /* sendpage() internal : page may carry + * plain text and require encryption + */ + +#define MSG_ZEROCOPY BIT(26) /* Use user data in kernel path */ +#define MSG_FASTOPEN BIT(29) /* Send data in TCP SYN */ +#define MSG_CMSG_CLOEXEC BIT(30) /* Set close_on_exec for file + * descriptor received through + * SCM_RIGHTS + */ #if defined(CONFIG_COMPAT) -#define MSG_CMSG_COMPAT 0x80000000 /* This message needs 32 bit fixups */ +#define MSG_CMSG_COMPAT BIT(31) /* This message needs 32 bit fixups */ #else -#define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */ +#define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */ #endif -- cgit v1.2.3 From dd4fa1dae9f4847cc1fd78ca468ad69e16e5db3e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Mar 2021 01:56:36 -0800 Subject: macvlan: macvlan_count_rx() needs to be aware of preemption macvlan_count_rx() can be called from process context, it is thus necessary to disable preemption before calling u64_stats_update_begin() syzbot was able to spot this on 32bit arch: WARNING: CPU: 1 PID: 4632 at include/linux/seqlock.h:271 __seqprop_assert include/linux/seqlock.h:271 [inline] WARNING: CPU: 1 PID: 4632 at include/linux/seqlock.h:271 __seqprop_assert.constprop.0+0xf0/0x11c include/linux/seqlock.h:269 Modules linked in: Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 4632 Comm: kworker/1:3 Not tainted 5.12.0-rc2-syzkaller #0 Hardware name: ARM-Versatile Express Workqueue: events macvlan_process_broadcast Backtrace: [<82740468>] (dump_backtrace) from [<827406dc>] (show_stack+0x18/0x1c arch/arm/kernel/traps.c:252) r7:00000080 r6:60000093 r5:00000000 r4:8422a3c4 [<827406c4>] (show_stack) from [<82751b58>] (__dump_stack lib/dump_stack.c:79 [inline]) [<827406c4>] (show_stack) from [<82751b58>] (dump_stack+0xb8/0xe8 lib/dump_stack.c:120) [<82751aa0>] (dump_stack) from [<82741270>] (panic+0x130/0x378 kernel/panic.c:231) r7:830209b4 r6:84069ea4 r5:00000000 r4:844350d0 [<82741140>] (panic) from [<80244924>] (__warn+0xb0/0x164 kernel/panic.c:605) r3:8404ec8c r2:00000000 r1:00000000 r0:830209b4 r7:0000010f [<80244874>] (__warn) from [<82741520>] (warn_slowpath_fmt+0x68/0xd4 kernel/panic.c:628) r7:81363f70 r6:0000010f r5:83018e50 r4:00000000 [<827414bc>] (warn_slowpath_fmt) from [<81363f70>] (__seqprop_assert include/linux/seqlock.h:271 [inline]) [<827414bc>] (warn_slowpath_fmt) from [<81363f70>] (__seqprop_assert.constprop.0+0xf0/0x11c include/linux/seqlock.h:269) r8:5a109000 r7:0000000f r6:a568dac0 r5:89802300 r4:00000001 [<81363e80>] (__seqprop_assert.constprop.0) from [<81364af0>] (u64_stats_update_begin include/linux/u64_stats_sync.h:128 [inline]) [<81363e80>] (__seqprop_assert.constprop.0) from [<81364af0>] (macvlan_count_rx include/linux/if_macvlan.h:47 [inline]) [<81363e80>] (__seqprop_assert.constprop.0) from [<81364af0>] (macvlan_broadcast+0x154/0x26c drivers/net/macvlan.c:291) r5:89802300 r4:8a927740 [<8136499c>] (macvlan_broadcast) from [<81365020>] (macvlan_process_broadcast+0x258/0x2d0 drivers/net/macvlan.c:317) r10:81364f78 r9:8a86d000 r8:8a9c7e7c r7:8413aa5c r6:00000000 r5:00000000 r4:89802840 [<81364dc8>] (macvlan_process_broadcast) from [<802696a4>] (process_one_work+0x2d4/0x998 kernel/workqueue.c:2275) r10:00000008 r9:8404ec98 r8:84367a02 r7:ddfe6400 r6:ddfe2d40 r5:898dac80 r4:8a86d43c [<802693d0>] (process_one_work) from [<80269dcc>] (worker_thread+0x64/0x54c kernel/workqueue.c:2421) r10:00000008 r9:8a9c6000 r8:84006d00 r7:ddfe2d78 r6:898dac94 r5:ddfe2d40 r4:898dac80 [<80269d68>] (worker_thread) from [<80271f40>] (kthread+0x184/0x1a4 kernel/kthread.c:292) r10:85247e64 r9:898dac80 r8:80269d68 r7:00000000 r6:8a9c6000 r5:89a2ee40 r4:8a97bd00 [<80271dbc>] (kthread) from [<80200114>] (ret_from_fork+0x14/0x20 arch/arm/kernel/entry-common.S:158) Exception stack(0x8a9c7fb0 to 0x8a9c7ff8) Fixes: 412ca1550cbe ("macvlan: Move broadcasts into a work queue") Signed-off-by: Eric Dumazet Cc: Herbert Xu Reported-by: syzbot Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 96556c64c95d..10c94a3936ca 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -43,13 +43,14 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, if (likely(success)) { struct vlan_pcpu_stats *pcpu_stats; - pcpu_stats = this_cpu_ptr(vlan->pcpu_stats); + pcpu_stats = get_cpu_ptr(vlan->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); pcpu_stats->rx_packets++; pcpu_stats->rx_bytes += len; if (multicast) pcpu_stats->rx_multicast++; u64_stats_update_end(&pcpu_stats->syncp); + put_cpu_ptr(vlan->pcpu_stats); } else { this_cpu_inc(vlan->pcpu_stats->rx_errors); } -- cgit v1.2.3 From ce6ed1c4c9876c2880f52f18c41ef2a30d070bc5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 4 Mar 2021 20:37:08 +0900 Subject: kbuild: rebuild GCC plugins when the compiler is upgraded Linus reported a build error due to the GCC plugin incompatibility when the compiler is upgraded. [1] GCC plugins are tied to a particular GCC version. So, they must be rebuilt when the compiler is upgraded. This seems to be a long-standing flaw since the initial support of GCC plugins. Extend commit 8b59cd81dc5e ("kbuild: ensure full rebuild when the compiler is updated"), so that GCC plugins are covered by the compiler upgrade detection. [1]: https://lore.kernel.org/lkml/CAHk-=wieoN5ttOy7SnsGwZv+Fni3R6m-Ut=oxih6bbZ28G+4dw@mail.gmail.com/ Reported-by: Linus Torvalds Signed-off-by: Masahiro Yamada Reviewed-by: Kees Cook --- include/linux/compiler-version.h | 14 ++++++++++++++ include/linux/kconfig.h | 2 -- 2 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 include/linux/compiler-version.h (limited to 'include/linux') diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h new file mode 100644 index 000000000000..2b2972c77c62 --- /dev/null +++ b/include/linux/compiler-version.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifdef __LINUX_COMPILER_VERSION_H +#error "Please do not include . This is done by the build system." +#endif +#define __LINUX_COMPILER_VERSION_H + +/* + * This header exists to force full rebuild when the compiler is upgraded. + * + * When fixdep scans this, it will find this string "CONFIG_CC_VERSION_TEXT" + * and add dependency on include/config/cc/version/text.h, which is touched + * by Kconfig when the version string from the compiler changes. + */ diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index e78e17a76dc9..24a59cb06963 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -2,8 +2,6 @@ #ifndef __LINUX_KCONFIG_H #define __LINUX_KCONFIG_H -/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */ - #include #ifdef CONFIG_CPU_BIG_ENDIAN -- cgit v1.2.3 From 2bc611844b5d2c43b63bdf71ae6395fa7a6566cc Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 27 Jan 2021 21:30:12 +0100 Subject: mtd: nand: Let ECC engines advertize the exact number of steps This is an information that might be useful for specific uses, so export it, which might avoid having to guess the number of steps when necessary. Signed-off-by: Miquel Raynal Tested-by: Adam Ford #logicpd Torpedo Link: https://lore.kernel.org/linux-mtd/20210127203020.9574-2-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 414f8a4d2853..632becb13b46 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -231,12 +231,14 @@ struct nand_ops { /** * struct nand_ecc_context - Context for the ECC engine * @conf: basic ECC engine parameters + * @nsteps: number of ECC steps * @total: total number of bytes used for storing ECC codes, this is used by * generic OOB layouts * @priv: ECC engine driver private data */ struct nand_ecc_context { struct nand_ecc_props conf; + unsigned int nsteps; unsigned int total; void *priv; }; -- cgit v1.2.3 From e3554b10babd8ee1cf43bfc840ef4657eb1d12aa Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 27 Jan 2021 21:30:15 +0100 Subject: mtd: nand: Add a helper to retrieve the number of ECC steps This operation is very common and deserves a helper. It of course only works after the ECC engine initialization. Signed-off-by: Miquel Raynal Tested-by: Adam Ford #logicpd Torpedo Link: https://lore.kernel.org/linux-mtd/20210127203020.9574-5-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 632becb13b46..8a0116396689 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -587,6 +587,16 @@ nanddev_get_ecc_conf(struct nand_device *nand) return &nand->ecc.ctx.conf; } +/** + * nanddev_get_ecc_nsteps() - Extract the number of ECC steps + * @nand: NAND device + */ +static inline unsigned int +nanddev_get_ecc_nsteps(struct nand_device *nand) +{ + return nand->ecc.ctx.nsteps; +} + /** * nanddev_get_ecc_requirements() - Extract the ECC requirements from a NAND * device -- cgit v1.2.3 From ba4a40a483da86d76bd69957c21fcb975b8405ae Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 27 Jan 2021 21:30:16 +0100 Subject: mtd: nand: Add a helper to retrieve the number of ECC bytes per step This operation is very common and deserves a helper. It of course only works after the ECC engine initialization. Signed-off-by: Miquel Raynal Tested-by: Adam Ford #logicpd Torpedo Link: https://lore.kernel.org/linux-mtd/20210127203020.9574-6-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 8a0116396689..32fc7edf65b3 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -597,6 +597,16 @@ nanddev_get_ecc_nsteps(struct nand_device *nand) return nand->ecc.ctx.nsteps; } +/** + * nanddev_get_ecc_bytes_per_step() - Extract the number of ECC bytes per step + * @nand: NAND device + */ +static inline unsigned int +nanddev_get_ecc_bytes_per_step(struct nand_device *nand) +{ + return nand->ecc.ctx.total / nand->ecc.ctx.nsteps; +} + /** * nanddev_get_ecc_requirements() - Extract the ECC requirements from a NAND * device -- cgit v1.2.3 From 3e66843c74289b294b91547edd364c5a6fdef45b Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 27 Jan 2021 21:30:19 +0100 Subject: mtd: nand: ecc-bch: Use the public nsteps field The software BCH ECC engine stores the nsteps variable in its own private structure while it is also exported as a public ECC field. Let's get rid of the redundant private one and let's use the nand_ecc_context structure when possible. Signed-off-by: Miquel Raynal Tested-by: Adam Ford #logicpd Torpedo Link: https://lore.kernel.org/linux-mtd/20210127203020.9574-9-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index 22c92073b3dd..9da9969505a8 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -16,7 +16,6 @@ * @req_ctx: Save request context and tweak the original request to fit the * engine needs * @code_size: Number of bytes needed to store a code (one code per step) - * @nsteps: Number of steps * @calc_buf: Buffer to use when calculating ECC bytes * @code_buf: Buffer to use when reading (raw) ECC bytes from the chip * @bch: BCH control structure @@ -26,7 +25,6 @@ struct nand_ecc_sw_bch_conf { struct nand_ecc_req_tweak_ctx req_ctx; unsigned int code_size; - unsigned int nsteps; u8 *calc_buf; u8 *code_buf; struct bch_control *bch; -- cgit v1.2.3 From bf3816d28f0778de0d3d00a2a65525e19e5dbad2 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 27 Jan 2021 21:30:20 +0100 Subject: mtd: nand: ecc-hamming: Use the public nsteps field The software Hamming ECC engine stores the nsteps variable in its own private structure while it is also exported as a public ECC field. Let's get rid of the redundant private one and let's use the nand_ecc_context structure when possible. Signed-off-by: Miquel Raynal Tested-by: Adam Ford #logicpd Torpedo Link: https://lore.kernel.org/linux-mtd/20210127203020.9574-10-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h index 9f9073d86ff3..c6c71894c575 100644 --- a/include/linux/mtd/nand-ecc-sw-hamming.h +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -17,7 +17,6 @@ * @req_ctx: Save request context and tweak the original request to fit the * engine needs * @code_size: Number of bytes needed to store a code (one code per step) - * @nsteps: Number of steps * @calc_buf: Buffer to use when calculating ECC bytes * @code_buf: Buffer to use when reading (raw) ECC bytes from the chip * @sm_order: Smart Media special ordering @@ -25,7 +24,6 @@ struct nand_ecc_sw_hamming_conf { struct nand_ecc_req_tweak_ctx req_ctx; unsigned int code_size; - unsigned int nsteps; u8 *calc_buf; u8 *code_buf; unsigned int sm_order; -- cgit v1.2.3 From 6cfeb41a825913f3dcb131d6556cc9d1c4072015 Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Sun, 14 Feb 2021 00:45:53 +0800 Subject: mtd: Add helper macro for register_mtd_blktrans boilerplate This patch introduces the module_mtd_blktrans macro which is a convenience macro for mtd blktrans modules similar to module_platform_driver. It is intended to be used by drivers which init/exit section does nothing but register/unregister the mtd blktrans driver. By using this macro it is possible to eliminate a few lines of boilerplate code per mtd blktrans driver. Signed-off-by: Dejin Zheng Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210213164600.409061-2-zhengdejin5@gmail.com --- include/linux/mtd/blktrans.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h index 3c668cb1e344..15cc9b95e32b 100644 --- a/include/linux/mtd/blktrans.h +++ b/include/linux/mtd/blktrans.h @@ -77,5 +77,16 @@ extern int add_mtd_blktrans_dev(struct mtd_blktrans_dev *dev); extern int del_mtd_blktrans_dev(struct mtd_blktrans_dev *dev); extern int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev); +/** + * module_mtd_blktrans() - Helper macro for registering a mtd blktrans driver + * @__mtd_blktrans: mtd_blktrans_ops struct + * + * Helper macro for mtd blktrans drivers which do not do anything special in + * module init/exit. This eliminates a lot of boilerplate. Each module may only + * use this macro once, and calling it replaces module_init() and module_exit() + */ +#define module_mtd_blktrans(__mtd_blktrans) \ + module_driver(__mtd_blktrans, register_mtd_blktrans, \ + deregister_mtd_blktrans) #endif /* __MTD_TRANS_H__ */ -- cgit v1.2.3 From 1ad55288829c78e85bfe7d0c86d75415adf5f305 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Wed, 17 Feb 2021 22:18:45 +0100 Subject: mtd: char: Get rid of Big MTD Lock Get rid of central chrdev MTD lock, which prevents simultaneous operations on completely independent physical MTD chips. Replace it with newly introduced per-master mutex. Signed-off-by: Alexander Sverdlin Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210217211845.43364-2-alexander.sverdlin@nokia.com --- include/linux/mtd/mtd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 157357ec1441..ceabc2cae8a4 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -229,6 +229,7 @@ struct mtd_part { */ struct mtd_master { struct mutex partitions_lock; + struct mutex chrdev_lock; unsigned int suspended : 1; }; -- cgit v1.2.3 From 659ab7a49cbebe0deffcbe1f9560e82006b21817 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 3 Mar 2021 14:32:29 +0100 Subject: drm: Use USB controller's DMA mask when importing dmabufs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit USB devices cannot perform DMA and hence have no dma_mask set in their device structure. Therefore importing dmabuf into a USB-based driver fails, which breaks joining and mirroring of display in X11. For USB devices, pick the associated USB controller as attachment device. This allows the DRM import helpers to perform the DMA setup. If the DMA controller does not support DMA transfers, we're out of luck and cannot import. Our current USB-based DRM drivers don't use DMA, so the actual DMA device is not important. Tested by joining/mirroring displays of udl and radeon under Gnome/X11. v8: * release dmadev if device initialization fails (Noralf) * fix commit description (Noralf) v7: * fix use-before-init bug in gm12u320 (Dan) v6: * implement workaround in DRM drivers and hold reference to DMA device while USB device is in use * remove dev_is_usb() (Greg) * collapse USB helper into usb_intf_get_dma_device() (Alan) * integrate Daniel's TODO statement (Daniel) * fix typos (Greg) v5: * provide a helper for USB interfaces (Alan) * add FIXME item to documentation and TODO list (Daniel) v4: * implement workaround with USB helper functions (Greg) * use struct usb_device->bus->sysdev as DMA device (Takashi) v3: * drop gem_create_object * use DMA mask of USB controller, if any (Daniel, Christian, Noralf) v2: * move fix to importer side (Christian, Daniel) * update SHMEM and CMA helpers for new PRIME callbacks Signed-off-by: Thomas Zimmermann Fixes: 6eb0233ec2d0 ("usb: don't inherity DMA properties for USB devices") Tested-by: Pavel Machek Reviewed-by: Greg Kroah-Hartman Acked-by: Christian König Acked-by: Daniel Vetter Acked-by: Noralf Trønnes Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: # v5.10+ Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20210303133229.3288-1-tzimmermann@suse.de Signed-off-by: Maarten Lankhorst --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 7d72c4e0713c..d6a41841b93e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -746,6 +746,8 @@ extern int usb_lock_device_for_reset(struct usb_device *udev, extern int usb_reset_device(struct usb_device *dev); extern void usb_queue_reset_device(struct usb_interface *dev); +extern struct device *usb_intf_get_dma_device(struct usb_interface *intf); + #ifdef CONFIG_ACPI extern int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable); -- cgit v1.2.3 From a8affc03a9b375e19bc81573de0c9108317d78c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Mar 2021 12:01:37 +0100 Subject: block: rename BIO_MAX_PAGES to BIO_MAX_VECS Ever since the addition of multipage bio_vecs BIO_MAX_PAGES has been horribly confusingly misnamed. Rename it to BIO_MAX_VECS to stop confusing users of the bio API. Signed-off-by: Christoph Hellwig Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20210311110137.1132391-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 983ed2fe7c85..d0246c92a6e8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -20,11 +20,11 @@ #define BIO_BUG_ON #endif -#define BIO_MAX_PAGES 256U +#define BIO_MAX_VECS 256U static inline unsigned int bio_max_segs(unsigned int nr_segs) { - return min(nr_segs, BIO_MAX_PAGES); + return min(nr_segs, BIO_MAX_VECS); } #define bio_prio(bio) (bio)->bi_ioprio -- cgit v1.2.3 From b046664872dd78a8bebe3d5f3bb9da9baa93f5ca Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 11 Mar 2021 15:23:07 +0100 Subject: static_call: Move struct static_call_key definition to static_call_types.h Having the definition of static_call() in static_call_types.h makes no sense as long struct static_call_key isn't defined there, as the generic implementation of static_call() is referencing this structure. So move the definition of struct static_call_key to static_call_types.h. Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210311142319.4723-3-jgross@suse.com --- include/linux/static_call.h | 18 ------------------ include/linux/static_call_types.h | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 85ecc789f4ff..76b881259144 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -128,16 +128,6 @@ struct static_call_mod { struct static_call_site *sites; }; -struct static_call_key { - void *func; - union { - /* bit 0: 0 = mods, 1 = sites */ - unsigned long type; - struct static_call_mod *mods; - struct static_call_site *sites; - }; -}; - /* For finding the key associated with a trampoline */ struct static_call_tramp_key { s32 tramp; @@ -187,10 +177,6 @@ extern long __static_call_return0(void); static inline int static_call_init(void) { return 0; } -struct static_call_key { - void *func; -}; - #define __DEFINE_STATIC_CALL(name, _func, _func_init) \ DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ @@ -243,10 +229,6 @@ static inline long __static_call_return0(void) static inline int static_call_init(void) { return 0; } -struct static_call_key { - void *func; -}; - static inline long __static_call_return0(void) { return 0; diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h index ae5662d368b9..5a00b8b2cf9f 100644 --- a/include/linux/static_call_types.h +++ b/include/linux/static_call_types.h @@ -58,11 +58,25 @@ struct static_call_site { __raw_static_call(name); \ }) +struct static_call_key { + void *func; + union { + /* bit 0: 0 = mods, 1 = sites */ + unsigned long type; + struct static_call_mod *mods; + struct static_call_site *sites; + }; +}; + #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */ #define __STATIC_CALL_ADDRESSABLE(name) #define __static_call(name) __raw_static_call(name) +struct static_call_key { + void *func; +}; + #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */ #ifdef MODULE @@ -77,6 +91,10 @@ struct static_call_site { #else +struct static_call_key { + void *func; +}; + #define static_call(name) \ ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) -- cgit v1.2.3 From 6ea312d95e0226b306bb4b8ee3a0727d880378cb Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 11 Mar 2021 15:23:08 +0100 Subject: static_call: Add function to query current function Some users of paravirtualized functions need to query which function has been specified in a pv_ops vector element. In order to be able to switch such paravirtualized functions to static_calls instead, there needs to be a function to query the function which will be called via static_call(). Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210311142319.4723-4-jgross@suse.com --- include/linux/static_call.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 76b881259144..e01b61ab86b1 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -20,6 +20,7 @@ * static_call(name)(args...); * static_call_cond(name)(args...); * static_call_update(name, func); + * static_call_query(name); * * Usage example: * @@ -91,6 +92,10 @@ * * which will include the required value tests to avoid NULL-pointer * dereferences. + * + * To query which function is currently set to be called, use: + * + * func = static_call_query(name); */ #include @@ -118,6 +123,8 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool STATIC_CALL_TRAMP_ADDR(name), func); \ }) +#define static_call_query(name) (READ_ONCE(STATIC_CALL_KEY(name).func)) + #ifdef CONFIG_HAVE_STATIC_CALL_INLINE extern int __init static_call_init(void); @@ -191,6 +198,7 @@ static inline int static_call_init(void) { return 0; } }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) + #define static_call_cond(name) (void)__static_call(name) static inline -- cgit v1.2.3 From 2cfc056ef2c28b4961bff5e2f6deed94afb14024 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sat, 6 Mar 2021 19:24:18 +0800 Subject: remoteproc: introduce is_iomem to rproc_mem_entry Introduce is_iomem to indicate this piece memory is iomem or not. Reviewed-by: Bjorn Andersson Reviewed-by: Mathieu Poirier Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/1615029865-23312-4-git-send-email-peng.fan@oss.nxp.com Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index f28ee75d1005..a5f6d2d9cde2 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -315,6 +315,7 @@ struct rproc; /** * struct rproc_mem_entry - memory entry descriptor * @va: virtual address + * @is_iomem: io memory * @dma: dma address * @len: length, in bytes * @da: device address @@ -329,6 +330,7 @@ struct rproc; */ struct rproc_mem_entry { void *va; + bool is_iomem; dma_addr_t dma; size_t len; u32 da; -- cgit v1.2.3 From 40df0a91b2a5228ded8e5f75b80d28c96c6831cd Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sat, 6 Mar 2021 19:24:19 +0800 Subject: remoteproc: add is_iomem to da_to_va Introduce an extra parameter is_iomem to da_to_va, then the caller could take the memory as normal memory or io mapped memory. Reviewed-by: Bjorn Andersson Reviewed-by: Mathieu Poirier Reported-by: kernel test robot Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/1615029865-23312-5-git-send-email-peng.fan@oss.nxp.com Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index a5f6d2d9cde2..1b7d56c7a453 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -386,7 +386,7 @@ struct rproc_ops { int (*stop)(struct rproc *rproc); int (*attach)(struct rproc *rproc); void (*kick)(struct rproc *rproc, int vqid); - void * (*da_to_va)(struct rproc *rproc, u64 da, size_t len); + void * (*da_to_va)(struct rproc *rproc, u64 da, size_t len, bool *is_iomem); int (*parse_fw)(struct rproc *rproc, const struct firmware *fw); int (*handle_rsc)(struct rproc *rproc, u32 rsc_type, void *rsc, int offset, int avail); -- cgit v1.2.3 From bade4be69a6ea6f38c5894468ede10ee60b6f7a0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Mar 2021 13:25:01 -0500 Subject: svcrdma: Revert "svcrdma: Reduce Receive doorbell rate" I tested commit 43042b90cae1 ("svcrdma: Reduce Receive doorbell rate") with mlx4 (IB) and software iWARP and didn't find any issues. However, I recently got my hardware iWARP setup back on line (FastLinQ) and it's crashing hard on this commit (confirmed via bisect). The failure mode is complex. - After a connection is established, the first Receive completes normally. - But the second and third Receives have garbage in their Receive buffers. The server responds with ERR_VERS as a result. - When the client tears down the connection to retry, a couple of posted Receives flush twice, and that corrupts the recv_ctxt free list. - __svc_rdma_free then faults or loops infinitely while destroying the xprt's recv_ctxts. Since 43042b90cae1 ("svcrdma: Reduce Receive doorbell rate") does not fix a bug but is a scalability enhancement, it's safe and appropriate to revert it while working on a replacement. Fixes: 43042b90cae1 ("svcrdma: Reduce Receive doorbell rate") Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7c693b31965e..1e76ed688044 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -104,7 +104,6 @@ struct svcxprt_rdma { wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ unsigned long sc_flags; - u32 sc_pending_recvs; struct list_head sc_read_complete_q; struct work_struct sc_work; -- cgit v1.2.3 From e36db6a06937c6fce3291f0c362d4f757b8ec703 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 15 Feb 2021 12:40:21 +0200 Subject: iio: kfifo: add devm_iio_kfifo_buffer_setup() helper This change adds the devm_iio_kfifo_buffer_setup() helper/short-hand, which groups the simple routine of allocating a kfifo buffers via devm_iio_kfifo_allocate() and calling iio_device_attach_buffer(). The mode_flags parameter is required, as the IIO kfifo supports 2 modes: INDIO_BUFFER_SOFTWARE & INDIO_BUFFER_TRIGGERED. The setup_ops parameter is optional. This function will be a bit more useful when needing to define multiple buffers per IIO device. The naming for this function has been inspired from iio_triggered_buffer_setup() since that one does a kfifo alloc + a pollfunc alloc. So, this should have a more familiar ring to what it is. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210215104043.91251-3-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/kfifo_buf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/kfifo_buf.h b/include/linux/iio/kfifo_buf.h index 1fc1efa7799d..92c411b9ac26 100644 --- a/include/linux/iio/kfifo_buf.h +++ b/include/linux/iio/kfifo_buf.h @@ -3,6 +3,8 @@ #define __LINUX_IIO_KFIFO_BUF_H__ struct iio_buffer; +struct iio_buffer_setup_ops; +struct iio_dev; struct device; struct iio_buffer *iio_kfifo_allocate(void); @@ -10,4 +12,9 @@ void iio_kfifo_free(struct iio_buffer *r); struct iio_buffer *devm_iio_kfifo_allocate(struct device *dev); +int devm_iio_kfifo_buffer_setup(struct device *dev, + struct iio_dev *indio_dev, + int mode_flags, + const struct iio_buffer_setup_ops *setup_ops); + #endif -- cgit v1.2.3 From 99f6e8215b627730e2e6d371430a1f81e8ed6c27 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 15 Feb 2021 12:40:24 +0200 Subject: iio: kfifo: un-export devm_iio_kfifo_allocate() function At this point all drivers should use devm_iio_kfifo_buffer_setup() instead of manually allocating via devm_iio_kfifo_allocate() and assigning ops and modes. With this change, the devm_iio_kfifo_allocate() will be made private to the IIO core, since all drivers should call either devm_iio_kfifo_buffer_setup() or devm_iio_triggered_buffer_setup() to create a kfifo buffer. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210215104043.91251-6-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/kfifo_buf.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/kfifo_buf.h b/include/linux/iio/kfifo_buf.h index 92c411b9ac26..1522896e1daf 100644 --- a/include/linux/iio/kfifo_buf.h +++ b/include/linux/iio/kfifo_buf.h @@ -10,8 +10,6 @@ struct device; struct iio_buffer *iio_kfifo_allocate(void); void iio_kfifo_free(struct iio_buffer *r); -struct iio_buffer *devm_iio_kfifo_allocate(struct device *dev); - int devm_iio_kfifo_buffer_setup(struct device *dev, struct iio_dev *indio_dev, int mode_flags, -- cgit v1.2.3 From a02c09e42b3ed6cefae671e302835f1f04bf474e Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 15 Feb 2021 12:40:25 +0200 Subject: iio: buffer-dma,adi-axi-adc: introduce devm_iio_dmaengine_buffer_setup() This change does a conversion of the devm_iio_dmaengine_buffer_alloc() to devm_iio_dmaengine_buffer_setup(). This will allocate an IIO DMA buffer and attach it to the IIO device, similar to devm_iio_triggered_buffer_setup() (though the underlying code is different, the final logic is the same). Since the only user of the devm_iio_dmaengine_buffer_alloc() was the adi-axi-adc driver, this change does the replacement in a single go in the driver. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210215104043.91251-7-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer-dmaengine.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer-dmaengine.h b/include/linux/iio/buffer-dmaengine.h index 5b502291d6a4..5c355be89814 100644 --- a/include/linux/iio/buffer-dmaengine.h +++ b/include/linux/iio/buffer-dmaengine.h @@ -7,10 +7,11 @@ #ifndef __IIO_DMAENGINE_H__ #define __IIO_DMAENGINE_H__ -struct iio_buffer; +struct iio_dev; struct device; -struct iio_buffer *devm_iio_dmaengine_buffer_alloc(struct device *dev, - const char *channel); +int devm_iio_dmaengine_buffer_setup(struct device *dev, + struct iio_dev *indio_dev, + const char *channel); #endif -- cgit v1.2.3 From 32f171724e5cbecc80594fb6eced057cfdd6eb6f Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 15 Feb 2021 12:40:29 +0200 Subject: iio: core: rework iio device group creation Up until now, the device groups that an IIO device had were limited to 6. Two of these groups would account for buffer attributes (the buffer/ and scan_elements/ directories). Since we want to add multiple buffers per IIO device, this number may not be enough, when adding a second buffer. So, this change reallocates the groups array whenever an IIO device group is added, via a iio_device_register_sysfs_group() helper. This also means that the groups array should be assigned to 'indio_dev.dev.groups' really late, right before {cdev_}device_add() is called to do the entire setup. And we also must take care to free this array when the sysfs resources are being cleaned up. With this change we can also move the 'groups' & 'groupcounter' fields to the iio_dev_opaque object. Up until now, this didn't make a whole lot of sense (especially since we weren't sure how multibuffer support would look like in the end). But doing it now kills one birds with one stone. An alternative, would be to add a configurable Kconfig symbol CONFIG_IIO_MAX_BUFFERS_PER_DEVICE (or something like that) and compute a static maximum of the groups we can support per IIO device. But that would probably annoy a few people since that would make the system less configurable. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210215104043.91251-11-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio-opaque.h | 4 ++++ include/linux/iio/iio.h | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index 07c5a8e52ca8..8ba13a5c7af6 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -12,6 +12,8 @@ * attributes * @chan_attr_group: group for all attrs in base directory * @ioctl_handlers: ioctl handlers registered with the core handler + * @groups: attribute groups + * @groupcounter: index of next attribute group * @debugfs_dentry: device specific debugfs dentry * @cached_reg_addr: cached register address for debugfs reads * @read_buf: read buffer to be used for the initial reg read @@ -24,6 +26,8 @@ struct iio_dev_opaque { struct list_head channel_attr_list; struct attribute_group chan_attr_group; struct list_head ioctl_handlers; + const struct attribute_group **groups; + int groupcounter; #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_dentry; unsigned cached_reg_addr; diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index e4a9822e6495..f8585d01fc76 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -518,8 +518,6 @@ struct iio_buffer_setup_ops { * @setup_ops: [DRIVER] callbacks to call before and after buffer * enable/disable * @chrdev: [INTERN] associated character device - * @groups: [INTERN] attribute groups - * @groupcounter: [INTERN] index of next attribute group * @flags: [INTERN] file ops related flags including busy flag. * @priv: [DRIVER] reference to driver's private information * **MUST** be accessed **ONLY** via iio_priv() helper @@ -556,9 +554,6 @@ struct iio_dev { struct mutex info_exist_lock; const struct iio_buffer_setup_ops *setup_ops; struct cdev chrdev; -#define IIO_MAX_GROUPS 6 - const struct attribute_group *groups[IIO_MAX_GROUPS + 1]; - int groupcounter; unsigned long flags; void *priv; -- cgit v1.2.3 From d9a625744ed0e452f5c495cd8c51eed4b6623a4c Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 15 Feb 2021 12:40:31 +0200 Subject: iio: core: merge buffer/ & scan_elements/ attributes With this change, we create a new directory for the IIO device called buffer0, under which both the old buffer/ and scan_elements/ are stored. This is done to simplify the addition of multiple IIO buffers per IIO device. Otherwise we would need to add a bufferX/ and scan_elementsX/ directory for each IIO buffer. With the current way of storing attribute groups, we can't have directories stored under each other (i.e. scan_elements/ under buffer/), so the best approach moving forward is to merge their attributes. The old/legacy buffer/ & scan_elements/ groups are not stored on the opaque IIO device object. This way the IIO buffer can have just a single attribute_group object, saving a bit of memory when adding multiple IIO buffers. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210215104043.91251-13-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer_impl.h | 9 +++------ include/linux/iio/iio-opaque.h | 4 ++++ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h index a63dc07b7350..3e555e58475b 100644 --- a/include/linux/iio/buffer_impl.h +++ b/include/linux/iio/buffer_impl.h @@ -100,14 +100,11 @@ struct iio_buffer { /* @scan_el_dev_attr_list: List of scan element related attributes. */ struct list_head scan_el_dev_attr_list; - /* @buffer_group: Attributes of the buffer group. */ - struct attribute_group buffer_group; - /* - * @scan_el_group: Attribute group for those attributes not - * created from the iio_chan_info array. + * @buffer_group: Attributes of the new buffer group. + * Includes scan elements attributes. */ - struct attribute_group scan_el_group; + struct attribute_group buffer_group; /* @attrs: Standard attributes of the buffer. */ const struct attribute **attrs; diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index 8ba13a5c7af6..1a9310b0145f 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -14,6 +14,8 @@ * @ioctl_handlers: ioctl handlers registered with the core handler * @groups: attribute groups * @groupcounter: index of next attribute group + * @legacy_scan_el_group: attribute group for legacy scan elements attribute group + * @legacy_buffer_group: attribute group for legacy buffer attributes group * @debugfs_dentry: device specific debugfs dentry * @cached_reg_addr: cached register address for debugfs reads * @read_buf: read buffer to be used for the initial reg read @@ -28,6 +30,8 @@ struct iio_dev_opaque { struct list_head ioctl_handlers; const struct attribute_group **groups; int groupcounter; + struct attribute_group legacy_scan_el_group; + struct attribute_group legacy_buffer_group; #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_dentry; unsigned cached_reg_addr; -- cgit v1.2.3 From 3e3d11b2e43b9a967d98261250c19636b893b7ed Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 15 Feb 2021 12:40:32 +0200 Subject: iio: add reference to iio buffer on iio_dev_attr This change adds a reference to a 'struct iio_buffer' object on the iio_dev_attr object. This way, we can use the created iio_dev_attr objects on per-buffer basis (since they're allocated anyway). A minor downside of this change is that the number of parameters on __iio_add_chan_devattr() grows by 1. This looks like it could do with a bit of a re-think. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210215104043.91251-14-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/sysfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/sysfs.h b/include/linux/iio/sysfs.h index b532c875bc24..e51fba66de4b 100644 --- a/include/linux/iio/sysfs.h +++ b/include/linux/iio/sysfs.h @@ -9,6 +9,7 @@ #ifndef _INDUSTRIAL_IO_SYSFS_H_ #define _INDUSTRIAL_IO_SYSFS_H_ +struct iio_buffer; struct iio_chan_spec; /** @@ -17,12 +18,14 @@ struct iio_chan_spec; * @address: associated register address * @l: list head for maintaining list of dynamically created attrs * @c: specification for the underlying channel + * @buffer: the IIO buffer to which this attribute belongs to (if any) */ struct iio_dev_attr { struct device_attribute dev_attr; u64 address; struct list_head l; struct iio_chan_spec const *c; + struct iio_buffer *buffer; }; #define to_iio_dev_attr(_dev_attr) \ -- cgit v1.2.3 From 15097c7a1adc0554ce8eb6f5fd6758d063bfea44 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 15 Feb 2021 12:40:33 +0200 Subject: iio: buffer: wrap all buffer attributes into iio_dev_attr This change wraps all buffer attributes into iio_dev_attr objects, and assigns a reference to the IIO buffer they belong to. With the addition of multiple IIO buffers per one IIO device, we need a way to know which IIO buffer is being enabled/disabled/controlled. We know that all buffer attributes are device_attributes. So we can wrap them with a iio_dev_attr types. In the iio_dev_attr type, we can also hold a reference to an IIO buffer. So, we end up being able to allocate wrapped attributes for all buffer attributes (even the one from other drivers). The neat part with this mechanism, is that we don't need to add any extra cleanup, because these attributes are being added to a dynamic list that will get cleaned up via iio_free_chan_devattr_list(). With this change, the 'buffer->scan_el_dev_attr_list' list is being renamed to 'buffer->buffer_attr_list', effectively merging (or finalizing the merge) of the buffer/ & scan_elements/ attributes internally. Accessing these new buffer attributes can now be done via 'to_iio_dev_attr(attr)->buffer' inside the show/store handlers. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210215104043.91251-15-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer_impl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h index 3e555e58475b..41044320e581 100644 --- a/include/linux/iio/buffer_impl.h +++ b/include/linux/iio/buffer_impl.h @@ -97,8 +97,8 @@ struct iio_buffer { /* @scan_timestamp: Does the scan mode include a timestamp. */ bool scan_timestamp; - /* @scan_el_dev_attr_list: List of scan element related attributes. */ - struct list_head scan_el_dev_attr_list; + /* @buffer_attr_list: List of buffer attributes. */ + struct list_head buffer_attr_list; /* * @buffer_group: Attributes of the new buffer group. -- cgit v1.2.3 From ee708e6baacd3afdace9b721c25fbbe106cebb94 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 15 Feb 2021 12:40:38 +0200 Subject: iio: buffer: introduce support for attaching more IIO buffers With this change, calling iio_device_attach_buffer() will actually attach more buffers. Right now this doesn't do any validation of whether a buffer is attached twice; maybe that can be added later (if needed). Attaching a buffer more than once should yield noticeably bad results. The first buffer is the legacy buffer, so a reference is kept to it. At this point, accessing the data for the extra buffers (that are added after the first one) isn't possible yet. The iio_device_attach_buffer() is also changed to return an error code, which for now is -ENOMEM if the array could not be realloc-ed for more buffers. To adapt to this new change iio_device_attach_buffer() is called last in all place where it's called. The realloc failure is a bit difficult to handle during un-managed calls when unwinding, so it's better to have this as the last error in the setup_buffer calls. At this point, no driver should call iio_device_attach_buffer() directly, it should call one of the {devm_}iio_triggered_buffer_setup() or devm_iio_kfifo_buffer_setup() or devm_iio_dmaengine_buffer_setup() functions. This makes iio_device_attach_buffer() a bit easier to handle. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210215104043.91251-20-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer.h | 4 ++-- include/linux/iio/buffer_impl.h | 3 +++ include/linux/iio/iio-opaque.h | 4 ++++ 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 8febc23f5f26..b6928ac5c63d 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -41,7 +41,7 @@ static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev, bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev, const unsigned long *mask); -void iio_device_attach_buffer(struct iio_dev *indio_dev, - struct iio_buffer *buffer); +int iio_device_attach_buffer(struct iio_dev *indio_dev, + struct iio_buffer *buffer); #endif /* _IIO_BUFFER_GENERIC_H_ */ diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h index 41044320e581..768b90c64412 100644 --- a/include/linux/iio/buffer_impl.h +++ b/include/linux/iio/buffer_impl.h @@ -112,6 +112,9 @@ struct iio_buffer { /* @demux_bounce: Buffer for doing gather from incoming scan. */ void *demux_bounce; + /* @attached_entry: Entry in the devices list of buffers attached by the driver. */ + struct list_head attached_entry; + /* @buffer_list: Entry in the devices list of current buffers. */ struct list_head buffer_list; diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index 1a9310b0145f..b6ebc04af3e7 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -7,6 +7,8 @@ * struct iio_dev_opaque - industrial I/O device opaque information * @indio_dev: public industrial I/O device information * @event_interface: event chrdevs associated with interrupt lines + * @attached_buffers: array of buffers statically attached by the driver + * @attached_buffers_cnt: number of buffers in the array of statically attached buffers * @buffer_list: list of all buffers currently attached * @channel_attr_list: keep track of automatically created channel * attributes @@ -24,6 +26,8 @@ struct iio_dev_opaque { struct iio_dev indio_dev; struct iio_event_interface *event_interface; + struct iio_buffer **attached_buffers; + unsigned int attached_buffers_cnt; struct list_head buffer_list; struct list_head channel_attr_list; struct attribute_group chan_attr_group; -- cgit v1.2.3 From f73f7f4da581875f9b1f2fb8ebd1ab15ed634488 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 15 Feb 2021 12:40:39 +0200 Subject: iio: buffer: add ioctl() to support opening extra buffers for IIO device With this change, an ioctl() call is added to open a character device for a buffer. The ioctl() number is 'i' 0x91, which follows the IIO_GET_EVENT_FD_IOCTL ioctl. The ioctl() will return an FD for the requested buffer index. The indexes are the same from the /sys/iio/devices/iio:deviceX/bufferY (i.e. the Y variable). Since there doesn't seem to be a sane way to return the FD for buffer0 to be the same FD for the /dev/iio:deviceX, this ioctl() will return another FD for buffer0 (or the first buffer). This duplicate FD will be able to access the same buffer object (for buffer0) as accessing directly the /dev/iio:deviceX chardev. Also, there is no IIO_BUFFER_GET_BUFFER_COUNT ioctl() implemented, as the index for each buffer (and the count) can be deduced from the '/sys/bus/iio/devices/iio:deviceX/bufferY' folders (i.e the number of bufferY folders). Used following C code to test this: ------------------------------------------------------------------- #include #include #include #include #include #define IIO_BUFFER_GET_FD_IOCTL _IOWR('i', 0x91, int) int main(int argc, char *argv[]) { int fd; int fd1; int ret; if ((fd = open("/dev/iio:device0", O_RDWR))<0) { fprintf(stderr, "Error open() %d errno %d\n",fd, errno); return -1; } fprintf(stderr, "Using FD %d\n", fd); fd1 = atoi(argv[1]); ret = ioctl(fd, IIO_BUFFER_GET_FD_IOCTL, &fd1); if (ret < 0) { fprintf(stderr, "Error for buffer %d ioctl() %d errno %d\n", fd1, ret, errno); close(fd); return -1; } fprintf(stderr, "Got FD %d\n", fd1); close(fd1); close(fd); return 0; } ------------------------------------------------------------------- Results are: ------------------------------------------------------------------- # ./test 0 Using FD 3 Got FD 4 # ./test 1 Using FD 3 Got FD 4 # ./test 2 Using FD 3 Got FD 4 # ./test 3 Using FD 3 Got FD 4 # ls /sys/bus/iio/devices/iio\:device0 buffer buffer0 buffer1 buffer2 buffer3 dev in_voltage_sampling_frequency in_voltage_scale in_voltage_scale_available name of_node power scan_elements subsystem uevent ------------------------------------------------------------------- iio:device0 has some fake kfifo buffers attached to an IIO device. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210215104043.91251-21-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer_impl.h | 5 +++++ include/linux/iio/iio-opaque.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h index 768b90c64412..245b32918ae1 100644 --- a/include/linux/iio/buffer_impl.h +++ b/include/linux/iio/buffer_impl.h @@ -6,6 +6,8 @@ #ifdef CONFIG_IIO_BUFFER +#include + struct iio_dev; struct iio_buffer; @@ -72,6 +74,9 @@ struct iio_buffer { /** @length: Number of datums in buffer. */ unsigned int length; + /** @flags: File ops flags including busy flag. */ + unsigned long flags; + /** @bytes_per_datum: Size of individual datum including timestamp. */ size_t bytes_per_datum; diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index b6ebc04af3e7..32addd5e790e 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -9,6 +9,7 @@ * @event_interface: event chrdevs associated with interrupt lines * @attached_buffers: array of buffers statically attached by the driver * @attached_buffers_cnt: number of buffers in the array of statically attached buffers + * @buffer_ioctl_handler: ioctl() handler for this IIO device's buffer interface * @buffer_list: list of all buffers currently attached * @channel_attr_list: keep track of automatically created channel * attributes @@ -28,6 +29,7 @@ struct iio_dev_opaque { struct iio_event_interface *event_interface; struct iio_buffer **attached_buffers; unsigned int attached_buffers_cnt; + struct iio_ioctl_handler *buffer_ioctl_handler; struct list_head buffer_list; struct list_head channel_attr_list; struct attribute_group chan_attr_group; -- cgit v1.2.3 From 0e41fd515f94dcfcc24b6e510d29528431e46f60 Mon Sep 17 00:00:00 2001 From: Ye Xiang Date: Mon, 1 Feb 2021 13:49:20 +0800 Subject: iio: hid-sensors: Move get sensitivity attribute to hid-sensor-common No functional change has been made with this patch. The main intent here is to reduce code repetition of getting sensitivity attribute. In the current implementation, sensor_hub_input_get_attribute_info() is called from multiple drivers to get attribute info for sensitivity field. Moving this to common place will avoid code repetition. Signed-off-by: Ye Xiang Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210201054921.18214-2-xiang.ye@intel.com Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-hub.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 763802b2b8f9..637ec53a98a1 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -248,7 +248,9 @@ static inline int hid_sensor_convert_exponent(int unit_expo) int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev, u32 usage_id, - struct hid_sensor_common *st); + struct hid_sensor_common *st, + const u32 *sensitivity_addresses, + u32 sensitivity_addresses_len); int hid_sensor_write_raw_hyst_value(struct hid_sensor_common *st, int val1, int val2); int hid_sensor_read_raw_hyst_value(struct hid_sensor_common *st, -- cgit v1.2.3 From 1c71a2863a0c56123e5a67880cf658083c0a0b1e Mon Sep 17 00:00:00 2001 From: Ye Xiang Date: Sun, 7 Feb 2021 15:00:46 +0800 Subject: iio: Add relative sensitivity support Some hid sensors may use relative sensitivity such as als sensor. This patch adds relative sensitivity checking for all hid sensors. Signed-off-by: Ye Xiang Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20210207070048.23935-2-xiang.ye@intel.com Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-hub.h | 5 +++++ include/linux/hid-sensor-ids.h | 1 + include/linux/iio/types.h | 1 + 3 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 637ec53a98a1..c27329e2a5ad 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -231,6 +231,7 @@ struct hid_sensor_common { struct hid_sensor_hub_attribute_info report_state; struct hid_sensor_hub_attribute_info power_state; struct hid_sensor_hub_attribute_info sensitivity; + struct hid_sensor_hub_attribute_info sensitivity_rel; struct hid_sensor_hub_attribute_info report_latency; struct work_struct work; }; @@ -253,8 +254,12 @@ int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev, u32 sensitivity_addresses_len); int hid_sensor_write_raw_hyst_value(struct hid_sensor_common *st, int val1, int val2); +int hid_sensor_write_raw_hyst_rel_value(struct hid_sensor_common *st, int val1, + int val2); int hid_sensor_read_raw_hyst_value(struct hid_sensor_common *st, int *val1, int *val2); +int hid_sensor_read_raw_hyst_rel_value(struct hid_sensor_common *st, + int *val1, int *val2); int hid_sensor_write_samp_freq_value(struct hid_sensor_common *st, int val1, int val2); int hid_sensor_read_samp_freq_value(struct hid_sensor_common *st, diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 3bbdbccc5805..ac631159403a 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -149,6 +149,7 @@ /* Per data field properties */ #define HID_USAGE_SENSOR_DATA_MOD_NONE 0x00 #define HID_USAGE_SENSOR_DATA_MOD_CHANGE_SENSITIVITY_ABS 0x1000 +#define HID_USAGE_SENSOR_DATA_MOD_CHANGE_SENSITIVITY_REL_PCT 0xE000 /* Power state enumerations */ #define HID_USAGE_SENSOR_PROP_POWER_STATE_UNDEFINED_ENUM 0x200850 diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 1e3ed6f55bca..5aa7f66d4345 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -50,6 +50,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_PHASE, IIO_CHAN_INFO_HARDWAREGAIN, IIO_CHAN_INFO_HYSTERESIS, + IIO_CHAN_INFO_HYSTERESIS_RELATIVE, IIO_CHAN_INFO_INT_TIME, IIO_CHAN_INFO_ENABLE, IIO_CHAN_INFO_CALIBHEIGHT, -- cgit v1.2.3 From aa29cf932fb345e111cd7fef04320846fa73e372 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Fri, 19 Feb 2021 11:01:34 +0200 Subject: iio: adc: adi-axi-adc: fix typo in doc-string The channels are of type iio_chan_spec, not axi_adc_chan_spec. They were in some earlier version, but forgot to rename in the doc-string. Fixes: ef04070692a21 ("iio: adc: adi-axi-adc: add support for AXI ADC IP core") Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210219090134.48057-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/adi-axi-adc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/adc/adi-axi-adc.h b/include/linux/iio/adc/adi-axi-adc.h index c5d48e1c2d36..52620e5b8052 100644 --- a/include/linux/iio/adc/adi-axi-adc.h +++ b/include/linux/iio/adc/adi-axi-adc.h @@ -15,7 +15,7 @@ struct iio_chan_spec; * struct adi_axi_adc_chip_info - Chip specific information * @name Chip name * @id Chip ID (usually product ID) - * @channels Channel specifications of type @struct axi_adc_chan_spec + * @channels Channel specifications of type @struct iio_chan_spec * @num_channels Number of @channels * @scale_table Supported scales by the chip; tuples of 2 ints * @num_scales Number of scales in the table -- cgit v1.2.3 From 15aacc980dcb326ef33dfc32772faea1067f9178 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Thu, 18 Feb 2021 12:40:39 +0100 Subject: iio: adis: add helpers for locking Add some helpers to lock and unlock the device. As this is such a simple change, we update all the users that were using the lock already in this patch. Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20210218114039.216091-5-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 04e96d688ba9..f9b728d490b1 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -428,6 +428,16 @@ static inline int adis_initial_startup(struct adis *adis) return ret; } +static inline void adis_dev_lock(struct adis *adis) +{ + mutex_lock(&adis->state_lock); +} + +static inline void adis_dev_unlock(struct adis *adis) +{ + mutex_unlock(&adis->state_lock); +} + int adis_single_conversion(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, unsigned int error_mask, int *val); -- cgit v1.2.3 From 7852546f524595245382a919e752468f73421451 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 4 Mar 2021 14:45:15 +0200 Subject: RDMA/mlx5: Fix query RoCE port mlx5_is_roce_enabled returns the devlink RoCE init value, therefore it should be used only when driver is loaded. Instead we just need to read the roce_en field. In addition, rename mlx5_is_roce_enabled to mlx5_is_roce_init_enabled. Fixes: 7a58779edd75 ("IB/mlx5: Improve query port for representor port") Link: https://lore.kernel.org/r/20210304124517.1100608-2-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 53b89631a1d9..ab07f09f2bad 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1226,7 +1226,7 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; -static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) +static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) { struct devlink *devlink = priv_to_devlink(dev); union devlink_param_value val; -- cgit v1.2.3 From f4dae54e486d528d4dd98df116e7a522bbf12667 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Mar 2021 12:35:04 -0800 Subject: tcp: plug skb_still_in_host_queue() to TSQ Jakub and Neil reported an increase of RTO timers whenever TX completions are delayed a bit more (by increasing NIC TX coalescing parameters) Main issue is that TCP stack has a logic preventing a packet being retransmit if the prior clone has not yet been orphaned or freed. This logic came with commit 1f3279ae0c13 ("tcp: avoid retransmits of TCP packets hanging in host queues") Thankfully, in the case skb_still_in_host_queue() detects the initial clone is still in flight, it can use TSQ logic that will eventually retry later, at the moment the clone is freed or orphaned. Signed-off-by: Eric Dumazet Reported-by: Neil Spring Reported-by: Jakub Kicinski Cc: Neal Cardwell Cc: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0503c917d773..483e89348f78 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1140,7 +1140,7 @@ static inline bool skb_fclone_busy(const struct sock *sk, return skb->fclone == SKB_FCLONE_ORIG && refcount_read(&fclones->fclone_ref) > 1 && - fclones->skb2.sk == sk; + READ_ONCE(fclones->skb2.sk) == sk; } /** -- cgit v1.2.3 From ccf953d8f3d68e85e577e843fdcde8872b0a9769 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 10 Mar 2021 18:55:30 +0000 Subject: fb_defio: Remove custom address_space_operations There's no need to give the page an address_space. Leaving the page->mapping as NULL will cause the VM to handle set_page_dirty() the same way that it's handled now, and that was the only reason to set the address_space in the first place. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210310185530.1053320-1-willy@infradead.org --- include/linux/fb.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index ecfbcc0553a5..a8dccd23c249 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -659,9 +659,6 @@ static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, /* drivers/video/fb_defio.c */ int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma); extern void fb_deferred_io_init(struct fb_info *info); -extern void fb_deferred_io_open(struct fb_info *info, - struct inode *inode, - struct file *file); extern void fb_deferred_io_cleanup(struct fb_info *info); extern int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync); -- cgit v1.2.3 From 3ab1cce553378fc0df1b1d26d7e23d03bd4dd3b6 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 8 Mar 2021 16:55:01 +0200 Subject: spi: core: remove 'delay_usecs' field from spi_transfer The 'delay' field in the spi_transfer struct is meant to replace the 'delay_usecs' field. However some cleanup was required to remove the uses of 'delay_usecs'. Now that it's been cleaned up, we can remove it from the kernel tree. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210308145502.1075689-10-aardelean@deviqon.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 592897fa4f03..ea1784a43267 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -832,9 +832,6 @@ extern void spi_res_release(struct spi_controller *ctlr, * @delay: delay to be introduced after this transfer before * (optionally) changing the chipselect status, then starting * the next transfer or completing this @spi_message. - * @delay_usecs: microseconds to delay after this transfer before - * (optionally) changing the chipselect status, then starting - * the next transfer or completing this @spi_message. * @word_delay: inter word delay to be introduced after each word size * (set by bits_per_word) transmission. * @effective_speed_hz: the effective SCK-speed that was used to @@ -946,7 +943,6 @@ struct spi_transfer { #define SPI_NBITS_DUAL 0x02 /* 2bits transfer */ #define SPI_NBITS_QUAD 0x04 /* 4bits transfer */ u8 bits_per_word; - u16 delay_usecs; struct spi_delay delay; struct spi_delay cs_change_delay; struct spi_delay word_delay; @@ -1060,14 +1056,6 @@ spi_transfer_del(struct spi_transfer *t) static inline int spi_transfer_delay_exec(struct spi_transfer *t) { - struct spi_delay d; - - if (t->delay_usecs) { - d.value = t->delay_usecs; - d.unit = SPI_DELAY_UNIT_USECS; - return spi_delay_exec(&d, NULL); - } - return spi_delay_exec(&t->delay, t); } -- cgit v1.2.3 From 59079438a664559bb1f6f5fe85e306962ef9286e Mon Sep 17 00:00:00 2001 From: Mikhael Goikhman Date: Wed, 10 Mar 2021 23:09:09 -0800 Subject: net/mlx5: Remove unused mlx5_core_health member recover_work The code related to health->recover_work was removed in commit 63cbc552eebf ("net/mlx5: Handle SW reset of FW in error flow") Fix struct mlx5_core_health accordingly. Signed-off-by: Mikhael Goikhman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 53b89631a1d9..8fe51b4a781e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -438,7 +438,6 @@ struct mlx5_core_health { unsigned long flags; struct work_struct fatal_report_work; struct work_struct report_work; - struct delayed_work recover_work; struct devlink_health_reporter *fw_reporter; struct devlink_health_reporter *fw_fatal_reporter; }; -- cgit v1.2.3 From 59c904c8fffd903c1dae5fc6a402b88fa6dfc874 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 10 Mar 2021 23:09:11 -0800 Subject: net/mlx5: E-Switch, Add eswitch pointer to each representor Store the managing E-Switch of each representor. This will be used when a representor is created on eswitch manager 0 but the vport belongs to eswitch manager 1. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 994c2c8cb4fd..72d480df2a03 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -48,6 +48,7 @@ struct mlx5_eswitch_rep { /* Only IB rep is using vport_index */ u16 vport_index; u32 vlan_refcount; + struct mlx5_eswitch *esw; }; void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, -- cgit v1.2.3 From 3a46f4fb55ffd46e475e3fc53b1252f722cf647e Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 10 Mar 2021 23:09:13 -0800 Subject: net/mlx5: E-Switch, Refactor send to vport to be more generic Now that each representor stores a pointer to the managing E-Switch use that information when creating the send-to-vport rules. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 72d480df2a03..2ec0527991c8 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -62,8 +62,8 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, u16 vport_num); void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type); struct mlx5_flow_handle * -mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, - u16 vport_num, u32 sqn); +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, + struct mlx5_eswitch_rep *rep, u32 sqn); u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); -- cgit v1.2.3 From c3e666f1ada9cbfbe5465f122f9a2d63ddfd25ed Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 10 Mar 2021 23:09:14 -0800 Subject: net/mlx5: Add IFC bits needed for single FDB mode Currently we operate in a mode where each eswitch manager has a separate FDB. In order to combine these multiple FDBs we expose new caps to allow this: - Set root flow table which isn't native. - Set FDB a different selection mode when in LAG mode. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index df5d91c8b2d4..3ee7a86f39e4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -806,9 +806,11 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x3]; + u8 reserved_at_5[0x2]; + u8 esw_shared_ingress_acl[0x1]; u8 esw_uplink_ingress_acl[0x1]; - u8 reserved_at_9[0x10]; + u8 root_ft_on_other_esw[0x1]; + u8 reserved_at_a[0xf]; u8 esw_functions_changed[0x1]; u8 reserved_at_1a[0x1]; u8 ecpf_vport_exists[0x1]; @@ -1502,7 +1504,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_270[0x6]; u8 lag_dct[0x2]; u8 lag_tx_port_affinity[0x1]; - u8 reserved_at_279[0x2]; + u8 lag_native_fdb_selection[0x1]; + u8 reserved_at_27a[0x1]; u8 lag_master[0x1]; u8 num_lag_ports[0x4]; @@ -10036,14 +10039,19 @@ struct mlx5_ifc_set_flow_table_root_in_bits { u8 reserved_at_60[0x20]; u8 table_type[0x8]; - u8 reserved_at_88[0x18]; + u8 reserved_at_88[0x7]; + u8 table_of_other_vport[0x1]; + u8 table_vport_number[0x10]; u8 reserved_at_a0[0x8]; u8 table_id[0x18]; u8 reserved_at_c0[0x8]; u8 underlay_qpn[0x18]; - u8 reserved_at_e0[0x120]; + u8 table_eswitch_owner_vhca_id_valid[0x1]; + u8 reserved_at_e1[0xf]; + u8 table_eswitch_owner_vhca_id[0x10]; + u8 reserved_at_100[0x100]; }; enum { @@ -10273,7 +10281,8 @@ struct mlx5_ifc_dcbx_param_bits { }; struct mlx5_ifc_lagc_bits { - u8 reserved_at_0[0x1d]; + u8 fdb_selection_mode[0x1]; + u8 reserved_at_1[0x1c]; u8 lag_state[0x3]; u8 reserved_at_20[0x14]; -- cgit v1.2.3 From 26bf30902c10473ba38f220d3401a61c56d8db3b Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 10 Mar 2021 23:09:15 -0800 Subject: net/mlx5: Use order-0 allocations for EQs Currently we are allocating high-order page for EQs. In case of fragmented system, VF hot remove/add in VMs for example, there isn't enough contiguous memory for EQs allocation, which results in crashing of the VM. Therefore, use order-0 fragments for the EQ allocations instead. Performance tests: ConnectX-5 100Gbps, CPU: Intel(R) Xeon(R) CPU E5-2697 v3 @ 2.60GHz Performance tests show no sensible degradation. Signed-off-by: Tariq Toukan Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8fe51b4a781e..5c0422930b01 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -873,6 +873,11 @@ static inline u32 mlx5_base_mkey(const u32 key) return key & 0xffffff00u; } +static inline u32 wq_get_byte_sz(u8 log_sz, u8 log_stride) +{ + return ((u32)1 << log_sz) << log_stride; +} + static inline void mlx5_init_fbc_offset(struct mlx5_buf_list *frags, u8 log_stride, u8 log_sz, u16 strides_offset, -- cgit v1.2.3 From 4260c4067fbba55a90037fe3ee32eff087749f83 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 24 Feb 2021 13:03:13 -0600 Subject: f2fs: Replace one-element array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. Refactor the code according to the use of a flexible-array member in struct f2fs_checkpoint, instead of a one-element arrays. Notice that a temporary pointer to void '*tmp_ptr' was used in order to fix the following errors when using a flexible array instead of a one element array in struct f2fs_checkpoint: CC [M] fs/f2fs/dir.o In file included from fs/f2fs/dir.c:13: fs/f2fs/f2fs.h: In function ‘__bitmap_ptr’: fs/f2fs/f2fs.h:2227:40: error: invalid use of flexible array member 2227 | return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32); | ^ fs/f2fs/f2fs.h:2227:49: error: invalid use of flexible array member 2227 | return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32); | ^ fs/f2fs/f2fs.h:2238:40: error: invalid use of flexible array member 2238 | return &ckpt->sit_nat_version_bitmap + offset; | ^ make[2]: *** [scripts/Makefile.build:287: fs/f2fs/dir.o] Error 1 make[1]: *** [scripts/Makefile.build:530: fs/f2fs] Error 2 make: *** [Makefile:1819: fs] Error 2 [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/79 Build-tested-by: kernel test robot Link: https://lore.kernel.org/lkml/603647e4.DeEFbl4eqljuwAUe%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index c6cc0a566ef5..5487a80617a3 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -168,7 +168,7 @@ struct f2fs_checkpoint { unsigned char alloc_type[MAX_ACTIVE_LOGS]; /* SIT and NAT version bitmap */ - unsigned char sit_nat_version_bitmap[1]; + unsigned char sit_nat_version_bitmap[]; } __packed; #define CP_CHKSUM_OFFSET 4092 /* default chksum offset in checkpoint */ -- cgit v1.2.3 From 1bb73841ea7a88765db7f641a90120490f1f4aee Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 2 Mar 2021 07:21:33 +0100 Subject: PCI: Remove MicroGate SyncLink device IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drivers were removed in a1f714b44e34 (tty: Remove redundant synclink driver) and 3d608a591b2b (tty: Remove redundant synclinkmp driver). Remove the PCI device ID entries as well. Link: https://lore.kernel.org/r/20210302062214.29627-3-jslaby@suse.cz Signed-off-by: Jiri Slaby Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Wilczyński --- include/linux/pci_ids.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a76ccb697bef..8a18517696c1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2065,8 +2065,6 @@ #define PCI_DEVICE_ID_EXAR_XR17V358 0x0358 #define PCI_VENDOR_ID_MICROGATE 0x13c0 -#define PCI_DEVICE_ID_MICROGATE_USC 0x0010 -#define PCI_DEVICE_ID_MICROGATE_SCA 0x0030 #define PCI_VENDOR_ID_3WARE 0x13C1 #define PCI_DEVICE_ID_3WARE_1000 0x1000 -- cgit v1.2.3 From a3222a2da0a2d6c7682252d4bfdff05721a82b95 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Sun, 24 Jan 2021 15:56:36 +0200 Subject: net/mlx5e: Allow to match on ICMP parameters Support matching on ICMPv4/6 type and code parameters using misc3 section of match parameters. Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index dc3d2508f5c6..92a029a800a0 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1142,6 +1142,8 @@ enum mlx5_flex_parser_protos { MLX5_FLEX_PROTO_GENEVE = 1 << 3, MLX5_FLEX_PROTO_CW_MPLS_GRE = 1 << 4, MLX5_FLEX_PROTO_CW_MPLS_UDP = 1 << 5, + MLX5_FLEX_PROTO_ICMP = 1 << 8, + MLX5_FLEX_PROTO_ICMPV6 = 1 << 9, }; /* MLX5 DEV CAPs */ -- cgit v1.2.3 From f90fc37f289cd0886ef3a12b2ea33b93b8d9d360 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 12 Mar 2021 11:09:08 +0000 Subject: ptp_pch: Move 'pch_*()' prototypes to shared header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/ptp/ptp_pch.c:193:6: warning: no previous prototype for ‘pch_ch_control_write’ [-Wmissing-prototypes] drivers/ptp/ptp_pch.c:201:5: warning: no previous prototype for ‘pch_ch_event_read’ [-Wmissing-prototypes] drivers/ptp/ptp_pch.c:212:6: warning: no previous prototype for ‘pch_ch_event_write’ [-Wmissing-prototypes] drivers/ptp/ptp_pch.c:220:5: warning: no previous prototype for ‘pch_src_uuid_lo_read’ [-Wmissing-prototypes] drivers/ptp/ptp_pch.c:231:5: warning: no previous prototype for ‘pch_src_uuid_hi_read’ [-Wmissing-prototypes] drivers/ptp/ptp_pch.c:242:5: warning: no previous prototype for ‘pch_rx_snap_read’ [-Wmissing-prototypes] drivers/ptp/ptp_pch.c:259:5: warning: no previous prototype for ‘pch_tx_snap_read’ [-Wmissing-prototypes] drivers/ptp/ptp_pch.c:300:5: warning: no previous prototype for ‘pch_set_station_address’ [-Wmissing-prototypes] Cc: Richard Cochran (maintainer:PTP HARDWARE CLOCK SUPPORT) Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Flavio Suligoi Cc: netdev@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: David S. Miller --- include/linux/ptp_pch.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/ptp_pch.h (limited to 'include/linux') diff --git a/include/linux/ptp_pch.h b/include/linux/ptp_pch.h new file mode 100644 index 000000000000..51818198c292 --- /dev/null +++ b/include/linux/ptp_pch.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * PTP PCH + * + * Copyright 2019 Linaro Ltd. + * + * Author Lee Jones + */ + +#ifndef _PTP_PCH_H_ +#define _PTP_PCH_H_ + +void pch_ch_control_write(struct pci_dev *pdev, u32 val); +u32 pch_ch_event_read(struct pci_dev *pdev); +void pch_ch_event_write(struct pci_dev *pdev, u32 val); +u32 pch_src_uuid_lo_read(struct pci_dev *pdev); +u32 pch_src_uuid_hi_read(struct pci_dev *pdev); +u64 pch_rx_snap_read(struct pci_dev *pdev); +u64 pch_tx_snap_read(struct pci_dev *pdev); +int pch_set_station_address(u8 *addr, struct pci_dev *pdev); + +#endif /* _PTP_PCH_H_ */ -- cgit v1.2.3 From 34dc2efb39a231280fd6696a59bbe712bf3c5c4a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Mar 2021 21:07:01 -0800 Subject: memblock: fix section mismatch warning The inlining logic in clang-13 is rewritten to often not inline some functions that were inlined by all earlier compilers. In case of the memblock interfaces, this exposed a harmless bug of a missing __init annotation: WARNING: modpost: vmlinux.o(.text+0x507c0a): Section mismatch in reference from the function memblock_bottom_up() to the variable .meminit.data:memblock The function memblock_bottom_up() references the variable __meminitdata memblock. This is often because memblock_bottom_up lacks a __meminitdata annotation or the annotation of memblock is wrong. Interestingly, these annotations were present originally, but got removed with the explanation that the __init annotation prevents the function from getting inlined. I checked this again and found that while this is the case with clang, gcc (version 7 through 10, did not test others) does inline the functions regardless. As the previous change was apparently intended to help the clang builds, reverting it to help the newer clang versions seems appropriate as well. gcc builds don't seem to care either way. Link: https://lkml.kernel.org/r/20210225133808.2188581-1-arnd@kernel.org Fixes: 5bdba520c1b3 ("mm: memblock: drop __init from memblock functions to make it inline") Reference: 2cfb3665e864 ("include/linux/memblock.h: add __init to memblock_set_bottom_up()") Signed-off-by: Arnd Bergmann Reviewed-by: David Hildenbrand Reviewed-by: Mike Rapoport Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Faiyaz Mohammed Cc: Baoquan He Cc: Thomas Bogendoerfer Cc: Aslan Bakirov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c88bc24e31aa..d13e3cd938b4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -460,7 +460,7 @@ static inline void memblock_free_late(phys_addr_t base, phys_addr_t size) /* * Set the allocation direction to bottom-up or top-down. */ -static inline void memblock_set_bottom_up(bool enable) +static inline __init void memblock_set_bottom_up(bool enable) { memblock.bottom_up = enable; } @@ -470,7 +470,7 @@ static inline void memblock_set_bottom_up(bool enable) * if this is true, that said, memblock will allocate memory * in bottom-up direction. */ -static inline bool memblock_bottom_up(void) +static inline __init bool memblock_bottom_up(void) { return memblock.bottom_up; } -- cgit v1.2.3 From cbf78d85079cee662c45749ef4f744d41be85d48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Mar 2021 21:07:04 -0800 Subject: stop_machine: mark helpers __always_inline With clang-13, some functions only get partially inlined, with a specialized version referring to a global variable. This triggers a harmless build-time check for the intel-rng driver: WARNING: modpost: drivers/char/hw_random/intel-rng.o(.text+0xe): Section mismatch in reference from the function stop_machine() to the function .init.text:intel_rng_hw_init() The function stop_machine() references the function __init intel_rng_hw_init(). This is often because stop_machine lacks a __init annotation or the annotation of intel_rng_hw_init is wrong. In this instance, an easy workaround is to force the stop_machine() function to be inline, along with related interfaces that did not show the same behavior at the moment, but theoretically could. The combination of the two patches listed below triggers the behavior in clang-13, but individually these commits are correct. Link: https://lkml.kernel.org/r/20210225130153.1956990-1-arnd@kernel.org Fixes: fe5595c07400 ("stop_machine: Provide stop_machine_cpuslocked()") Fixes: ee527cd3a20c ("Use stop_machine_run in the Intel RNG driver") Signed-off-by: Arnd Bergmann Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: "Paul E. McKenney" Cc: Ingo Molnar Cc: Prarit Bhargava Cc: Daniel Bristot de Oliveira Cc: Peter Zijlstra Cc: Valentin Schneider Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stop_machine.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 30577c3aecf8..46fb3ebdd16e 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -128,7 +128,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); #else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ -static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, +static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { unsigned long flags; @@ -139,14 +139,15 @@ static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, return ret; } -static inline int stop_machine(cpu_stop_fn_t fn, void *data, - const struct cpumask *cpus) +static __always_inline int +stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { return stop_machine_cpuslocked(fn, data, cpus); } -static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, - const struct cpumask *cpus) +static __always_inline int +stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, + const struct cpumask *cpus) { return stop_machine(fn, data, cpus); } -- cgit v1.2.3 From 82e69a121be4b1597ce758534816a8ee04c8b761 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 12 Mar 2021 21:07:15 -0800 Subject: mm/fork: clear PASID for new mm When a new mm is created, its PASID should be cleared, i.e. the PASID is initialized to its init state 0 on both ARM and X86. This patch was part of the series introducing mm->pasid, but got lost along the way [1]. It still makes sense to have it, because each address space has a different PASID. And the IOMMU code in iommu_sva_alloc_pasid() expects the pasid field of a new mm struct to be cleared. [1] https://lore.kernel.org/linux-iommu/YDgh53AcQHT+T3L0@otcwcpicx3.sc.intel.com/ Link: https://lkml.kernel.org/r/20210302103837.2562625-1-jean-philippe@linaro.org Signed-off-by: Fenghua Yu Signed-off-by: Jean-Philippe Brucker Reviewed-by: Tony Luck Cc: Jacob Pan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0974ad501a47..6613b26a8894 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -23,6 +23,7 @@ #endif #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) +#define INIT_PASID 0 struct address_space; struct mem_cgroup; -- cgit v1.2.3 From 97a7e4733b9b221d012ae68fcd3b3251febf6341 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 12 Mar 2021 21:07:26 -0800 Subject: mm: introduce page_needs_cow_for_dma() for deciding whether cow We've got quite a few places (pte, pmd, pud) that explicitly checked against whether we should break the cow right now during fork(). It's easier to provide a helper, especially before we work the same thing on hugetlbfs. Since we'll reference is_cow_mapping() in mm.h, move it there too. Actually it suites mm.h more since internal.h is mm/ only, but mm.h is exported to the whole kernel. With that we should expect another patch to use is_cow_mapping() whenever we can across the kernel since we do use it quite a lot but it's always done with raw code against VM_* flags. Link: https://lkml.kernel.org/r/20210217233547.93892-4-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: Daniel Vetter Cc: David Airlie Cc: David Gibson Cc: Gal Pressman Cc: Jan Kara Cc: Jann Horn Cc: Kirill Shutemov Cc: Kirill Tkhai Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mike Rapoport Cc: Roland Scheidegger Cc: VMware Graphics Cc: Wei Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 77e64e3eac80..64a71bf20536 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1300,6 +1300,27 @@ static inline bool page_maybe_dma_pinned(struct page *page) GUP_PIN_COUNTING_BIAS; } +static inline bool is_cow_mapping(vm_flags_t flags) +{ + return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; +} + +/* + * This should most likely only be called during fork() to see whether we + * should break the cow immediately for a page on the src mm. + */ +static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, + struct page *page) +{ + if (!is_cow_mapping(vma->vm_flags)) + return false; + + if (!atomic_read(&vma->vm_mm->has_pinned)) + return false; + + return page_maybe_dma_pinned(page); +} + #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif -- cgit v1.2.3 From 97e4910232fa1f81e806aa60c25a0450276d99a2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Mar 2021 21:07:47 -0800 Subject: linux/compiler-clang.h: define HAVE_BUILTIN_BSWAP* Separating compiler-clang.h from compiler-gcc.h inadventently dropped the definitions of the three HAVE_BUILTIN_BSWAP macros, which requires falling back to the open-coded version and hoping that the compiler detects it. Since all versions of clang support the __builtin_bswap interfaces, add back the flags and have the headers pick these up automatically. This results in a 4% improvement of compilation speed for arm defconfig. Note: it might also be worth revisiting which architectures set CONFIG_ARCH_USE_BUILTIN_BSWAP for one compiler or the other, today this is set on six architectures (arm32, csky, mips, powerpc, s390, x86), while another ten architectures define custom helpers (alpha, arc, ia64, m68k, mips, nios2, parisc, sh, sparc, xtensa), and the rest (arm64, h8300, hexagon, microblaze, nds32, openrisc, riscv) just get the unoptimized version and rely on the compiler to detect it. A long time ago, the compiler builtins were architecture specific, but nowadays, all compilers that are able to build the kernel have correct implementations of them, though some may not be as optimized as the inline asm versions. The patch that dropped the optimization landed in v4.19, so as discussed it would be fairly safe to backport this revert to stable kernels to the 4.19/5.4/5.10 stable kernels, but there is a remaining risk for regressions, and it has no known side-effects besides compile speed. Link: https://lkml.kernel.org/r/20210226161151.2629097-1-arnd@kernel.org Link: https://lore.kernel.org/lkml/20210225164513.3667778-1-arnd@kernel.org/ Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Reviewed-by: Kees Cook Acked-by: Miguel Ojeda Acked-by: Nick Desaulniers Acked-by: Luc Van Oostenryck Cc: Masahiro Yamada Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Guo Ren Cc: Randy Dunlap Cc: Sami Tolvanen Cc: Marco Elver Cc: Arvind Sankar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 04c0a5a717f7..d217c382b02d 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -31,6 +31,12 @@ #define __no_sanitize_thread #endif +#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#define __HAVE_BUILTIN_BSWAP16__ +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + #if __has_feature(undefined_behavior_sanitizer) /* GCC does not have __SANITIZE_UNDEFINED__ */ #define __no_sanitize_undefined \ -- cgit v1.2.3 From 149fc787353f65b7e72e05e7b75d34863266c3e2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 12 Mar 2021 21:08:03 -0800 Subject: include/linux/sched/mm.h: use rcu_dereference in in_vfork() Fix a sparse warning by using rcu_dereference(). Technically this is a bug and a sufficiently aggressive compiler could reload the `real_parent' pointer outside the protection of the rcu lock (and access freed memory), but I think it's pretty unlikely to happen. Link: https://lkml.kernel.org/r/20210221194207.1351703-1-willy@infradead.org Fixes: b18dc5f291c0 ("mm, oom: skip vforked tasks from being selected") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Miaohe Lin Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 1ae08b8462a4..90b2a0bce11c 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -140,7 +140,8 @@ static inline bool in_vfork(struct task_struct *tsk) * another oom-unkillable task does this it should blame itself. */ rcu_read_lock(); - ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; + ret = tsk->vfork_done && + rcu_dereference(tsk->real_parent)->mm == tsk->mm; rcu_read_unlock(); return ret; -- cgit v1.2.3 From be6c8982e4ab9a41907555f601b711a7e2a17d4c Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Fri, 12 Mar 2021 21:08:30 -0800 Subject: mm/memcg: rename mem_cgroup_split_huge_fixup to split_page_memcg and add nr_pages argument Rename mem_cgroup_split_huge_fixup to split_page_memcg and explicitly pass in page number argument. In this way, the interface name is more common and can be used by potential users. In addition, the complete info(memcg and flag) of the memcg needs to be set to the tail pages. Link: https://lkml.kernel.org/r/20210304074053.65527-2-zhouguanghui1@huawei.com Signed-off-by: Zhou Guanghui Acked-by: Johannes Weiner Reviewed-by: Zi Yan Reviewed-by: Shakeel Butt Acked-by: Michal Hocko Cc: Hugh Dickins Cc: "Kirill A. Shutemov" Cc: Nicholas Piggin Cc: Kefeng Wang Cc: Hanjun Guo Cc: Tianhong Ding Cc: Weilong Chen Cc: Rui Xiang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e6dc793d587d..0c04d39a7967 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1061,9 +1061,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, rcu_read_unlock(); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -void mem_cgroup_split_huge_fixup(struct page *head); -#endif +void split_page_memcg(struct page *head, unsigned int nr); #else /* CONFIG_MEMCG */ @@ -1400,7 +1398,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, return 0; } -static inline void mem_cgroup_split_huge_fixup(struct page *head) +static inline void split_page_memcg(struct page *head, unsigned int nr) { } -- cgit v1.2.3 From bc22ed2ea1121f9d9ba3f85c524cb857d54a2d00 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Sat, 20 Feb 2021 11:28:18 -0500 Subject: virtio: remove export for virtio_config_{enable, disable} virtio_config_enable(), virtio_config_disable() are only used inside drivers/virtio/virtio.c, so it doesn't need export the symbols. Signed-off-by: Xianting Tian Link: https://lore.kernel.org/r/1613838498-8791-1-git-send-email-xianting_tian@126.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- include/linux/virtio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 55ea329fe72a..b1894e0323fa 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -132,8 +132,6 @@ bool is_virtio_device(struct device *dev); void virtio_break_device(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev); -void virtio_config_disable(struct virtio_device *dev); -void virtio_config_enable(struct virtio_device *dev); int virtio_finalize_features(struct virtio_device *dev); #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); -- cgit v1.2.3 From e3305138da47f0ae2241e5daa18af276e1e54457 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 14 Mar 2021 11:11:14 +0000 Subject: skbuff: make __skb_header_pointer()'s data argument const The function never modifies the input buffer, so 'data' argument can be marked as const. This implies one harmless cast-away. Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 483e89348f78..d6ea3dc3eddb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3678,11 +3678,11 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); static inline void * __must_check -__skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *data, int hlen, void *buffer) +__skb_header_pointer(const struct sk_buff *skb, int offset, int len, + const void *data, int hlen, void *buffer) { if (hlen - offset >= len) - return data + offset; + return (void *)data + offset; if (!skb || skb_copy_bits(skb, offset, buffer, len) < 0) -- cgit v1.2.3 From f96533cded173b3b019001a505a746c3cd8fc323 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 14 Mar 2021 11:11:23 +0000 Subject: flow_dissector: constify raw input data argument Flow Dissector code never modifies the input buffer, neither skb nor raw data. Make 'data' argument const for all of the Flow dissector's functions. Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d6ea3dc3eddb..46c61e127e9f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1292,10 +1292,10 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) void __skb_get_hash(struct sk_buff *skb); u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); -u32 __skb_get_poff(const struct sk_buff *skb, void *data, +u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - void *data, int hlen_proto); + const void *data, int hlen_proto); static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) @@ -1314,9 +1314,8 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, - void *data, __be16 proto, int nhoff, int hlen, - unsigned int flags); + void *target_container, const void *data, + __be16 proto, int nhoff, int hlen, unsigned int flags); static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, @@ -1338,9 +1337,9 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, static inline bool skb_flow_dissect_flow_keys_basic(const struct net *net, const struct sk_buff *skb, - struct flow_keys_basic *flow, void *data, - __be16 proto, int nhoff, int hlen, - unsigned int flags) + struct flow_keys_basic *flow, + const void *data, __be16 proto, + int nhoff, int hlen, unsigned int flags) { memset(flow, 0, sizeof(*flow)); return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow, -- cgit v1.2.3 From 805a25f3a1bdf4aafd0af412ce1e47d0cb6c7628 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 14 Mar 2021 11:11:32 +0000 Subject: linux/etherdevice.h: misc trailing whitespace cleanup Caught by the text editor. Fix it separately from the actual changes. Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2e5debc0373c..bcb2f81baafb 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -11,7 +11,7 @@ * Authors: Ross Biro * Fred N. van Kempen, * - * Relocated to include/linux where it belongs by Alan Cox + * Relocated to include/linux where it belongs by Alan Cox * */ #ifndef _LINUX_ETHERDEVICE_H -- cgit v1.2.3 From 59753ce8b196de60211a989c75ece8aeb0d9d57c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 14 Mar 2021 11:11:41 +0000 Subject: ethernet: constify eth_get_headlen()'s data argument It's used only for flow dissection, which now takes constant data pointers. Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index bcb2f81baafb..330345b1be54 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -29,7 +29,7 @@ struct device; int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); unsigned char *arch_get_platform_mac_address(void); int nvmem_get_mac_address(struct device *dev, void *addrbuf); -u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len); +u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; -- cgit v1.2.3 From d206121faf8bb2239cd970af0bd32f5203780427 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 14 Mar 2021 11:11:50 +0000 Subject: skbuff: micro-optimize {,__}skb_header_pointer() {,__}skb_header_pointer() helpers exist mainly for preventing accesses-beyond-end of the linear data. In the vast majorify of cases, they bail out on the first condition. All code going after is mostly a fallback. Mark the most common branch as 'likely' one to move it in-line. Also, skb_copy_bits() can return negative values only when the input arguments are invalid, e.g. offset is greater than skb->len. It can be safely marked as 'unlikely' branch, assuming that hotpath code provides sane input to not fail here. These two bump the throughput with a single Flow Dissector pass on every packet (e.g. with RPS or driver that uses eth_get_headlen()) on 20 Mbps per flow/core. Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 46c61e127e9f..ecc029674ae4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3680,11 +3680,10 @@ static inline void * __must_check __skb_header_pointer(const struct sk_buff *skb, int offset, int len, const void *data, int hlen, void *buffer) { - if (hlen - offset >= len) + if (likely(hlen - offset >= len)) return (void *)data + offset; - if (!skb || - skb_copy_bits(skb, offset, buffer, len) < 0) + if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) return NULL; return buffer; -- cgit v1.2.3 From c4d57c22ac65bd503716062a06fad55a01569cac Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Wed, 3 Mar 2021 10:54:19 +0100 Subject: power: supply: bq27xxx: fix power_avg for newer ICs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On all newer bq27xxx ICs, the AveragePower register contains a signed value; in addition to handling the raw value as unsigned, the driver code also didn't convert it to µW as expected. At least for the BQ28Z610, the reference manual incorrectly states that the value is in units of 1mW and not 10mW. I have no way of knowing whether the manuals of other supported ICs contain the same error, or if there are models that actually use 1mW. At least, the new code shouldn't be *less* correct than the old version for any device. power_avg is removed from the cache structure, se we don't have to extend it to store both a signed value and an error code. Always getting an up-to-date value may be desirable anyways, as it avoids inconsistent current and power readings when switching between charging and discharging. Signed-off-by: Matthias Schiffer Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 111a40d0d3d5..8d5f4f40fb41 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -53,7 +53,6 @@ struct bq27xxx_reg_cache { int capacity; int energy; int flags; - int power_avg; int health; }; -- cgit v1.2.3 From c9570d4a5efd04479b3cd09c39b571eb031d94f4 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 31 Dec 2020 09:52:52 +0100 Subject: extcon: Add stubs for extcon_register_notifier_all() functions Add stubs for extcon_register_notifier_all() function for !CONFIG_EXTCON case. This is useful for compile testing and for drivers which use EXTCON but do not require it (therefore do not depend on CONFIG_EXTCON). Fixes: 815429b39d94 ("extcon: Add new extcon_register_notifier_all() to monitor all external connectors") Reported-by: kernel test robot Signed-off-by: Krzysztof Kozlowski Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index fd183fb9c20f..0c19010da77f 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -271,6 +271,29 @@ static inline void devm_extcon_unregister_notifier(struct device *dev, struct extcon_dev *edev, unsigned int id, struct notifier_block *nb) { } +static inline int extcon_register_notifier_all(struct extcon_dev *edev, + struct notifier_block *nb) +{ + return 0; +} + +static inline int extcon_unregister_notifier_all(struct extcon_dev *edev, + struct notifier_block *nb) +{ + return 0; +} + +static inline int devm_extcon_register_notifier_all(struct device *dev, + struct extcon_dev *edev, + struct notifier_block *nb) +{ + return 0; +} + +static inline void devm_extcon_unregister_notifier_all(struct device *dev, + struct extcon_dev *edev, + struct notifier_block *nb) { } + static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) { return ERR_PTR(-ENODEV); -- cgit v1.2.3 From 58483761810087e5ffdf36e84ac1bf26df909097 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 14 Mar 2021 12:13:29 +0100 Subject: thermal/drivers/core: Use a char pointer for the cooling device name We want to have any kind of name for the cooling devices as we do no longer want to rely on auto-numbering. Let's replace the cooling device's fixed array by a char pointer to be allocated dynamically when registering the cooling device, so we don't limit the length of the name. Rework the error path at the same time as we have to rollback the allocations in case of error. Tested with a dummy device having the name: "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch" A village on the island of Anglesey (Wales), known to have the longest name in Europe. Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Tested-by: Ido Schimmel Link: https://lore.kernel.org/r/20210314111333.16551-1-daniel.lezcano@linaro.org --- include/linux/thermal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 6ac7bb1d2b1f..169502164364 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -91,7 +91,7 @@ struct thermal_cooling_device_ops { struct thermal_cooling_device { int id; - char type[THERMAL_NAME_LENGTH]; + char *type; struct device device; struct device_node *np; void *devdata; -- cgit v1.2.3 From a74f681c3710b47a093d910ca7c6666b3d1e3a2c Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sun, 14 Mar 2021 19:33:54 +0300 Subject: opp: Add devres wrapper for dev_pm_opp_set_clkname Add devres wrapper for dev_pm_opp_set_clkname() to simplify drivers code. Signed-off-by: Yangtao Li Signed-off-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index c0371efa4a0f..0583d775aa5a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -150,6 +150,7 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * con void dev_pm_opp_put_regulators(struct opp_table *opp_table); struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); +int devm_pm_opp_set_clkname(struct device *dev, const char *name); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); struct opp_table *devm_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); @@ -355,6 +356,11 @@ static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const static inline void dev_pm_opp_put_clkname(struct opp_table *opp_table) {} +static inline int devm_pm_opp_set_clkname(struct device *dev, const char *name) +{ + return -EOPNOTSUPP; +} + static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs) { return ERR_PTR(-EOPNOTSUPP); -- cgit v1.2.3 From 32aee78bc5184c7a51a081939721e97cfad4a44e Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sun, 14 Mar 2021 19:33:55 +0300 Subject: opp: Add devres wrapper for dev_pm_opp_set_regulators Add devres wrapper for dev_pm_opp_set_regulators() to simplify drivers code. Signed-off-by: Yangtao Li Signed-off-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 0583d775aa5a..3e667af57211 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -148,6 +148,7 @@ struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) void dev_pm_opp_put_prop_name(struct opp_table *opp_table); struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count); void dev_pm_opp_put_regulators(struct opp_table *opp_table); +int devm_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count); struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); int devm_pm_opp_set_clkname(struct device *dev, const char *name); @@ -349,6 +350,13 @@ static inline struct opp_table *dev_pm_opp_set_regulators(struct device *dev, co static inline void dev_pm_opp_put_regulators(struct opp_table *opp_table) {} +static inline int devm_pm_opp_set_regulators(struct device *dev, + const char * const names[], + unsigned int count) +{ + return -EOPNOTSUPP; +} + static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name) { return ERR_PTR(-EOPNOTSUPP); -- cgit v1.2.3 From 9c4f220f3dc260e325c92e8588ade2affcb6528c Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sun, 14 Mar 2021 19:33:56 +0300 Subject: opp: Add devres wrapper for dev_pm_opp_set_supported_hw Add devres wrapper for dev_pm_opp_set_supported_hw() to simplify drivers code. Signed-off-by: Yangtao Li Signed-off-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 3e667af57211..e455b187e405 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -144,6 +144,7 @@ int dev_pm_opp_unregister_notifier(struct device *dev, struct notifier_block *nb struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count); void dev_pm_opp_put_supported_hw(struct opp_table *opp_table); +int devm_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count); struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct opp_table *opp_table); struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count); @@ -321,6 +322,13 @@ static inline struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, static inline void dev_pm_opp_put_supported_hw(struct opp_table *opp_table) {} +static inline int devm_pm_opp_set_supported_hw(struct device *dev, + const u32 *versions, + unsigned int count) +{ + return -EOPNOTSUPP; +} + static inline struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)) { -- cgit v1.2.3 From 3d5cfbb69508db7d092475be01c66edc86066717 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sun, 14 Mar 2021 19:33:57 +0300 Subject: opp: Add devres wrapper for dev_pm_opp_of_add_table Add devres wrapper for dev_pm_opp_of_add_table() to simplify drivers code. Signed-off-by: Yangtao Li Signed-off-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index e455b187e405..3f8894012429 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -441,6 +441,7 @@ int dev_pm_opp_of_add_table(struct device *dev); int dev_pm_opp_of_add_table_indexed(struct device *dev, int index); int dev_pm_opp_of_add_table_noclk(struct device *dev, int index); void dev_pm_opp_of_remove_table(struct device *dev); +int devm_pm_opp_of_add_table(struct device *dev); int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask); void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask); int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); @@ -473,6 +474,11 @@ static inline void dev_pm_opp_of_remove_table(struct device *dev) { } +static inline int devm_pm_opp_of_add_table(struct device *dev) +{ + return -EOPNOTSUPP; +} + static inline int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask) { return -EOPNOTSUPP; -- cgit v1.2.3 From eedb0b12d091a21909b5e84d9f3e5e649305bd12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 Jan 2021 14:53:22 +0100 Subject: dma-mapping: add a dma_mmap_pages helper Add a helper to map memory allocated using dma_alloc_pages into a user address space, similar to the dma_alloc_attrs function for coherent allocations. Signed-off-by: Christoph Hellwig Reviewed-by: Tomasz Figa Tested-by: Ricardo Ribalda --- include/linux/dma-mapping.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 2a984cb4d1e0..2b8dce756e1f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -263,6 +263,8 @@ struct page *dma_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); void dma_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_handle, enum dma_data_direction dir); +int dma_mmap_pages(struct device *dev, struct vm_area_struct *vma, + size_t size, struct page *page); static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) -- cgit v1.2.3 From 7d5b5738d1514e9dd8ed452660e2a4d25beb9483 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 Jan 2021 14:54:18 +0100 Subject: dma-mapping: add a dma_alloc_noncontiguous API Add a new API that returns a potentiall virtually non-contigous sg_table and a DMA address. This API is only properly implemented for dma-iommu and will simply return a contigious chunk as a fallback. The intent is that drivers can use this API if either: - no kernel mapping or only temporary kernel mappings are required. That is as a better replacement for DMA_ATTR_NO_KERNEL_MAPPING - a kernel mapping is required for cached and DMA mapped pages, but the driver also needs the pages to e.g. map them to userspace. In that sense it is a replacement for some aspects of the recently removed and never fully implemented DMA_ATTR_NON_CONSISTENT Signed-off-by: Christoph Hellwig Reviewed-by: Tomasz Figa Tested-by: Ricardo Ribalda --- include/linux/dma-map-ops.h | 19 +++++++++++++++++++ include/linux/dma-mapping.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 51872e736e7b..0d53a96a3d64 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -22,6 +22,11 @@ struct dma_map_ops { gfp_t gfp); void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir); + struct sg_table *(*alloc_noncontiguous)(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, + unsigned long attrs); + void (*free_noncontiguous)(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir); int (*mmap)(struct device *, struct vm_area_struct *, void *, dma_addr_t, size_t, unsigned long attrs); @@ -198,6 +203,20 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, } #endif /* CONFIG_DMA_DECLARE_COHERENT */ +/* + * This is the actual return value from the ->alloc_noncontiguous method. + * The users of the DMA API should only care about the sg_table, but to make + * the DMA-API internal vmaping and freeing easier we stash away the page + * array as well (except for the fallback case). This can go away any time, + * e.g. when a vmap-variant that takes a scatterlist comes along. + */ +struct dma_sgt_handle { + struct sg_table sgt; + struct page **pages; +}; +#define sgt_handle(sgt) \ + container_of((sgt), struct dma_sgt_handle, sgt) + int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 2b8dce756e1f..954847f9a3e0 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -144,6 +144,15 @@ u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); +struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); +void dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir); +void *dma_vmap_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt); +void dma_vunmap_noncontiguous(struct device *dev, void *vaddr); +int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, + size_t size, struct sg_table *sgt); #else /* CONFIG_HAS_DMA */ static inline dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, size_t offset, size_t size, @@ -257,6 +266,29 @@ static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; } +static inline struct sg_table *dma_alloc_noncontiguous(struct device *dev, + size_t size, enum dma_data_direction dir, gfp_t gfp, + unsigned long attrs) +{ + return NULL; +} +static inline void dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ +} +static inline void *dma_vmap_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt) +{ + return NULL; +} +static inline void dma_vunmap_noncontiguous(struct device *dev, void *vaddr) +{ +} +static inline int dma_mmap_noncontiguous(struct device *dev, + struct vm_area_struct *vma, size_t size, struct sg_table *sgt) +{ + return -EINVAL; +} #endif /* CONFIG_HAS_DMA */ struct page *dma_alloc_pages(struct device *dev, size_t size, -- cgit v1.2.3 From d3d40f237480abf3268956daf18cdc56edd32834 Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Mon, 8 Mar 2021 14:24:12 +1300 Subject: Revert "netfilter: x_tables: Switch synchronization to RCU" This reverts commit cc00bcaa589914096edef7fb87ca5cee4a166b5c. This (and the preceding) patch basically re-implemented the RCU mechanisms of patch 784544739a25. That patch was replaced because of the performance problems that it created when replacing tables. Now, we have the same issue: the call to synchronize_rcu() makes replacing tables slower by as much as an order of magnitude. Prior to using RCU a script calling "iptables" approx. 200 times was taking 1.16s. With RCU this increased to 11.59s. Revert these patches and fix the issue in a different way. Signed-off-by: Mark Tomlinson Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8ebb64193757..5deb099d156d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -227,7 +227,7 @@ struct xt_table { unsigned int valid_hooks; /* Man behind the curtain... */ - struct xt_table_info __rcu *private; + struct xt_table_info *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -448,9 +448,6 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); -struct xt_table_info -*xt_table_get_private_protected(const struct xt_table *table); - #ifdef CONFIG_COMPAT #include -- cgit v1.2.3 From 175e476b8cdf2a4de7432583b49c871345e4f8a1 Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Mon, 8 Mar 2021 14:24:13 +1300 Subject: netfilter: x_tables: Use correct memory barriers. When a new table value was assigned, it was followed by a write memory barrier. This ensured that all writes before this point would complete before any writes after this point. However, to determine whether the rules are unused, the sequence counter is read. To ensure that all writes have been done before these reads, a full memory barrier is needed, not just a write memory barrier. The same argument applies when incrementing the counter, before the rules are read. Changing to using smp_mb() instead of smp_wmb() fixes the kernel panic reported in cc00bcaa5899 (which is still present), while still maintaining the same speed of replacing tables. The smb_mb() barriers potentially slow the packet path, however testing has shown no measurable change in performance on a 4-core MIPS64 platform. Fixes: 7f5c6d4f665b ("netfilter: get rid of atomic ops in fast path") Signed-off-by: Mark Tomlinson Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5deb099d156d..8ec48466410a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -376,7 +376,7 @@ static inline unsigned int xt_write_recseq_begin(void) * since addend is most likely 1 */ __this_cpu_add(xt_recseq.sequence, addend); - smp_wmb(); + smp_mb(); return addend; } -- cgit v1.2.3 From 9cb24ea051857f2a7ab85c42842c5baa40497e53 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 14 Mar 2021 18:24:02 +0300 Subject: atm: delete include/linux/atm_suni.h This file has been effectively empty since 2.3.99-pre3 ! Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/atm_suni.h | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 include/linux/atm_suni.h (limited to 'include/linux') diff --git a/include/linux/atm_suni.h b/include/linux/atm_suni.h deleted file mode 100644 index 84f3aab54468..000000000000 --- a/include/linux/atm_suni.h +++ /dev/null @@ -1,12 +0,0 @@ -/* atm_suni.h - Driver-specific declarations of the SUNI driver (for use by - driver-specific utilities) */ - -/* Written 1998,2000 by Werner Almesberger, EPFL ICA */ - - -#ifndef LINUX_ATM_SUNI_H -#define LINUX_ATM_SUNI_H - -/* everything obsoleted */ - -#endif -- cgit v1.2.3 From 07a4bc51fc732b3618fd46dc51609948933064a4 Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Mon, 15 Mar 2021 13:27:06 +0800 Subject: net: pcs: rearrange C73 functions to prepare for C37 support later The current implementation for XPCS is validated for C73, so we rename them to have _c73 suffix and introduce a set of functions to use an_mode flag to switch between C73 and C37 AN later. Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- include/linux/pcs/pcs-xpcs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 351c1c9aedc5..a04e57c25fea 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -10,10 +10,14 @@ #include #include +/* AN mode */ +#define DW_AN_C73 1 + struct mdio_xpcs_args { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); struct mii_bus *bus; int addr; + int an_mode; }; struct mdio_xpcs_ops { -- cgit v1.2.3 From b97b5331b8ab7f60fb880e0c31c9b09b73d2fa4e Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Mon, 15 Mar 2021 13:27:07 +0800 Subject: net: pcs: add C37 SGMII AN support for intel mGbE controller XPCS IP supports C37 SGMII AN process and it is used in intel multi-GbE controller as MAC-side SGMII. Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- include/linux/pcs/pcs-xpcs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index a04e57c25fea..2cb5188a7ef1 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -12,6 +12,7 @@ /* AN mode */ #define DW_AN_C73 1 +#define DW_AN_C37_SGMII 2 struct mdio_xpcs_args { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); -- cgit v1.2.3 From ab39385021d1e0b4cd6cc521dc35c2fe659bbddf Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Mon, 15 Mar 2021 13:27:08 +0800 Subject: net: phylink: make phylink_parse_mode() support non-DT platform Certain platform does not support DT, so we make phylink_parse_mode() to allow non-DT platform to use it to setup in-band AN advertising. Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- include/linux/phylink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index d81a714cfbbd..fd2acfd9b597 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -64,6 +64,7 @@ enum phylink_op_type { * @pcs_poll: MAC PCS cannot provide link change interrupt * @poll_fixed_state: if true, starts link_poll, * if MAC link is at %MLO_AN_FIXED mode. + * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND * @get_fixed_state: callback to execute to determine the fixed link state, * if MAC link is at %MLO_AN_FIXED mode. */ @@ -72,6 +73,7 @@ struct phylink_config { enum phylink_op_type type; bool pcs_poll; bool poll_fixed_state; + bool ovr_an_inband; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); }; -- cgit v1.2.3 From e5e5b771f684c22b25c67df85d2deb43901f7b95 Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Mon, 15 Mar 2021 13:27:09 +0800 Subject: net: stmmac: make in-band AN mode parsing is supported for non-DT Not all platform uses DT, so phylink_parse_mode() will skip in-band setup of pl->supported and pl->link_config.advertising entirely. So, we add the setting of ovr_an_inband flag to make it works for non-DT platform. Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index a302982de2d7..722dc167b5c9 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -81,6 +81,7 @@ struct stmmac_mdio_bus_data { unsigned int phy_mask; unsigned int has_xpcs; + unsigned int xpcs_an_inband; int *irqs; int probed_phy_irq; bool needs_reset; -- cgit v1.2.3 From 76d00b494d7962e88d4bbd4135f34aba9019c67f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 23 Feb 2021 01:10:00 +0100 Subject: rcu/nocb: Disable bypass when CPU isn't completely offloaded Currently, the bypass is flushed at the very last moment in the deoffloading procedure. However, this approach leads to a larger state space than would be preferred. This commit therefore disables the bypass at soon as the deoffloading procedure begins, then flushes it. This guarantees that the bypass remains empty and thus out of the way of the deoffloading procedure. Symmetrically, this commit waits to enable the bypass until the offloading procedure has completed. Reported-by: Paul E. McKenney Cc: Josh Triplett Cc: Lai Jiangshan Cc: Joel Fernandes Cc: Neeraj Upadhyay Cc: Boqun Feng Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 8afe886e85f1..3db96c4f45fd 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -109,7 +109,7 @@ struct rcu_cblist { * | SEGCBLIST_KTHREAD_GP | * | | * | Kthreads handle callbacks holding nocb_lock, local rcu_core() stops | - * | handling callbacks. | + * | handling callbacks. Enable bypass queueing. | * ---------------------------------------------------------------------------- */ @@ -125,7 +125,7 @@ struct rcu_cblist { * | SEGCBLIST_KTHREAD_GP | * | | * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | - * | ignores callbacks. | + * | ignores callbacks. Bypass enqueue is enabled. | * ---------------------------------------------------------------------------- * | * v @@ -134,7 +134,8 @@ struct rcu_cblist { * | SEGCBLIST_KTHREAD_GP | * | | * | CB/GP kthreads and local rcu_core() handle callbacks concurrently | - * | holding nocb_lock. Wake up CB and GP kthreads if necessary. | + * | holding nocb_lock. Wake up CB and GP kthreads if necessary. Disable | + * | bypass enqueue. | * ---------------------------------------------------------------------------- * | * v -- cgit v1.2.3 From b4d45aee6635197d257f3469413837cd94fc11f4 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 15 Mar 2021 20:16:47 +0800 Subject: net: stmmac: add platform level clocks management This patch intends to add platform level clocks management. Some platforms may have their own special clocks, they also need to be managed dynamically. If you want to manage such clocks, please implement clks_config callback. Reviewed-by: Andrew Lunn Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 722dc167b5c9..51004ebd0540 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -184,6 +184,7 @@ struct plat_stmmacenet_data { int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); struct mac_device_info *(*setup)(void *priv); + int (*clks_config)(void *priv, bool enabled); void *bsp_priv; struct clk *stmmac_clk; struct clk *pclk; -- cgit v1.2.3 From 6e3bac3eba448a438840ab8152cb8bbfcb8787b8 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Mon, 15 Mar 2021 17:19:26 +0300 Subject: net: phy: add Marvell 88X2222 transceiver support Add basic support for the Marvell 88X2222 multi-speed ethernet transceiver. This PHY provides data transmission over fiber-optic as well as Twinax copper links. The 88X2222 supports 2 ports of 10GBase-R and 1000Base-X on the line-side interface. The host-side interface supports 4 ports of 10GBase-R, RXAUI, 1000Base-X and 2 ports of XAUI. This driver, however, supports only XAUI on the host-side and 1000Base-X/10GBase-R on the line-side, for now. The SGMII is also supported over 1000Base-X. Interrupts are not supported. Internal registers access compliant with the Clause 45 specification. Signed-off-by: Ivan Bornyakov Signed-off-by: David S. Miller --- include/linux/marvell_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 52b1610eae68..274abd5fbac3 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -24,6 +24,7 @@ #define MARVELL_PHY_ID_88E3016 0x01410e60 #define MARVELL_PHY_ID_88X3310 0x002b09a0 #define MARVELL_PHY_ID_88E2110 0x002b09b0 +#define MARVELL_PHY_ID_88X2222 0x01410f10 /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 -- cgit v1.2.3 From 45f3a13c816656c9d3d311880d90286341644d9b Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 15 Mar 2021 16:51:46 -0500 Subject: net: qualcomm: rmnet: mark trailer field endianness The fields in the checksum trailer structure used for QMAP protocol RX packets are all big-endian format, so define them that way. It turns out these fields are never actually used by the RMNet code. The start offset is always assumed to be zero, and the length is taken from the other packet headers. So making these fields explicitly big endian has no effect on the behavior of the code. Signed-off-by: Alex Elder Reviewed-by: Bjorn Andersson Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/if_rmnet.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h index 9661416a9bb4..8c7845baf383 100644 --- a/include/linux/if_rmnet.h +++ b/include/linux/if_rmnet.h @@ -32,8 +32,8 @@ struct rmnet_map_dl_csum_trailer { #else #error "Please fix " #endif - u16 csum_start_offset; - u16 csum_length; + __be16 csum_start_offset; + __be16 csum_length; __be16 csum_value; } __aligned(1); -- cgit v1.2.3 From 16653c16d282e768763b2e8cc78f75df8fd53992 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 15 Mar 2021 16:51:49 -0500 Subject: net: qualcomm: rmnet: use masks instead of C bit-fields The actual layout of bits defined in C bit-fields (e.g. int foo : 3) is implementation-defined. Structures defined in address this by specifying all bit-fields twice, to cover two possible layouts. I think this pattern is repetitive and noisy, and I find the whole notion of compiler "bitfield endianness" to be non-intuitive. Stop using C bit-fields for the command/data flag and the pad length fields in the rmnet_map structure, and define a single-byte flags field instead. Define a mask for the single-bit "command" flag, and another mask for the encoded pad length. The content of both fields can be accessed using a simple bitwise AND operation. Signed-off-by: Alex Elder Reviewed-by: Bjorn Andersson Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/if_rmnet.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h index 8c7845baf383..a02f0a3df1d9 100644 --- a/include/linux/if_rmnet.h +++ b/include/linux/if_rmnet.h @@ -6,21 +6,18 @@ #define _LINUX_IF_RMNET_H_ struct rmnet_map_header { -#if defined(__LITTLE_ENDIAN_BITFIELD) - u8 pad_len:6; - u8 reserved_bit:1; - u8 cd_bit:1; -#elif defined (__BIG_ENDIAN_BITFIELD) - u8 cd_bit:1; - u8 reserved_bit:1; - u8 pad_len:6; -#else -#error "Please fix " -#endif - u8 mux_id; - __be16 pkt_len; + u8 flags; /* MAP_CMD_FLAG, MAP_PAD_LEN_MASK */ + u8 mux_id; + __be16 pkt_len; /* Length of packet, including pad */ } __aligned(1); +/* rmnet_map_header flags field: + * PAD_LEN: number of pad bytes following packet data + * CMD: 1 = packet contains a MAP command; 0 = packet contains data + */ +#define MAP_PAD_LEN_MASK GENMASK(5, 0) +#define MAP_CMD_FLAG BIT(7) + struct rmnet_map_dl_csum_trailer { u8 reserved1; #if defined(__LITTLE_ENDIAN_BITFIELD) -- cgit v1.2.3 From cc1b21ba6251c8dd8e4e86018c9fdba85df0d219 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 15 Mar 2021 16:51:50 -0500 Subject: net: qualcomm: rmnet: don't use C bit-fields in rmnet checksum trailer Replace the use of C bit-fields in the rmnet_map_dl_csum_trailer structure with a single one-byte field, using constant field masks to encode or get at embedded values. Signed-off-by: Alex Elder Reviewed-by: Bjorn Andersson Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/if_rmnet.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h index a02f0a3df1d9..941997df9e08 100644 --- a/include/linux/if_rmnet.h +++ b/include/linux/if_rmnet.h @@ -19,21 +19,18 @@ struct rmnet_map_header { #define MAP_CMD_FLAG BIT(7) struct rmnet_map_dl_csum_trailer { - u8 reserved1; -#if defined(__LITTLE_ENDIAN_BITFIELD) - u8 valid:1; - u8 reserved2:7; -#elif defined (__BIG_ENDIAN_BITFIELD) - u8 reserved2:7; - u8 valid:1; -#else -#error "Please fix " -#endif + u8 reserved1; + u8 flags; /* MAP_CSUM_DL_VALID_FLAG */ __be16 csum_start_offset; __be16 csum_length; __be16 csum_value; } __aligned(1); +/* rmnet_map_dl_csum_trailer flags field: + * VALID: 1 = checksum and length valid; 0 = ignore them + */ +#define MAP_CSUM_DL_VALID_FLAG BIT(0) + struct rmnet_map_ul_csum_header { __be16 csum_start_offset; #if defined(__LITTLE_ENDIAN_BITFIELD) -- cgit v1.2.3 From 86ca860e12ec0feab7d721d3b05e60fb86613540 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 15 Mar 2021 16:51:51 -0500 Subject: net: qualcomm: rmnet: don't use C bit-fields in rmnet checksum header Replace the use of C bit-fields in the rmnet_map_ul_csum_header structure with a single two-byte (big endian) structure member, and use masks to encode or get values within it. The content of these fields can be accessed using simple bitwise AND and OR operations on the (host byte order) value of the new structure member. Previously rmnet_map_ipv4_ul_csum_header() would update C bit-field values in host byte order, then forcibly fix their byte order using a combination of byte swap operations and types. Instead, just compute the value that needs to go into the new structure member and save it with a simple byte-order conversion. Make similar simplifications in rmnet_map_ipv6_ul_csum_header(). Finally, in rmnet_map_checksum_uplink_packet() a set of assignments zeroes every field in the upload checksum header. Replace that with a single memset() operation. Signed-off-by: Alex Elder Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/if_rmnet.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h index 941997df9e08..4efb537f57f3 100644 --- a/include/linux/if_rmnet.h +++ b/include/linux/if_rmnet.h @@ -33,17 +33,16 @@ struct rmnet_map_dl_csum_trailer { struct rmnet_map_ul_csum_header { __be16 csum_start_offset; -#if defined(__LITTLE_ENDIAN_BITFIELD) - u16 csum_insert_offset:14; - u16 udp_ind:1; - u16 csum_enabled:1; -#elif defined (__BIG_ENDIAN_BITFIELD) - u16 csum_enabled:1; - u16 udp_ind:1; - u16 csum_insert_offset:14; -#else -#error "Please fix " -#endif + __be16 csum_info; /* MAP_CSUM_UL_* */ } __aligned(1); +/* csum_info field: + * OFFSET: where (offset in bytes) to insert computed checksum + * UDP: 1 = UDP checksum (zero checkum means no checksum) + * ENABLED: 1 = checksum computation requested + */ +#define MAP_CSUM_UL_OFFSET_MASK GENMASK(13, 0) +#define MAP_CSUM_UL_UDP_FLAG BIT(14) +#define MAP_CSUM_UL_ENABLED_FLAG BIT(15) + #endif /* !(_LINUX_IF_RMNET_H_) */ -- cgit v1.2.3 From c41c8a3485b0b36b7e308eeff8716eb77093596a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 14 Mar 2021 19:33:58 +0300 Subject: opp: Change return type of devm_pm_opp_register_set_opp_helper() Make devm_pm_opp_register_set_opp_helper() to return error code instead of opp_table pointer in order to have return type consistent with the other resource-managed OPP helpers. Signed-off-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 3f8894012429..2cf9694908a2 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -155,7 +155,7 @@ void dev_pm_opp_put_clkname(struct opp_table *opp_table); int devm_pm_opp_set_clkname(struct device *dev, const char *name); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); -struct opp_table *devm_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); +int devm_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs); void dev_pm_opp_detach_genpd(struct opp_table *opp_table); struct opp_table *devm_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs); @@ -337,11 +337,10 @@ static inline struct opp_table *dev_pm_opp_register_set_opp_helper(struct device static inline void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) {} -static inline struct opp_table * -devm_pm_opp_register_set_opp_helper(struct device *dev, +static inline int devm_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) -- cgit v1.2.3 From 9edf48a4bfb75456f7612972a4750a12d8a83702 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 14 Mar 2021 19:33:59 +0300 Subject: opp: Change return type of devm_pm_opp_attach_genpd() Make devm_pm_opp_attach_genpd() to return error code instead of opp_table pointer in order to have return type consistent with the other resource-managed OPP helpers. Signed-off-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 2cf9694908a2..84150a22fd7c 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -158,7 +158,7 @@ void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); int devm_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs); void dev_pm_opp_detach_genpd(struct opp_table *opp_table); -struct opp_table *devm_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs); +int devm_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs); struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table, struct opp_table *dst_table, struct dev_pm_opp *src_opp); int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); @@ -383,10 +383,11 @@ static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, cons static inline void dev_pm_opp_detach_genpd(struct opp_table *opp_table) {} -static inline struct opp_table *devm_pm_opp_attach_genpd(struct device *dev, - const char **names, struct device ***virt_devs) +static inline int devm_pm_opp_attach_genpd(struct device *dev, + const char **names, + struct device ***virt_devs) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } static inline struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table, -- cgit v1.2.3 From cfb12911ffb0ed38c7c420efb9d5dc8494e28109 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sun, 14 Mar 2021 19:34:01 +0300 Subject: spi: spi-geni-qcom: Convert to use resource-managed OPP API Use resource-managed OPP API to simplify code. Signed-off-by: Yangtao Li Acked-by: Mark Brown Signed-off-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- include/linux/qcom-geni-se.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h index ec2ad4b0fe14..cddef864a760 100644 --- a/include/linux/qcom-geni-se.h +++ b/include/linux/qcom-geni-se.h @@ -47,7 +47,6 @@ struct geni_icc_path { * @num_clk_levels: Number of valid clock levels in clk_perf_tbl * @clk_perf_tbl: Table of clock frequency input to serial engine clock * @icc_paths: Array of ICC paths for SE - * @opp_table: Pointer to the OPP table */ struct geni_se { void __iomem *base; @@ -57,7 +56,6 @@ struct geni_se { unsigned int num_clk_levels; unsigned long *clk_perf_tbl; struct geni_icc_path icc_paths[3]; - struct opp_table *opp_table; }; /* Common SE registers */ -- cgit v1.2.3 From 1d49439c04792a4a3d8299a32b7673ab7ba13b77 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 15 Mar 2021 16:38:42 -0300 Subject: ASoC: mx27vis: Remove unused file i.MX has been converted to a devicetree-only platform and asoc-mx27vis.h is no longer used. Get rid of this unused file. Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20210315193842.183042-1-festevam@gmail.com Signed-off-by: Mark Brown --- include/linux/platform_data/asoc-mx27vis.h | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 include/linux/platform_data/asoc-mx27vis.h (limited to 'include/linux') diff --git a/include/linux/platform_data/asoc-mx27vis.h b/include/linux/platform_data/asoc-mx27vis.h deleted file mode 100644 index 2107d0d992dd..000000000000 --- a/include/linux/platform_data/asoc-mx27vis.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PLATFORM_DATA_ASOC_MX27VIS_H -#define __PLATFORM_DATA_ASOC_MX27VIS_H - -struct snd_mx27vis_platform_data { - int amp_gain0_gpio; - int amp_gain1_gpio; - int amp_mutel_gpio; - int amp_muter_gpio; -}; - -#endif /* __PLATFORM_DATA_ASOC_MX27VIS_H */ -- cgit v1.2.3 From 47afc77bbfeac163d81c7a675d608c18561aa680 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 3 Mar 2021 18:28:11 +0300 Subject: spi: Add support for software nodes Making it possible for the drivers to assign complete software fwnodes to the devices instead of only the device properties in those nodes. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20210303152814.35070-2-heikki.krogerus@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 592897fa4f03..f47f94ea6fa9 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -20,6 +20,7 @@ struct dma_chan; struct property_entry; +struct software_node; struct spi_controller; struct spi_transfer; struct spi_controller_mem_ops; @@ -1409,7 +1410,8 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) * @modalias: Initializes spi_device.modalias; identifies the driver. * @platform_data: Initializes spi_device.platform_data; the particular * data stored there is driver-specific. - * @properties: Additional device properties for the device. + * @properties: Deprecated - use software node instead. + * @swnode: Software node for the device. * @controller_data: Initializes spi_device.controller_data; some * controllers need hints about hardware setup, e.g. for DMA. * @irq: Initializes spi_device.irq; depends on how the board is wired. @@ -1448,6 +1450,7 @@ struct spi_board_info { char modalias[SPI_NAME_SIZE]; const void *platform_data; const struct property_entry *properties; + const struct software_node *swnode; void *controller_data; int irq; -- cgit v1.2.3 From df41a5dad586c8ead1bb7082b4b6fcb563e02199 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 3 Mar 2021 18:28:14 +0300 Subject: spi: Remove support for dangling device properties >From now on only accepting complete software nodes. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20210303152814.35070-5-heikki.krogerus@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index f47f94ea6fa9..7cb3194d5ba5 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -19,7 +19,6 @@ #include struct dma_chan; -struct property_entry; struct software_node; struct spi_controller; struct spi_transfer; @@ -1410,7 +1409,6 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) * @modalias: Initializes spi_device.modalias; identifies the driver. * @platform_data: Initializes spi_device.platform_data; the particular * data stored there is driver-specific. - * @properties: Deprecated - use software node instead. * @swnode: Software node for the device. * @controller_data: Initializes spi_device.controller_data; some * controllers need hints about hardware setup, e.g. for DMA. @@ -1444,12 +1442,10 @@ struct spi_board_info { * * platform_data goes to spi_device.dev.platform_data, * controller_data goes to spi_device.controller_data, - * device properties are copied and attached to spi_device, * irq is copied too */ char modalias[SPI_NAME_SIZE]; const void *platform_data; - const struct property_entry *properties; const struct software_node *swnode; void *controller_data; int irq; -- cgit v1.2.3 From 3a0ade0c521a542f8a25e96ce8ea0dfaa532ac75 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sat, 6 Mar 2021 13:36:58 -0800 Subject: tasklet: Remove tasklet_kill_immediate Ever since RCU was converted to softirq, it has no users. Signed-off-by: Davidlohr Bueso Signed-off-by: Thomas Gleixner Acked-by: Paul E. McKenney Link: https://lore.kernel.org/r/20210306213658.12862-1-dave@stgolabs.net --- include/linux/interrupt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 76f1161a441a..2b98156ec707 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -716,7 +716,6 @@ static inline void tasklet_enable(struct tasklet_struct *t) } extern void tasklet_kill(struct tasklet_struct *t); -extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); extern void tasklet_setup(struct tasklet_struct *t, -- cgit v1.2.3 From 6f73171e192366ff7c98af9fb50615ef9615f8a7 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Mar 2021 12:48:22 +0200 Subject: fsnotify: allow fsnotify_{peek,remove}_first_event with empty queue Current code has an assumtion that fsnotify_notify_queue_is_empty() is called to verify that queue is not empty before trying to peek or remove an event from queue. Remove this assumption by moving the fsnotify_notify_queue_is_empty() into the functions, allow them to return NULL value and check return value by all callers. This is a prep patch for multi event queues. Link: https://lore.kernel.org/r/20210304104826.3993892-2-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index e5409b83e731..7eb979bfc141 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -495,7 +495,13 @@ static inline void fsnotify_queue_overflow(struct fsnotify_group *group) fsnotify_add_event(group, group->overflow_event, NULL); } -/* true if the group notification queue is empty */ +static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) +{ + assert_spin_locked(&group->notification_lock); + + return list_empty(&group->notification_list); +} + extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group); -- cgit v1.2.3 From 8988f11abb820bacfcc53d498370bfb30f792ec4 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Mar 2021 12:48:23 +0200 Subject: fanotify: reduce event objectid to 29-bit hash objectid is only used by fanotify backend and it is just an optimization for event merge before comparing all fields in event. Move the objectid member from common struct fsnotify_event into struct fanotify_event and reduce it to 29-bit hash to cram it together with the 3-bit event type. Events of different types are never merged, so the combination of event type and hash form a 32-bit key for fast compare of events. This reduces the size of events by one pointer and paves the way for adding hashed queue support for fanotify. Link: https://lore.kernel.org/r/20210304104826.3993892-3-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7eb979bfc141..fc98f9f88d12 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -167,7 +167,6 @@ struct fsnotify_ops { */ struct fsnotify_event { struct list_head list; - unsigned long objectid; /* identifier for queue merges */ }; /* @@ -582,11 +581,9 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); -static inline void fsnotify_init_event(struct fsnotify_event *event, - unsigned long objectid) +static inline void fsnotify_init_event(struct fsnotify_event *event) { INIT_LIST_HEAD(&event->list); - event->objectid = objectid; } #else -- cgit v1.2.3 From 94e00d28a680dff18805ca472b191364347d2234 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Mar 2021 12:48:25 +0200 Subject: fsnotify: use hash table for faster events merge In order to improve event merge performance, hash events in a 128 size hash table by the event merge key. The fanotify_event size grows by two pointers, but we just reduced its size by removing the objectid member, so overall its size is increased by one pointer. Permission events and overflow event are not merged so they are also not hashed. Link: https://lore.kernel.org/r/20210304104826.3993892-5-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index fc98f9f88d12..63fb766f0f3e 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -233,6 +233,8 @@ struct fsnotify_group { #endif #ifdef CONFIG_FANOTIFY struct fanotify_group_private_data { + /* Hash table of events for merge */ + struct hlist_head *merge_hash; /* allows a group to block waiting for a userspace response */ struct list_head access_list; wait_queue_head_t access_waitq; @@ -486,12 +488,14 @@ extern void fsnotify_destroy_event(struct fsnotify_group *group, /* attach the event to the group notification queue */ extern int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, - int (*merge)(struct list_head *, - struct fsnotify_event *)); + int (*merge)(struct fsnotify_group *, + struct fsnotify_event *), + void (*insert)(struct fsnotify_group *, + struct fsnotify_event *)); /* Queue overflow event to a notification group */ static inline void fsnotify_queue_overflow(struct fsnotify_group *group) { - fsnotify_add_event(group, group->overflow_event, NULL); + fsnotify_add_event(group, group->overflow_event, NULL, NULL); } static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) -- cgit v1.2.3 From 5b8fea65d197f408bb00b251c70d842826d6b70b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Mar 2021 13:29:20 +0200 Subject: fanotify: configurable limits via sysfs fanotify has some hardcoded limits. The only APIs to escape those limits are FAN_UNLIMITED_QUEUE and FAN_UNLIMITED_MARKS. Allow finer grained tuning of the system limits via sysfs tunables under /proc/sys/fs/fanotify, similar to tunables under /proc/sys/fs/inotify, with some minor differences. - max_queued_events - global system tunable for group queue size limit. Like the inotify tunable with the same name, it defaults to 16384 and applies on initialization of a new group. - max_user_marks - user ns tunable for marks limit per user. Like the inotify tunable named max_user_watches, on a machine with sufficient RAM and it defaults to 1048576 in init userns and can be further limited per containing user ns. - max_user_groups - user ns tunable for number of groups per user. Like the inotify tunable named max_user_instances, it defaults to 128 in init userns and can be further limited per containing user ns. The slightly different tunable names used for fanotify are derived from the "group" and "mark" terminology used in the fanotify man pages and throughout the code. Considering the fact that the default value for max_user_instances was increased in kernel v5.10 from 8192 to 1048576, leaving the legacy fanotify limit of 8192 marks per group in addition to the max_user_marks limit makes little sense, so the per group marks limit has been removed. Note that when a group is initialized with FAN_UNLIMITED_MARKS, its own marks are not accounted in the per user marks account, so in effect the limit of max_user_marks is only for the collection of groups that are not initialized with FAN_UNLIMITED_MARKS. Link: https://lore.kernel.org/r/20210304112921.3996419-2-amir73il@gmail.com Suggested-by: Jan Kara Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 3 +++ include/linux/fsnotify_backend.h | 6 +----- include/linux/sched/user.h | 3 --- include/linux/user_namespace.h | 4 ++++ 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 3e9c56ee651f..031a97d8369a 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -2,8 +2,11 @@ #ifndef _LINUX_FANOTIFY_H #define _LINUX_FANOTIFY_H +#include #include +extern struct ctl_table fanotify_table[]; /* for sysctl */ + #define FAN_GROUP_FLAG(group, flag) \ ((group)->fanotify_data.flags & (flag)) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 63fb766f0f3e..1ce66748a2d2 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -206,9 +206,6 @@ struct fsnotify_group { /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ struct mutex mark_mutex; /* protect marks_list */ - atomic_t num_marks; /* 1 for each mark and 1 for not being - * past the point of no return when freeing - * a group */ atomic_t user_waits; /* Number of tasks waiting for user * response */ struct list_head marks_list; /* all inode marks for this group */ @@ -240,8 +237,7 @@ struct fsnotify_group { wait_queue_head_t access_waitq; int flags; /* flags from fanotify_init() */ int f_flags; /* event_f_flags from fanotify_init() */ - unsigned int max_marks; - struct user_struct *user; + struct ucounts *ucounts; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index a8ec3b6093fc..3632c5d6ec55 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -14,9 +14,6 @@ struct user_struct { refcount_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ -#ifdef CONFIG_FANOTIFY - atomic_t fanotify_listeners; -#endif #ifdef CONFIG_EPOLL atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ #endif diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 64cf8ebdc4ec..f0d961a15fba 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -49,6 +49,10 @@ enum ucount_type { #ifdef CONFIG_INOTIFY_USER UCOUNT_INOTIFY_INSTANCES, UCOUNT_INOTIFY_WATCHES, +#endif +#ifdef CONFIG_FANOTIFY + UCOUNT_FANOTIFY_GROUPS, + UCOUNT_FANOTIFY_MARKS, #endif UCOUNT_COUNTS, }; -- cgit v1.2.3 From 7cea2a3c505e87a9d6afc78be4a7f7be636a73a7 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Mar 2021 13:29:21 +0200 Subject: fanotify: support limited functionality for unprivileged users Add limited support for unprivileged fanotify groups. An unprivileged users is not allowed to get an open file descriptor in the event nor the process pid of another process. An unprivileged user cannot request permission events, cannot set mount/filesystem marks and cannot request unlimited queue/marks. This enables the limited functionality similar to inotify when watching a set of files and directories for OPEN/ACCESS/MODIFY/CLOSE events, without requiring SYS_CAP_ADMIN privileges. The FAN_REPORT_DFID_NAME init flag, provide a method for an unprivileged listener watching a set of directories (with FAN_EVENT_ON_CHILD) to monitor all changes inside those directories. This typically requires that the listener keeps a map of watched directory fid to dirfd (O_PATH), where fid is obtained with name_to_handle_at() before starting to watch for changes. When getting an event, the reported fid of the parent should be resolved to dirfd and fstatsat(2) with dirfd and name should be used to query the state of the filesystem entry. Link: https://lore.kernel.org/r/20210304112921.3996419-3-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 031a97d8369a..bad41bcb25df 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -18,15 +18,38 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ * these constant, the programs may break if re-compiled with new uapi headers * and then run on an old kernel. */ -#define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + +/* Group classes where permission events are allowed */ +#define FANOTIFY_PERM_CLASSES (FAN_CLASS_CONTENT | \ FAN_CLASS_PRE_CONTENT) +#define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FANOTIFY_PERM_CLASSES) + #define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME) -#define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | FANOTIFY_FID_BITS | \ - FAN_REPORT_TID | \ - FAN_CLOEXEC | FAN_NONBLOCK | \ - FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS) +/* + * fanotify_init() flags that require CAP_SYS_ADMIN. + * We do not allow unprivileged groups to request permission events. + * We do not allow unprivileged groups to get other process pid in events. + * We do not allow unprivileged groups to use unlimited resources. + */ +#define FANOTIFY_ADMIN_INIT_FLAGS (FANOTIFY_PERM_CLASSES | \ + FAN_REPORT_TID | \ + FAN_UNLIMITED_QUEUE | \ + FAN_UNLIMITED_MARKS) + +/* + * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN. + * FAN_CLASS_NOTIF is the only class we allow for unprivileged group. + * We do not allow unprivileged groups to get file descriptors in events, + * so one of the flags for reporting file handles is required. + */ +#define FANOTIFY_USER_INIT_FLAGS (FAN_CLASS_NOTIF | \ + FANOTIFY_FID_BITS | \ + FAN_CLOEXEC | FAN_NONBLOCK) + +#define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \ + FANOTIFY_USER_INIT_FLAGS) #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ FAN_MARK_FILESYSTEM) -- cgit v1.2.3 From 3b49dfb08c750d4745ad42ec042288aba932b9d5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 23 Feb 2021 15:17:48 -0400 Subject: ARM: amba: Allow some ARM_AMBA users to compile with COMPILE_TEST CONFIG_VFIO_AMBA has a light use of AMBA, adding some inline fallbacks when AMBA is disabled will allow it to be compiled under COMPILE_TEST and make VFIO easier to maintain. Signed-off-by: Jason Gunthorpe Message-Id: <3-v1-df057e0f92c3+91-vfio_arm_compile_test_jgg@nvidia.com> Reviewed-by: Eric Auger Signed-off-by: Alex Williamson --- include/linux/amba/bus.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 6cc93ab5b809..c68d87b87283 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -105,8 +105,19 @@ extern struct bus_type amba_bustype; #define amba_get_drvdata(d) dev_get_drvdata(&d->dev) #define amba_set_drvdata(d,p) dev_set_drvdata(&d->dev, p) +#ifdef CONFIG_ARM_AMBA int amba_driver_register(struct amba_driver *); void amba_driver_unregister(struct amba_driver *); +#else +static inline int amba_driver_register(struct amba_driver *drv) +{ + return -EINVAL; +} +static inline void amba_driver_unregister(struct amba_driver *drv) +{ +} +#endif + struct amba_device *amba_device_alloc(const char *, resource_size_t, size_t); void amba_device_put(struct amba_device *); int amba_device_add(struct amba_device *, struct resource *); -- cgit v1.2.3 From c124fd9a969acaa83f6dfa5e160a99a500af9e4b Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Thu, 18 Feb 2021 20:03:58 +0100 Subject: PCI: Add pci_find_vsec_capability() to find a specific VSEC Add pci_find_vsec_capability() to locate a Vendor-Specific Extended Capability with the specified VSEC ID. The Vendor-Specific Extended Capability (VSEC) allows one or more proprietary capabilities defined by the vendor which aren't standard or shared between vendors. Signed-off-by: Gustavo Pimentel Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/d89506834fb11c6fa0bd5d515c0dd55b13ac6958.1613674948.git.gustavo.pimentel@synopsys.com Signed-off-by: Vinod Koul --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 86c799c97b77..59e731a2d1f8 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1077,6 +1077,7 @@ u8 pci_find_next_ht_capability(struct pci_dev *dev, u8 pos, int ht_cap); u16 pci_find_ext_capability(struct pci_dev *dev, int cap); u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); +u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap); u64 pci_get_dsn(struct pci_dev *dev); -- cgit v1.2.3 From 5abbe51a526253b9f003e9a0a195638dc882d660 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Feb 2021 18:46:41 +0100 Subject: kernel, fs: Introduce and use set_restart_fn() and arch_set_restart_data() Preparation for fixing get_nr_restart_syscall() on X86 for COMPAT. Add a new helper which sets restart_block->fn and calls a dummy arch_set_restart_data() helper. Fixes: 609c19a385c8 ("x86/ptrace: Stop setting TS_COMPAT in ptrace code") Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210201174641.GA17871@redhat.com --- include/linux/thread_info.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 9b2158c69275..157762db9d4b 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -59,6 +60,18 @@ enum syscall_work_bit { #ifdef __KERNEL__ +#ifndef arch_set_restart_data +#define arch_set_restart_data(restart) do { } while (0) +#endif + +static inline long set_restart_fn(struct restart_block *restart, + long (*fn)(struct restart_block *)) +{ + restart->fn = fn; + arch_set_restart_data(restart); + return -ERESTART_RESTARTBLOCK; +} + #ifndef THREAD_ALIGN #define THREAD_ALIGN THREAD_SIZE #endif -- cgit v1.2.3 From b2e9df850c58c2b36e915e7d3bed3f6107cccba6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Feb 2021 18:47:16 +0100 Subject: x86: Introduce restart_block->arch_data to remove TS_COMPAT_RESTART Save the current_thread_info()->status of X86 in the new restart_block->arch_data field so TS_COMPAT_RESTART can be removed again. Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210201174716.GA17898@redhat.com --- include/linux/restart_block.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h index bba2920e9c05..980a65594412 100644 --- a/include/linux/restart_block.h +++ b/include/linux/restart_block.h @@ -23,6 +23,7 @@ enum timespec_type { * System call restart block. */ struct restart_block { + unsigned long arch_data; long (*fn)(struct restart_block *); union { /* For futex_wait and futex_wait_requeue_pi */ -- cgit v1.2.3 From d29334c15d33a6a92d2043ca88f84cd5ad026c57 Mon Sep 17 00:00:00 2001 From: wenxu Date: Tue, 16 Mar 2021 16:33:54 +0800 Subject: net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct When openvswitch conntrack offload with act_ct action. The first rule do conntrack in the act_ct in tc subsystem. And miss the next rule in the tc and fallback to the ovs datapath but miss set post_ct flag which will lead the ct_state_key with -trk flag. Fixes: 7baf2429a1a9 ("net/sched: cls_flower add CT_FLAGS_INVALID flag support") Signed-off-by: wenxu Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6d0a33d1c0db..f2c9ee71cb2c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -285,6 +285,7 @@ struct nf_bridge_info { struct tc_skb_ext { __u32 chain; __u16 mru; + bool post_ct; }; #endif -- cgit v1.2.3 From 2ed2c5f0391106406ead3a74bfa571575eafe8b6 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 16 Mar 2021 21:10:19 +0100 Subject: net: ocelot: Remove ocelot_xfh_get_cpuq Now when extracting frames from CPU the cpuq is not used anymore so remove it. Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- include/linux/dsa/ocelot.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index 4265f328681a..c6bc45ae5e03 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -160,11 +160,6 @@ static inline void ocelot_xfh_get_src_port(void *extraction, u64 *src_port) packing(extraction, src_port, 46, 43, OCELOT_TAG_LEN, UNPACK, 0); } -static inline void ocelot_xfh_get_cpuq(void *extraction, u64 *cpuq) -{ - packing(extraction, cpuq, 28, 20, OCELOT_TAG_LEN, UNPACK, 0); -} - static inline void ocelot_xfh_get_qos_class(void *extraction, u64 *qos_class) { packing(extraction, qos_class, 19, 17, OCELOT_TAG_LEN, UNPACK, 0); -- cgit v1.2.3 From 01035bcc0f9195a19a76c8a006b3c520428acb61 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 16 Mar 2021 15:52:15 -0700 Subject: Revert "net: socket: use BIT() for MSG_*" This reverts commit 0bb3262c0248d44aea3be31076f44beb82a7b120. Breaks things on mips64/qemu Reported-by: Guenter Roeck Signed-off-by: David S. Miller --- include/linux/socket.h | 71 ++++++++++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index e88859f38cd0..385894b4a8bb 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -283,45 +283,42 @@ struct ucred { Added those for 1003.1g not all are supported yet */ -#define MSG_OOB BIT(0) -#define MSG_PEEK BIT(1) -#define MSG_DONTROUTE BIT(2) -#define MSG_TRYHARD BIT(2) /* Synonym for MSG_DONTROUTE for DECnet */ -#define MSG_CTRUNC BIT(3) -#define MSG_PROBE BIT(4) /* Do not send. Only probe path f.e. for MTU */ -#define MSG_TRUNC BIT(5) -#define MSG_DONTWAIT BIT(6) /* Nonblocking io */ -#define MSG_EOR BIT(7) /* End of record */ -#define MSG_WAITALL BIT(8) /* Wait for a full request */ -#define MSG_FIN BIT(9) -#define MSG_SYN BIT(10) -#define MSG_CONFIRM BIT(11) /* Confirm path validity */ -#define MSG_RST BIT(12) -#define MSG_ERRQUEUE BIT(13) /* Fetch message from error queue */ -#define MSG_NOSIGNAL BIT(14) /* Do not generate SIGPIPE */ -#define MSG_MORE BIT(15) /* Sender will send more */ -#define MSG_WAITFORONE BIT(16) /* recvmmsg(): block until 1+ packets avail */ -#define MSG_SENDPAGE_NOPOLICY BIT(16) /* sendpage() internal : do no apply policy */ -#define MSG_SENDPAGE_NOTLAST BIT(17) /* sendpage() internal : not the last page */ -#define MSG_BATCH BIT(18) /* sendmmsg(): more messages coming */ -#define MSG_EOF MSG_FIN -#define MSG_NO_SHARED_FRAGS BIT(19) /* sendpage() internal : page frags - * are not shared - */ -#define MSG_SENDPAGE_DECRYPTED BIT(20) /* sendpage() internal : page may carry - * plain text and require encryption - */ - -#define MSG_ZEROCOPY BIT(26) /* Use user data in kernel path */ -#define MSG_FASTOPEN BIT(29) /* Send data in TCP SYN */ -#define MSG_CMSG_CLOEXEC BIT(30) /* Set close_on_exec for file - * descriptor received through - * SCM_RIGHTS - */ +#define MSG_OOB 1 +#define MSG_PEEK 2 +#define MSG_DONTROUTE 4 +#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */ +#define MSG_CTRUNC 8 +#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */ +#define MSG_TRUNC 0x20 +#define MSG_DONTWAIT 0x40 /* Nonblocking io */ +#define MSG_EOR 0x80 /* End of record */ +#define MSG_WAITALL 0x100 /* Wait for a full request */ +#define MSG_FIN 0x200 +#define MSG_SYN 0x400 +#define MSG_CONFIRM 0x800 /* Confirm path validity */ +#define MSG_RST 0x1000 +#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */ +#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ +#define MSG_MORE 0x8000 /* Sender will send more */ +#define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */ +#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */ +#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ +#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */ +#define MSG_EOF MSG_FIN +#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */ +#define MSG_SENDPAGE_DECRYPTED 0x100000 /* sendpage() internal : page may carry + * plain text and require encryption + */ + +#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ +#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ +#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file + descriptor received through + SCM_RIGHTS */ #if defined(CONFIG_COMPAT) -#define MSG_CMSG_COMPAT BIT(31) /* This message needs 32 bit fixups */ +#define MSG_CMSG_COMPAT 0x80000000 /* This message needs 32 bit fixups */ #else -#define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */ +#define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */ #endif -- cgit v1.2.3 From 7a126a43a3dcf0fa6b9f7f2fe3ce82102517afe3 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 16 Sep 2020 10:11:20 +0300 Subject: net: Change dev parameter to const in netif_device_present() Not all ndos check the present bit before calling the ndo and the driver may want to check it. Sometimes the dev parameter passed as const so we pass it to netif_device_present() as const. Since netif_device_present() doesn't modify dev parameter anyway, declare it as const. Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b379d08a12ed..97254c089eb2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4175,7 +4175,7 @@ static inline bool netif_oper_up(const struct net_device *dev) * * Check if device has not been removed from system. */ -static inline bool netif_device_present(struct net_device *dev) +static inline bool netif_device_present(const struct net_device *dev) { return test_bit(__LINK_STATE_PRESENT, &dev->state); } -- cgit v1.2.3 From c276aae8c19d65e21a43c2690c7c7dafea0e97fa Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 26 Jan 2021 11:51:04 +0200 Subject: net/mlx5: Move mlx5e hw resources into a sub object This is to separate between resources attributes and other attributes we will want to use. Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 53b89631a1d9..9887181dea5f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -644,10 +644,12 @@ struct mlx5_td { }; struct mlx5e_resources { - u32 pdn; - struct mlx5_td td; - struct mlx5_core_mkey mkey; - struct mlx5_sq_bfreg bfreg; + struct mlx5e_hw_objs { + u32 pdn; + struct mlx5_td td; + struct mlx5_core_mkey mkey; + struct mlx5_sq_bfreg bfreg; + } hw_objs; }; enum mlx5_sw_icm_type { -- cgit v1.2.3 From c27971d08abecc91f06214dacc66ce3ce2662a44 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 28 Oct 2020 11:21:26 +0200 Subject: net/mlx5: Move devlink port from mlx5e priv to mlx5e resources We re-use the native NIC port net device instance for the Uplink representor, and the devlink port. When changing profiles we reset the mlx5e priv but we should still use the devlink port so move it to mlx5e resources. Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9887181dea5f..f1d0340e46a7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -650,6 +650,7 @@ struct mlx5e_resources { struct mlx5_core_mkey mkey; struct mlx5_sq_bfreg bfreg; } hw_objs; + struct devlink_port dl_port; }; enum mlx5_sw_icm_type { -- cgit v1.2.3 From 7a9fb35e8c3a67145fca262c304de65cb2f83abf Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 16 Sep 2020 10:11:33 +0300 Subject: net/mlx5e: Do not reload ethernet ports when changing eswitch mode When switching modes between legacy and switchdev and back, do not reload ethernet interfaces. just change the profile from nic profile to uplink rep profile in switchdev mode. Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f1d0340e46a7..23bb01d7c9b9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -651,6 +651,7 @@ struct mlx5e_resources { struct mlx5_sq_bfreg bfreg; } hw_objs; struct devlink_port dl_port; + struct net_device *uplink_netdev; }; enum mlx5_sw_icm_type { -- cgit v1.2.3 From 2973073a80b46daebc352c31d09d95d16cf6876e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Mar 2021 08:44:24 +0100 Subject: swiotlb: remove the alloc_size parameter to swiotlb_tbl_unmap_single Now that swiotlb remembers the allocation size there is no need to pass it back to swiotlb_tbl_unmap_single. Signed-off-by: Christoph Hellwig Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 5857a937c637..59f421d041ed 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -57,7 +57,6 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, extern void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, size_t mapping_size, - size_t alloc_size, enum dma_data_direction dir, unsigned long attrs); -- cgit v1.2.3 From 80808d273a3f075196d1a26463f65d4c9d2891c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Mar 2021 08:44:26 +0100 Subject: swiotlb: split swiotlb_tbl_sync_single Split swiotlb_tbl_sync_single into two separate funtions for the to device and to cpu synchronization. Signed-off-by: Christoph Hellwig Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 59f421d041ed..0696bdc8072e 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -42,14 +42,6 @@ extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); extern int swiotlb_late_init_with_default_size(size_t default_size); extern void __init swiotlb_update_mem_attributes(void); -/* - * Enumeration for sync targets - */ -enum dma_sync_target { - SYNC_FOR_CPU = 0, - SYNC_FOR_DEVICE = 1, -}; - phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, size_t mapping_size, size_t alloc_size, enum dma_data_direction dir, unsigned long attrs); @@ -60,11 +52,10 @@ extern void swiotlb_tbl_unmap_single(struct device *hwdev, enum dma_data_direction dir, unsigned long attrs); -extern void swiotlb_tbl_sync_single(struct device *hwdev, - phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target); - +void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir); +void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir); dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs); -- cgit v1.2.3 From 3d9c3dcc58e968403f29767726407bc680e087b5 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 23 Feb 2021 14:44:54 -0800 Subject: scsi: storvsc: Enable scatterlist entry lengths > 4Kbytes storvsc currently sets .dma_boundary to limit scatterlist entries to 4 Kbytes, which is less efficient with huge pages that offer large chunks of contiguous physical memory. Improve the algorithm for creating the Hyper-V guest physical address PFN array so that scatterlist entries with lengths > 4Kbytes are handled. As a result, remove the .dma_boundary setting. The improved algorithm also adds support for scatterlist entries with offsets >= 4Kbytes, which is supported by many other SCSI low-level drivers. And it retains support for architectures where possibly PAGE_SIZE != HV_HYP_PAGE_SIZE (such as ARM64). Link: https://lore.kernel.org/r/1614120294-1930-1-git-send-email-mikelley@microsoft.com Reviewed-by: Vitaly Kuznetsov Signed-off-by: Michael Kelley Signed-off-by: Martin K. Petersen --- include/linux/hyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f1d74dcf0353..7be8c5f7c5b2 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1726,6 +1726,7 @@ static inline unsigned long virt_to_hvpfn(void *addr) #define NR_HV_HYP_PAGES_IN_PAGE (PAGE_SIZE / HV_HYP_PAGE_SIZE) #define offset_in_hvpage(ptr) ((unsigned long)(ptr) & ~HV_HYP_PAGE_MASK) #define HVPFN_UP(x) (((x) + HV_HYP_PAGE_SIZE-1) >> HV_HYP_PAGE_SHIFT) +#define HVPFN_DOWN(x) ((x) >> HV_HYP_PAGE_SHIFT) #define page_to_hvpfn(page) (page_to_pfn(page) * NR_HV_HYP_PAGES_IN_PAGE) #endif /* _HYPERV_H */ -- cgit v1.2.3 From 6c172e73690e59ba74ecf12139d841b8651693f8 Mon Sep 17 00:00:00 2001 From: Steen Hegelund Date: Thu, 18 Feb 2021 17:14:49 +0100 Subject: phy: Add media type and speed serdes configuration interfaces Provide new phy configuration interfaces for media type and speed that allows e.g. PHYs used for ethernet to be configured with this information. Signed-off-by: Lars Povlsen Signed-off-by: Steen Hegelund Reviewed-by: Andrew Lunn Reviewed-by: Alexandre Belloni Acked-By: Kishon Vijay Abraham I Link: https://lore.kernel.org/r/20210218161451.3489955-3-steen.hegelund@microchip.com Signed-off-by: Vinod Koul --- include/linux/phy/phy.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index e435bdb0bab3..0ed434d02196 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -44,6 +44,12 @@ enum phy_mode { PHY_MODE_DP }; +enum phy_media { + PHY_MEDIA_DEFAULT, + PHY_MEDIA_SR, + PHY_MEDIA_DAC, +}; + /** * union phy_configure_opts - Opaque generic phy configuration * @@ -64,6 +70,8 @@ union phy_configure_opts { * @power_on: powering on the phy * @power_off: powering off the phy * @set_mode: set the mode of the phy + * @set_media: set the media type of the phy (optional) + * @set_speed: set the speed of the phy (optional) * @reset: resetting the phy * @calibrate: calibrate the phy * @release: ops to be performed while the consumer relinquishes the PHY @@ -75,6 +83,8 @@ struct phy_ops { int (*power_on)(struct phy *phy); int (*power_off)(struct phy *phy); int (*set_mode)(struct phy *phy, enum phy_mode mode, int submode); + int (*set_media)(struct phy *phy, enum phy_media media); + int (*set_speed)(struct phy *phy, int speed); /** * @configure: @@ -215,6 +225,8 @@ int phy_power_off(struct phy *phy); int phy_set_mode_ext(struct phy *phy, enum phy_mode mode, int submode); #define phy_set_mode(phy, mode) \ phy_set_mode_ext(phy, mode, 0) +int phy_set_media(struct phy *phy, enum phy_media media); +int phy_set_speed(struct phy *phy, int speed); int phy_configure(struct phy *phy, union phy_configure_opts *opts); int phy_validate(struct phy *phy, enum phy_mode mode, int submode, union phy_configure_opts *opts); @@ -344,6 +356,20 @@ static inline int phy_set_mode_ext(struct phy *phy, enum phy_mode mode, #define phy_set_mode(phy, mode) \ phy_set_mode_ext(phy, mode, 0) +static inline int phy_set_media(struct phy *phy, enum phy_media media) +{ + if (!phy) + return 0; + return -ENODEV; +} + +static inline int phy_set_speed(struct phy *phy, int speed) +{ + if (!phy) + return 0; + return -ENODEV; +} + static inline enum phy_mode phy_get_mode(struct phy *phy) { return PHY_MODE_INVALID; -- cgit v1.2.3 From bee645788e07eea63055d261d2884ea45c2ba857 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 16 Mar 2021 11:31:17 -0400 Subject: locking/ww_mutex: Fix acquire/release imbalance in ww_acquire_init()/ww_acquire_fini() In ww_acquire_init(), mutex_acquire() is gated by CONFIG_DEBUG_LOCK_ALLOC. The dep_map in the ww_acquire_ctx structure is also gated by the same config. However mutex_release() in ww_acquire_fini() is gated by CONFIG_DEBUG_MUTEXES. It is possible to set CONFIG_DEBUG_MUTEXES without setting CONFIG_DEBUG_LOCK_ALLOC though it is an unlikely configuration. That may cause a compilation error as dep_map isn't defined in this case. Fix this potential problem by enclosing mutex_release() inside CONFIG_DEBUG_LOCK_ALLOC. Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210316153119.13802-3-longman@redhat.com --- include/linux/ww_mutex.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 850424e5d030..6ecf2a0220db 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -173,9 +173,10 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) */ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { -#ifdef CONFIG_DEBUG_MUTEXES +#ifdef CONFIG_DEBUG_LOCK_ALLOC mutex_release(&ctx->dep_map, _THIS_IP_); - +#endif +#ifdef CONFIG_DEBUG_MUTEXES DEBUG_LOCKS_WARN_ON(ctx->acquired); if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) /* -- cgit v1.2.3 From fa8b90070a80bb1a3042b4b25af4b3ee2c4c27e1 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 Mar 2021 13:35:40 +0100 Subject: quota: wire up quotactl_path Wire up the quotactl_path syscall added in the previous patch. Link: https://lore.kernel.org/r/20210304123541.30749-3-s.hauer@pengutronix.de Signed-off-by: Sascha Hauer Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2839dc9a7c01..a672bbe28577 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -483,6 +483,8 @@ asmlinkage long sys_pipe2(int __user *fildes, int flags); /* fs/quota.c */ asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr); +asmlinkage long sys_quotactl_path(unsigned int cmd, const char __user *mountpoint, + qid_t id, void __user *addr); /* fs/readdir.c */ asmlinkage long sys_getdents64(unsigned int fd, -- cgit v1.2.3 From d2da74d1278a1b51ef18beafa9da770f0db1c617 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:42:04 +0100 Subject: tasklets: Replace barrier() with cpu_relax() in tasklet_unlock_wait() A barrier() in a tight loop which waits for something to happen on a remote CPU is a pointless exercise. Replace it with cpu_relax() which allows HT siblings to make progress. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.249343366@linutronix.de --- include/linux/interrupt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 2b98156ec707..d689fd738152 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -672,7 +672,8 @@ static inline void tasklet_unlock(struct tasklet_struct *t) static inline void tasklet_unlock_wait(struct tasklet_struct *t) { - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } + while (test_bit(TASKLET_STATE_RUN, &t->state)) + cpu_relax(); } #else #define tasklet_trylock(t) 1 -- cgit v1.2.3 From 6951547a1399c8f56468ed93bea8f769b891aec3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:42:05 +0100 Subject: tasklets: Use static inlines for stub implementations Inlines exist for a reason. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.407702697@linutronix.de --- include/linux/interrupt.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index d689fd738152..0a4ce25c1464 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -676,9 +676,9 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t) cpu_relax(); } #else -#define tasklet_trylock(t) 1 -#define tasklet_unlock_wait(t) do { } while (0) -#define tasklet_unlock(t) do { } while (0) +static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } +static inline void tasklet_unlock(struct tasklet_struct *t) { } +static inline void tasklet_unlock_wait(struct tasklet_struct *t) { } #endif extern void __tasklet_schedule(struct tasklet_struct *t); -- cgit v1.2.3 From ca5f625118955fc544c3cb3dee7055d33ecadafb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:42:06 +0100 Subject: tasklets: Provide tasklet_disable_in_atomic() Replacing the spin wait loops in tasklet_unlock_wait() with wait_var_event() is not possible as a handful of tasklet_disable() invocations are happening in atomic context. All other invocations are in teardown paths which can sleep. Provide tasklet_disable_in_atomic() and tasklet_unlock_spin_wait() to convert the few atomic use cases over, which allows to change tasklet_disable() and tasklet_unlock_wait() in a later step. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.563164193@linutronix.de --- include/linux/interrupt.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0a4ce25c1464..3c8a29176258 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -675,10 +675,21 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t) while (test_bit(TASKLET_STATE_RUN, &t->state)) cpu_relax(); } + +/* + * Do not use in new code. Waiting for tasklets from atomic contexts is + * error prone and should be avoided. + */ +static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) +{ + while (test_bit(TASKLET_STATE_RUN, &t->state)) + cpu_relax(); +} #else static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } static inline void tasklet_unlock(struct tasklet_struct *t) { } static inline void tasklet_unlock_wait(struct tasklet_struct *t) { } +static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) { } #endif extern void __tasklet_schedule(struct tasklet_struct *t); @@ -703,6 +714,17 @@ static inline void tasklet_disable_nosync(struct tasklet_struct *t) smp_mb__after_atomic(); } +/* + * Do not use in new code. Disabling tasklets from atomic contexts is + * error prone and should be avoided. + */ +static inline void tasklet_disable_in_atomic(struct tasklet_struct *t) +{ + tasklet_disable_nosync(t); + tasklet_unlock_spin_wait(t); + smp_mb(); +} + static inline void tasklet_disable(struct tasklet_struct *t) { tasklet_disable_nosync(t); -- cgit v1.2.3 From b0cd02c2a9494dbf0a1cc7dc7a3b8b400c158d37 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:42:07 +0100 Subject: tasklets: Use spin wait in tasklet_disable() temporarily To ease the transition use spin waiting in tasklet_disable() until all usage sites from atomic context have been cleaned up. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.685352806@linutronix.de --- include/linux/interrupt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 3c8a29176258..b7f00121f124 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -728,7 +728,8 @@ static inline void tasklet_disable_in_atomic(struct tasklet_struct *t) static inline void tasklet_disable(struct tasklet_struct *t) { tasklet_disable_nosync(t); - tasklet_unlock_wait(t); + /* Spin wait until all atomic users are converted */ + tasklet_unlock_spin_wait(t); smp_mb(); } -- cgit v1.2.3 From da044747401fc16202e223c9da970ed4e84fd84d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Mar 2021 09:42:08 +0100 Subject: tasklets: Replace spin wait in tasklet_unlock_wait() tasklet_unlock_wait() spin waits for TASKLET_STATE_RUN to be cleared. This is wasting CPU cycles in a tight loop which is especially painful in a guest when the CPU running the tasklet is scheduled out. tasklet_unlock_wait() is invoked from tasklet_kill() which is used in teardown paths and not performance critical at all. Replace the spin wait with wait_var_event(). There are no users of tasklet_unlock_wait() which are invoked from atomic contexts. The usage in tasklet_disable() has been replaced temporarily with the spin waiting variant until the atomic users are fixed up and will be converted to the sleep wait variant later. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.783936921@linutronix.de --- include/linux/interrupt.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b7f00121f124..b50be4fbbc98 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -664,17 +664,8 @@ static inline int tasklet_trylock(struct tasklet_struct *t) return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); } -static inline void tasklet_unlock(struct tasklet_struct *t) -{ - smp_mb__before_atomic(); - clear_bit(TASKLET_STATE_RUN, &(t)->state); -} - -static inline void tasklet_unlock_wait(struct tasklet_struct *t) -{ - while (test_bit(TASKLET_STATE_RUN, &t->state)) - cpu_relax(); -} +void tasklet_unlock(struct tasklet_struct *t); +void tasklet_unlock_wait(struct tasklet_struct *t); /* * Do not use in new code. Waiting for tasklets from atomic contexts is -- cgit v1.2.3 From eb2dafbba8b824ee77f166629babd470dd0b1c0a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:42:10 +0100 Subject: tasklets: Prevent tasklet_unlock_spin_wait() deadlock on RT tasklet_unlock_spin_wait() spin waits for the TASKLET_STATE_SCHED bit in the tasklet state to be cleared. This works on !RT nicely because the corresponding execution can only happen on a different CPU. On RT softirq processing is preemptible, therefore a task preempting the softirq processing thread can spin forever. Prevent this by invoking local_bh_disable()/enable() inside the loop. In case that the softirq processing thread was preempted by the current task, current will block on the local lock which yields the CPU to the preempted softirq processing thread. If the tasklet is processed on a different CPU then the local_bh_disable()/enable() pair is just a waste of processor cycles. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.988908275@linutronix.de --- include/linux/interrupt.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b50be4fbbc98..352db93c2eed 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -658,7 +658,7 @@ enum TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ }; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) static inline int tasklet_trylock(struct tasklet_struct *t) { return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); @@ -666,16 +666,8 @@ static inline int tasklet_trylock(struct tasklet_struct *t) void tasklet_unlock(struct tasklet_struct *t); void tasklet_unlock_wait(struct tasklet_struct *t); +void tasklet_unlock_spin_wait(struct tasklet_struct *t); -/* - * Do not use in new code. Waiting for tasklets from atomic contexts is - * error prone and should be avoided. - */ -static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) -{ - while (test_bit(TASKLET_STATE_RUN, &t->state)) - cpu_relax(); -} #else static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } static inline void tasklet_unlock(struct tasklet_struct *t) { } -- cgit v1.2.3 From 6fd4e861250b5c89ad460a9f265caeb1bbbfc323 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:42:17 +0100 Subject: tasklets: Switch tasklet_disable() to the sleep wait variant -- NOT FOR IMMEDIATE MERGING -- Now that all users of tasklet_disable() are invoked from sleepable context, convert it to use tasklet_unlock_wait() which might sleep. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084242.726452321@linutronix.de --- include/linux/interrupt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 352db93c2eed..4777850a6dc7 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -711,8 +711,7 @@ static inline void tasklet_disable_in_atomic(struct tasklet_struct *t) static inline void tasklet_disable(struct tasklet_struct *t) { tasklet_disable_nosync(t); - /* Spin wait until all atomic users are converted */ - tasklet_unlock_spin_wait(t); + tasklet_unlock_wait(t); smp_mb(); } -- cgit v1.2.3 From 728b478d2d358480b333b42d0e10e0fecb20114c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:55:53 +0100 Subject: softirq: Add RT specific softirq accounting RT requires the softirq processing and local bottomhalf disabled regions to be preemptible. Using the normal preempt count based serialization is therefore not possible because this implicitely disables preemption. RT kernels use a per CPU local lock to serialize bottomhalfs. As local_bh_disable() can nest the lock can only be acquired on the outermost invocation of local_bh_disable() and released when the nest count becomes zero. Tasks which hold the local lock can be preempted so its required to keep track of the nest count per task. Add a RT only counter to task struct and adjust the relevant macros in preempt.h. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Tested-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309085726.983627589@linutronix.de --- include/linux/hardirq.h | 1 + include/linux/preempt.h | 6 +++++- include/linux/sched.h | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 7c9d6a2d7e90..69bc86ea382c 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 69cc8b64aa3a..9881eac0698f 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -79,7 +79,11 @@ #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) +#ifdef CONFIG_PREEMPT_RT +# define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) +#else +# define softirq_count() (preempt_count() & SOFTIRQ_MASK) +#endif #define irq_count() (nmi_count() | hardirq_count() | softirq_count()) /* diff --git a/include/linux/sched.h b/include/linux/sched.h index ef00bb22164c..743a613c9cf3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1044,6 +1044,9 @@ struct task_struct { int softirq_context; int irq_config; #endif +#ifdef CONFIG_PREEMPT_RT + int softirq_disable_cnt; +#endif #ifdef CONFIG_LOCKDEP # define MAX_LOCK_DEPTH 48UL -- cgit v1.2.3 From 8b1c04acad082dec76f3f8f7e1fa13493d6cbb79 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:55:56 +0100 Subject: softirq: Make softirq control and processing RT aware Provide a local lock based serialization for soft interrupts on RT which allows the local_bh_disabled() sections and servicing soft interrupts to be preemptible. Provide the necessary inline helpers which allow to reuse the bulk of the softirq processing code. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Tested-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309085727.426370483@linutronix.de --- include/linux/bottom_half.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index a19519f4241d..e4dd613a070e 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -4,7 +4,7 @@ #include -#ifdef CONFIG_TRACE_IRQFLAGS +#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS) extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); #else static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) -- cgit v1.2.3 From 47c218dcae6587fb5bce30f1656b13e22391c8e3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:55:57 +0100 Subject: tick/sched: Prevent false positive softirq pending warnings on RT On RT a task which has soft interrupts disabled can block on a lock and schedule out to idle while soft interrupts are pending. This triggers the warning in the NOHZ idle code which complains about going idle with pending soft interrupts. But as the task is blocked soft interrupt processing is temporarily blocked as well which means that such a warning is a false positive. To prevent that check the per CPU state which indicates that a scheduled out task has soft interrupts disabled. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Tested-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309085727.527563866@linutronix.de --- include/linux/bottom_half.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index e4dd613a070e..eed86eb0a1de 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -32,4 +32,10 @@ static inline void local_bh_enable(void) __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } +#ifdef CONFIG_PREEMPT_RT +extern bool local_bh_blocked(void); +#else +static inline bool local_bh_blocked(void) { return false; } +#endif + #endif /* _LINUX_BH_H */ -- cgit v1.2.3 From ba9e6cab49c1465c2c322dcb03d771d5cbecb692 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:55:58 +0100 Subject: rcu: Prevent false positive softirq warning on RT Soft interrupt disabled sections can legitimately be preempted or schedule out when blocking on a lock on RT enabled kernels so the RCU preempt check warning has to be disabled for RT kernels. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Tested-by: Paul E. McKenney Reviewed-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309085727.626304079@linutronix.de --- include/linux/rcupdate.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index bd04f722714f..6d855ef091ba 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -334,7 +334,8 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_sleep_check() \ do { \ rcu_preempt_sleep_check(); \ - RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ "Illegal context switch in RCU-bh read-side critical section"); \ RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \ "Illegal context switch in RCU-sched read-side critical section"); \ -- cgit v1.2.3 From 7888fe53b7066c284e172d98d98d1865d6a9e5a0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 16 Mar 2021 17:30:36 -0700 Subject: ethtool: Add common function for filling out strings Add a function to handle the common pattern of printing a string into the ethtool strings interface and incrementing the string pointer by the ETH_GSTRING_LEN. Most of the drivers end up doing this and several have implemented their own versions of this function so it would make sense to consolidate on one implementation. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/ethtool.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index ec4cd3921c67..3583f7fc075c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -571,4 +571,13 @@ struct ethtool_phy_ops { */ void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops); +/** + * ethtool_sprintf - Write formatted string to ethtool string data + * @data: Pointer to start of string to update + * @fmt: Format of string to write + * + * Write formatted string to data. Update data to point at start of + * next string. + */ +extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); #endif /* _LINUX_ETHTOOL_H */ -- cgit v1.2.3 From 3093c3c7c136458af692d5c3d309a66c3c12d9f4 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Thu, 11 Mar 2021 15:04:09 +0100 Subject: rpmsg: Move RPMSG_ADDR_ANY in user API As the RPMSG_ADDR_ANY is a valid src or dst address that can be set by user applications, migrate its definition in user API. Reviewed-by: Bjorn Andersson Reviewed-by: Mathieu Poirier Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20210311140413.31725-3-arnaud.pouliquen@foss.st.com Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index a5db828b2420..d97dcd049f18 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -18,8 +18,7 @@ #include #include #include - -#define RPMSG_ADDR_ANY 0xFFFFFFFF +#include struct rpmsg_device; struct rpmsg_endpoint; -- cgit v1.2.3 From 6417f03132a6952cd17ddd8eaddbac92b61b17e0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 17 Mar 2021 12:45:47 +0200 Subject: module: remove never implemented MODULE_SUPPORTED_DEVICE MODULE_SUPPORTED_DEVICE was added in pre-git era and never was implemented. We can safely remove it, because the kernel has grown to have many more reliable mechanisms to determine if device is supported or not. Signed-off-by: Leon Romanovsky Signed-off-by: Linus Torvalds --- include/linux/module.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 59f094fa6f74..da4b6fbe8ebe 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -30,9 +30,6 @@ #include #include -/* Not Yet Implemented */ -#define MODULE_SUPPORTED_DEVICE(name) - #define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN struct modversion_info { -- cgit v1.2.3 From 546aa0e4ea6ed81b6c51baeebc4364542fa3f3a7 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 17 Mar 2021 15:06:54 -0400 Subject: usb-storage: Add quirk to defeat Kindle's automatic unload Matthias reports that the Amazon Kindle automatically removes its emulated media if it doesn't receive another SCSI command within about one second after a SYNCHRONIZE CACHE. It does so even when the host has sent a PREVENT MEDIUM REMOVAL command. The reason for this behavior isn't clear, although it's not hard to make some guesses. At any rate, the results can be unexpected for anyone who tries to access the Kindle in an unusual fashion, and in theory they can lead to data loss (for example, if one file is closed and synchronized while other files are still in the middle of being written). To avoid such problems, this patch creates a new usb-storage quirks flag telling the driver always to issue a REQUEST SENSE following a SYNCHRONIZE CACHE command, and adds an unusual_devs entry for the Kindle with the flag set. This is sufficient to prevent the Kindle from doing its automatic unload, without interfering with proper operation. Another possible way to deal with this would be to increase the frequency of TEST UNIT READY polling that the kernel normally carries out for removable-media storage devices. However that would increase the overall load on the system and it is not as reliable, because the user can override the polling interval. Changing the driver's behavior is safer and has minimal overhead. CC: Reported-and-tested-by: Matthias Schwarzott Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20210317190654.GA497856@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 6b03fdd69d27..712363c7a2e8 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -86,6 +86,8 @@ /* lies about caching, so always sync */ \ US_FLAG(NO_SAME, 0x40000000) \ /* Cannot handle WRITE_SAME */ \ + US_FLAG(SENSE_AFTER_SYNC, 0x80000000) \ + /* Do REQUEST_SENSE after SYNCHRONIZE_CACHE */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3 From cb038357937ee4f589aab2469ec3896dce90f317 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 16 Mar 2021 15:36:47 -0700 Subject: net: fix race between napi kthread mode and busy poll Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to determine if the kthread owns this napi and could call napi->poll() on it. However, if socket busy poll is enabled, it is possible that the busy poll thread grabs this SCHED bit (after the previous napi->poll() invokes napi_complete_done() and clears SCHED bit) and tries to poll on the same napi. napi_disable() could grab the SCHED bit as well. This patch tries to fix this race by adding a new bit NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in ____napi_schedule() if the threaded mode is enabled, and gets cleared in napi_complete_done(), and we only poll the napi in kthread if this bit is set. This helps distinguish the ownership of the napi between kthread and other scenarios and fixes the race issue. Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support") Reported-by: Martin Zaharinov Suggested-by: Jakub Kicinski Signed-off-by: Wei Wang Cc: Alexander Duyck Cc: Eric Dumazet Cc: Paolo Abeni Cc: Hannes Frederic Sowa Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5b67ea89d5f2..87a5d186faff 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -360,6 +360,7 @@ enum { NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ + NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ }; enum { @@ -372,6 +373,7 @@ enum { NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), + NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), }; enum gro_result { -- cgit v1.2.3 From e21aa341785c679dd409c8cb71f864c00fe6c463 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 16 Mar 2021 14:00:07 -0700 Subject: bpf: Fix fexit trampoline. The fexit/fmod_ret programs can be attached to kernel functions that can sleep. The synchronize_rcu_tasks() will not wait for such tasks to complete. In such case the trampoline image will be freed and when the task wakes up the return IP will point to freed memory causing the crash. Solve this by adding percpu_ref_get/put for the duration of trampoline and separate trampoline vs its image life times. The "half page" optimization has to be removed, since first_half->second_half->first_half transition cannot be guaranteed to complete in deterministic time. Every trampoline update becomes a new image. The image with fmod_ret or fexit progs will be freed via percpu_ref_kill and call_rcu_tasks. Together they will wait for the original function and trampoline asm to complete. The trampoline is patched from nop to jmp to skip fexit progs. They are freed independently from the trampoline. The image with fentry progs only will be freed via call_rcu_tasks_trace+call_rcu_tasks which will wait for both sleepable and non-sleepable progs to complete. Fixes: fec56f5890d9 ("bpf: Introduce BPF trampoline") Reported-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Paul E. McKenney # for RCU Link: https://lore.kernel.org/bpf/20210316210007.38949-1-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d7e0f479a5b0..3625f019767d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -21,6 +21,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -556,7 +557,8 @@ struct bpf_tramp_progs { * fentry = a set of program to run before calling original function * fexit = a set of program to run after original function */ -int arch_prepare_bpf_trampoline(void *image, void *image_end, +struct bpf_tramp_image; +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_progs *tprogs, void *orig_call); @@ -565,6 +567,8 @@ u64 notrace __bpf_prog_enter(struct bpf_prog *prog); void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog); void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start); +void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); +void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); struct bpf_ksym { unsigned long start; @@ -583,6 +587,18 @@ enum bpf_tramp_prog_type { BPF_TRAMP_REPLACE, /* more than MAX */ }; +struct bpf_tramp_image { + void *image; + struct bpf_ksym ksym; + struct percpu_ref pcref; + void *ip_after_call; + void *ip_epilogue; + union { + struct rcu_head rcu; + struct work_struct work; + }; +}; + struct bpf_trampoline { /* hlist for trampoline_table */ struct hlist_node hlist; @@ -605,9 +621,8 @@ struct bpf_trampoline { /* Number of attached programs. A counter per kind. */ int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ - void *image; + struct bpf_tramp_image *cur_image; u64 selector; - struct bpf_ksym ksym; }; struct bpf_attach_target_info { @@ -691,6 +706,8 @@ void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); +int bpf_jit_charge_modmem(u32 pages); +void bpf_jit_uncharge_modmem(u32 pages); #else static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) @@ -787,7 +804,6 @@ struct bpf_prog_aux { bool func_proto_unreliable; bool sleepable; bool tail_call_reachable; - enum bpf_tramp_prog_type trampoline_prog_type; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; -- cgit v1.2.3 From 97a19caf1b1f6a9d4f620a9d51405a1973bd4641 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 17 Mar 2021 10:41:32 -0700 Subject: bpf: net: Emit anonymous enum with BPF_TCP_CLOSE value explicitly The selftest failed to compile with clang-built bpf-next. Adding LLVM=1 to your vmlinux and selftest build will use clang. The error message is: progs/test_sk_storage_tracing.c:38:18: error: use of undeclared identifier 'BPF_TCP_CLOSE' if (newstate == BPF_TCP_CLOSE) ^ 1 error generated. make: *** [Makefile:423: /bpf-next/tools/testing/selftests/bpf/test_sk_storage_tracing.o] Error 1 The reason for the failure is that BPF_TCP_CLOSE, a value of an anonymous enum defined in uapi bpf.h, is not defined in vmlinux.h. gcc does not have this problem. Since vmlinux.h is derived from BTF which is derived from vmlinux DWARF, that means gcc-produced vmlinux DWARF has BPF_TCP_CLOSE while llvm-produced vmlinux DWARF does not have. BPF_TCP_CLOSE is referenced in net/ipv4/tcp.c as BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE); The following test mimics the above BUILD_BUG_ON, preprocessed with clang compiler, and shows gcc DWARF contains BPF_TCP_CLOSE while llvm DWARF does not. $ cat t.c enum { BPF_TCP_ESTABLISHED = 1, BPF_TCP_CLOSE = 7, }; enum { TCP_ESTABLISHED = 1, TCP_CLOSE = 7, }; int test() { do { extern void __compiletime_assert_767(void) ; if ((int)BPF_TCP_CLOSE != (int)TCP_CLOSE) __compiletime_assert_767(); } while (0); return 0; } $ clang t.c -O2 -c -g && llvm-dwarfdump t.o | grep BPF_TCP_CLOSE $ gcc t.c -O2 -c -g && llvm-dwarfdump t.o | grep BPF_TCP_CLOSE DW_AT_name ("BPF_TCP_CLOSE") Further checking clang code find clang actually tried to evaluate condition at compile time. If it is definitely true/false, it will perform optimization and the whole if condition will be removed before generating IR/debuginfo. This patch explicited add an expression after the above mentioned BUILD_BUG_ON in net/ipv4/tcp.c like (void)BPF_TCP_ESTABLISHED to enable generation of debuginfo for the anonymous enum which also includes BPF_TCP_CLOSE. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210317174132.589276-1-yhs@fb.com --- include/linux/btf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index 7fabf1428093..9c1b52738bbe 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -9,6 +9,7 @@ #include #define BTF_TYPE_EMIT(type) ((void)(type *)0) +#define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) struct btf; struct btf_member; -- cgit v1.2.3 From 3542dcb15cef66c0b9e6c3b33168eb657e0d9520 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 5 Mar 2021 16:32:34 +0000 Subject: iommu/dma: Resurrect the "forcedac" option In converting intel-iommu over to the common IOMMU DMA ops, it quietly lost the functionality of its "forcedac" option. Since this is a handy thing both for testing and for performance optimisation on certain platforms, reimplement it under the common IOMMU parameter namespace. For the sake of fixing the inadvertent breakage of the Intel-specific parameter, remove the dmar_forcedac remnants and hook it up as an alias while documenting the transition to the new common parameter. Fixes: c588072bba6b ("iommu/vt-d: Convert intel iommu driver to the iommu ops") Signed-off-by: Robin Murphy Acked-by: Lu Baolu Reviewed-by: John Garry Link: https://lore.kernel.org/r/7eece8e0ea7bfbe2cd0e30789e0d46df573af9b0.1614961776.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/dma-iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 706b68d1359b..13d1f4c14d7b 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -40,6 +40,8 @@ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, struct iommu_domain *domain); +extern bool iommu_dma_forcedac; + #else /* CONFIG_IOMMU_DMA */ struct iommu_domain; -- cgit v1.2.3 From 6ca69e5841f01ccbfa45e56577e1b33e14e53504 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 18 Mar 2021 08:53:40 +0800 Subject: iommu/vt-d: Report more information about invalidation errors When the invalidation queue errors are encountered, dump the information logged by the VT-d hardware together with the pending queue invalidation descriptors. Signed-off-by: Ashok Raj Tested-by: Guo Kaijie Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210318005340.187311-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 1bc46b88711a..1732298ce888 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -80,6 +81,7 @@ #define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ #define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ +#define DMAR_IQER_REG 0xb0 /* Invalidation queue error record register */ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define DMAR_PQH_REG 0xc0 /* Page request queue head register */ #define DMAR_PQT_REG 0xc8 /* Page request queue tail register */ @@ -126,6 +128,10 @@ #define DMAR_VCMD_REG 0xe10 /* Virtual command register */ #define DMAR_VCRSP_REG 0xe20 /* Virtual command response register */ +#define DMAR_IQER_REG_IQEI(reg) FIELD_GET(GENMASK_ULL(3, 0), reg) +#define DMAR_IQER_REG_ITESID(reg) FIELD_GET(GENMASK_ULL(47, 32), reg) +#define DMAR_IQER_REG_ICESID(reg) FIELD_GET(GENMASK_ULL(63, 48), reg) + #define OFFSET_STRIDE (9) #define dmar_readq(a) readq(a) -- cgit v1.2.3 From 4196d18903f94090f0a223d65de25e3bf50a3d13 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 12 Mar 2021 09:24:39 -0700 Subject: remoteproc: Add new RPROC_ATTACHED state Add a new RPROC_ATTACHED state to take into account scenarios where the remoteproc core needs to attach to a remote processor that is booted by another entity. Signed-off-by: Mathieu Poirier Reviewed-by: Peng Fan Reviewed-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20210312162453.1234145-4-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 1b7d56c7a453..9193a8fb5b68 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -407,6 +407,8 @@ struct rproc_ops { * @RPROC_RUNNING: device is up and running * @RPROC_CRASHED: device has crashed; need to start recovery * @RPROC_DELETED: device is deleted + * @RPROC_ATTACHED: device has been booted by another entity and the core + * has attached to it * @RPROC_DETACHED: device has been booted by another entity and waiting * for the core to attach to it * @RPROC_LAST: just keep this one at the end @@ -423,8 +425,9 @@ enum rproc_state { RPROC_RUNNING = 2, RPROC_CRASHED = 3, RPROC_DELETED = 4, - RPROC_DETACHED = 5, - RPROC_LAST = 6, + RPROC_ATTACHED = 5, + RPROC_DETACHED = 6, + RPROC_LAST = 7, }; /** -- cgit v1.2.3 From 76f4c87587e2ff41e9b9867ffde2137f27ba39b9 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 12 Mar 2021 09:24:40 -0700 Subject: remoteproc: Properly represent the attached state There is a need to know when a remote processor has been attached to rather than booted by the remoteproc core. In order to avoid manipulating two variables, i.e rproc::autonomous and rproc::state, get rid of the former and simply use the newly introduced RPROC_ATTACHED state. Signed-off-by: Mathieu Poirier Reviewed-by: Peng Fan Reviewed-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20210312162453.1234145-5-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 9193a8fb5b68..9e42e90cd9da 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -514,7 +514,6 @@ struct rproc_dump_segment { * @table_sz: size of @cached_table * @has_iommu: flag to indicate if remote processor is behind an MMU * @auto_boot: flag to indicate if remote processor should be auto-started - * @autonomous: true if an external entity has booted the remote processor * @dump_segments: list of segments in the firmware * @nb_vdev: number of vdev currently handled by rproc * @char_dev: character device of the rproc @@ -551,7 +550,6 @@ struct rproc { size_t table_sz; bool has_iommu; bool auto_boot; - bool autonomous; struct list_head dump_segments; int nb_vdev; u8 elf_class; -- cgit v1.2.3 From 1a631382be1d22ddab0582dae3498b3d28e2e44a Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 12 Mar 2021 09:24:41 -0700 Subject: remoteproc: Add new get_loaded_rsc_table() to rproc_ops Add a new get_loaded_rsc_table() operation in order to support scenarios where the remoteproc core has booted a remote processor and detaches from it. When re-attaching to the remote processor, the core needs to know where the resource table has been placed in memory. Signed-off-by: Mathieu Poirier Reviewed-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20210312162453.1234145-6-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 9e42e90cd9da..eee338177a3d 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -370,7 +370,9 @@ enum rsc_handling_status { * RSC_HANDLED if resource was handled, RSC_IGNORED if not handled and a * negative value on error * @load_rsc_table: load resource table from firmware image - * @find_loaded_rsc_table: find the loaded resouce table + * @find_loaded_rsc_table: find the loaded resource table from firmware image + * @get_loaded_rsc_table: get resource table installed in memory + * by external entity * @load: load firmware to memory, where the remote processor * expects to find it * @sanity_check: sanity check the fw image @@ -392,6 +394,8 @@ struct rproc_ops { int offset, int avail); struct resource_table *(*find_loaded_rsc_table)( struct rproc *rproc, const struct firmware *fw); + struct resource_table *(*get_loaded_rsc_table)( + struct rproc *rproc, size_t *size); int (*load)(struct rproc *rproc, const struct firmware *fw); int (*sanity_check)(struct rproc *rproc, const struct firmware *fw); u64 (*get_boot_addr)(struct rproc *rproc, const struct firmware *fw); -- cgit v1.2.3 From 7f3bd0c019cb813448d867c17c9b9dad205a13eb Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 12 Mar 2021 09:24:44 -0700 Subject: remoteproc: Add new detach() remoteproc operation Add an new detach() operation in order to support scenarios where the remoteproc core is going away but the remote processor is kept operating. This could be the case when the system is rebooted or when the platform driver is removed. Signed-off-by: Mathieu Poirier Reviewed-by: Peng Fan Reviewed-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20210312162453.1234145-9-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index eee338177a3d..2f1f0fbc3994 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -363,6 +363,7 @@ enum rsc_handling_status { * @start: power on the device and boot it * @stop: power off the device * @attach: attach to a device that his already powered up + * @detach: detach from a device, leaving it powered up * @kick: kick a virtqueue (virtqueue id given as a parameter) * @da_to_va: optional platform hook to perform address translations * @parse_fw: parse firmware to extract information (e.g. resource table) @@ -387,6 +388,7 @@ struct rproc_ops { int (*start)(struct rproc *rproc); int (*stop)(struct rproc *rproc); int (*attach)(struct rproc *rproc); + int (*detach)(struct rproc *rproc); void (*kick)(struct rproc *rproc, int vqid); void * (*da_to_va)(struct rproc *rproc, u64 da, size_t len, bool *is_iomem); int (*parse_fw)(struct rproc *rproc, const struct firmware *fw); -- cgit v1.2.3 From d3962a397885518a85d2dc6b0c51e6594f71c30f Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 12 Mar 2021 09:24:46 -0700 Subject: remoteproc: Introduce function rproc_detach() Introduce function rproc_detach() to enable the remoteproc core to release the resources associated with a remote processor without stopping its operation. Signed-off-by: Mathieu Poirier Reviewed-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20210312162453.1234145-11-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 2f1f0fbc3994..fc2cca600423 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -664,6 +664,7 @@ rproc_of_resm_mem_entry_init(struct device *dev, u32 of_resm_idx, size_t len, int rproc_boot(struct rproc *rproc); void rproc_shutdown(struct rproc *rproc); +int rproc_detach(struct rproc *rproc); int rproc_set_firmware(struct rproc *rproc, const char *fw_name); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); void rproc_coredump_using_sections(struct rproc *rproc); -- cgit v1.2.3 From 9dc9507f1880fb6225e3e058cb5219b152cbf198 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 12 Mar 2021 09:24:47 -0700 Subject: remoteproc: Properly deal with the resource table when detaching If it is possible to detach the remote processor, keep an untouched copy of the resource table. That way we can start from the same resource table without having to worry about original values or what elements the startup code has changed when re-attaching to the remote processor. Signed-off-by: Mathieu Poirier Reviewed-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20210312162453.1234145-12-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index fc2cca600423..8b795b544f75 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -516,6 +516,8 @@ struct rproc_dump_segment { * @recovery_disabled: flag that state if recovery was disabled * @max_notifyid: largest allocated notify id. * @table_ptr: pointer to the resource table in effect + * @clean_table: copy of the resource table without modifications. Used + * when a remote processor is attached or detached from the core * @cached_table: copy of the resource table * @table_sz: size of @cached_table * @has_iommu: flag to indicate if remote processor is behind an MMU @@ -552,6 +554,7 @@ struct rproc { bool recovery_disabled; int max_notifyid; struct resource_table *table_ptr; + struct resource_table *clean_table; struct resource_table *cached_table; size_t table_sz; bool has_iommu; -- cgit v1.2.3 From 48d71395896d54eec989179dd265e569fcecb15a Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Sun, 14 Mar 2021 18:44:46 +0300 Subject: reset: Add reset_control_bulk API Follow the clock and regulator subsystems' lead and add a bulk API for reset controls. Signed-off-by: Philipp Zabel Tested-by: Dmitry Osipenko Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210314154459.15375-5-digetx@gmail.com Signed-off-by: Mark Brown --- include/linux/reset.h | 315 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 315 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index b9109efa2a5c..46e6372cb431 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -10,6 +10,21 @@ struct device; struct device_node; struct reset_control; +/** + * struct reset_control_bulk_data - Data used for bulk reset control operations. + * + * @id: reset control consumer ID + * @rstc: struct reset_control * to store the associated reset control + * + * The reset APIs provide a series of reset_control_bulk_*() API calls as + * a convenience to consumers which require multiple reset controls. + * This structure is used to manage data for these calls. + */ +struct reset_control_bulk_data { + const char *id; + struct reset_control *rstc; +}; + #ifdef CONFIG_RESET_CONTROLLER int reset_control_reset(struct reset_control *rstc); @@ -20,6 +35,12 @@ int reset_control_status(struct reset_control *rstc); int reset_control_acquire(struct reset_control *rstc); void reset_control_release(struct reset_control *rstc); +int reset_control_bulk_reset(int num_rstcs, struct reset_control_bulk_data *rstcs); +int reset_control_bulk_assert(int num_rstcs, struct reset_control_bulk_data *rstcs); +int reset_control_bulk_deassert(int num_rstcs, struct reset_control_bulk_data *rstcs); +int reset_control_bulk_acquire(int num_rstcs, struct reset_control_bulk_data *rstcs); +void reset_control_bulk_release(int num_rstcs, struct reset_control_bulk_data *rstcs); + struct reset_control *__of_reset_control_get(struct device_node *node, const char *id, int index, bool shared, bool optional, bool acquired); @@ -27,10 +48,18 @@ struct reset_control *__reset_control_get(struct device *dev, const char *id, int index, bool shared, bool optional, bool acquired); void reset_control_put(struct reset_control *rstc); +int __reset_control_bulk_get(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs, + bool shared, bool optional, bool acquired); +void reset_control_bulk_put(int num_rstcs, struct reset_control_bulk_data *rstcs); + int __device_reset(struct device *dev, bool optional); struct reset_control *__devm_reset_control_get(struct device *dev, const char *id, int index, bool shared, bool optional, bool acquired); +int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs, + bool shared, bool optional, bool acquired); struct reset_control *devm_reset_control_array_get(struct device *dev, bool shared, bool optional); @@ -96,6 +125,48 @@ static inline struct reset_control *__reset_control_get( return optional ? NULL : ERR_PTR(-ENOTSUPP); } +static inline int +reset_control_bulk_reset(int num_rstcs, struct reset_control_bulk_data *rstcs) +{ + return 0; +} + +static inline int +reset_control_bulk_assert(int num_rstcs, struct reset_control_bulk_data *rstcs) +{ + return 0; +} + +static inline int +reset_control_bulk_deassert(int num_rstcs, struct reset_control_bulk_data *rstcs) +{ + return 0; +} + +static inline int +reset_control_bulk_acquire(int num_rstcs, struct reset_control_bulk_data *rstcs) +{ + return 0; +} + +static inline void +reset_control_bulk_release(int num_rstcs, struct reset_control_bulk_data *rstcs) +{ +} + +static inline int +__reset_control_bulk_get(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs, + bool shared, bool optional, bool acquired) +{ + return optional ? 0 : -EOPNOTSUPP; +} + +static inline void +reset_control_bulk_put(int num_rstcs, struct reset_control_bulk_data *rstcs) +{ +} + static inline struct reset_control *__devm_reset_control_get( struct device *dev, const char *id, int index, bool shared, bool optional, @@ -104,6 +175,14 @@ static inline struct reset_control *__devm_reset_control_get( return optional ? NULL : ERR_PTR(-ENOTSUPP); } +static inline int +__devm_reset_control_bulk_get(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs, + bool shared, bool optional, bool acquired) +{ + return optional ? 0 : -EOPNOTSUPP; +} + static inline struct reset_control * devm_reset_control_array_get(struct device *dev, bool shared, bool optional) { @@ -155,6 +234,23 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id) return __reset_control_get(dev, id, 0, false, false, true); } +/** + * reset_control_bulk_get_exclusive - Lookup and obtain exclusive references to + * multiple reset controllers. + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Fills the rstcs array with pointers to exclusive reset controls and + * returns 0, or an IS_ERR() condition containing errno. + */ +static inline int __must_check +reset_control_bulk_get_exclusive(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, true); +} + /** * reset_control_get_exclusive_released - Lookup and obtain a temoprarily * exclusive reference to a reset @@ -176,6 +272,48 @@ __must_check reset_control_get_exclusive_released(struct device *dev, return __reset_control_get(dev, id, 0, false, false, false); } +/** + * reset_control_bulk_get_exclusive_released - Lookup and obtain temporarily + * exclusive references to multiple reset + * controllers. + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Fills the rstcs array with pointers to exclusive reset controls and + * returns 0, or an IS_ERR() condition containing errno. + * reset-controls returned by this function must be acquired via + * reset_control_bulk_acquire() before they can be used and should be released + * via reset_control_bulk_release() afterwards. + */ +static inline int __must_check +reset_control_bulk_get_exclusive_released(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, false); +} + +/** + * reset_control_bulk_get_optional_exclusive_released - Lookup and obtain optional + * temporarily exclusive references to multiple + * reset controllers. + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Optional variant of reset_control_bulk_get_exclusive_released(). If the + * requested reset is not specified in the device tree, this function returns 0 + * instead of an error and missing rtsc is set to NULL. + * + * See reset_control_bulk_get_exclusive_released() for more information. + */ +static inline int __must_check +reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, false); +} + /** * reset_control_get_shared - Lookup and obtain a shared reference to a * reset controller. @@ -204,6 +342,23 @@ static inline struct reset_control *reset_control_get_shared( return __reset_control_get(dev, id, 0, true, false, false); } +/** + * reset_control_bulk_get_shared - Lookup and obtain shared references to + * multiple reset controllers. + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Fills the rstcs array with pointers to shared reset controls and + * returns 0, or an IS_ERR() condition containing errno. + */ +static inline int __must_check +reset_control_bulk_get_shared(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, false); +} + /** * reset_control_get_optional_exclusive - optional reset_control_get_exclusive() * @dev: device to be reset by the controller @@ -221,6 +376,26 @@ static inline struct reset_control *reset_control_get_optional_exclusive( return __reset_control_get(dev, id, 0, false, true, true); } +/** + * reset_control_bulk_get_optional_exclusive - optional + * reset_control_bulk_get_exclusive() + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Optional variant of reset_control_bulk_get_exclusive(). If any of the + * requested resets are not specified in the device tree, this function sets + * them to NULL instead of returning an error. + * + * See reset_control_bulk_get_exclusive() for more information. + */ +static inline int __must_check +reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); +} + /** * reset_control_get_optional_shared - optional reset_control_get_shared() * @dev: device to be reset by the controller @@ -238,6 +413,26 @@ static inline struct reset_control *reset_control_get_optional_shared( return __reset_control_get(dev, id, 0, true, true, false); } +/** + * reset_control_bulk_get_optional_shared - optional + * reset_control_bulk_get_shared() + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Optional variant of reset_control_bulk_get_shared(). If the requested resets + * are not specified in the device tree, this function sets them to NULL + * instead of returning an error. + * + * See reset_control_bulk_get_shared() for more information. + */ +static inline int __must_check +reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __reset_control_bulk_get(dev, num_rstcs, rstcs, true, true, false); +} + /** * of_reset_control_get_exclusive - Lookup and obtain an exclusive reference * to a reset controller. @@ -343,6 +538,26 @@ __must_check devm_reset_control_get_exclusive(struct device *dev, return __devm_reset_control_get(dev, id, 0, false, false, true); } +/** + * devm_reset_control_bulk_get_exclusive - resource managed + * reset_control_bulk_get_exclusive() + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Managed reset_control_bulk_get_exclusive(). For reset controllers returned + * from this function, reset_control_put() is called automatically on driver + * detach. + * + * See reset_control_bulk_get_exclusive() for more information. + */ +static inline int __must_check +devm_reset_control_bulk_get_exclusive(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, true); +} + /** * devm_reset_control_get_exclusive_released - resource managed * reset_control_get_exclusive_released() @@ -362,6 +577,26 @@ __must_check devm_reset_control_get_exclusive_released(struct device *dev, return __devm_reset_control_get(dev, id, 0, false, false, false); } +/** + * devm_reset_control_bulk_get_exclusive_released - resource managed + * reset_control_bulk_get_exclusive_released() + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Managed reset_control_bulk_get_exclusive_released(). For reset controllers + * returned from this function, reset_control_put() is called automatically on + * driver detach. + * + * See reset_control_bulk_get_exclusive_released() for more information. + */ +static inline int __must_check +devm_reset_control_bulk_get_exclusive_released(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, false); +} + /** * devm_reset_control_get_optional_exclusive_released - resource managed * reset_control_get_optional_exclusive_released() @@ -381,6 +616,26 @@ __must_check devm_reset_control_get_optional_exclusive_released(struct device *d return __devm_reset_control_get(dev, id, 0, false, true, false); } +/** + * devm_reset_control_bulk_get_optional_exclusive_released - resource managed + * reset_control_bulk_optional_get_exclusive_released() + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Managed reset_control_bulk_optional_get_exclusive_released(). For reset + * controllers returned from this function, reset_control_put() is called + * automatically on driver detach. + * + * See reset_control_bulk_optional_get_exclusive_released() for more information. + */ +static inline int __must_check +devm_reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, false); +} + /** * devm_reset_control_get_shared - resource managed reset_control_get_shared() * @dev: device to be reset by the controller @@ -396,6 +651,26 @@ static inline struct reset_control *devm_reset_control_get_shared( return __devm_reset_control_get(dev, id, 0, true, false, false); } +/** + * devm_reset_control_bulk_get_shared - resource managed + * reset_control_bulk_get_shared() + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Managed reset_control_bulk_get_shared(). For reset controllers returned + * from this function, reset_control_put() is called automatically on driver + * detach. + * + * See reset_control_bulk_get_shared() for more information. + */ +static inline int __must_check +devm_reset_control_bulk_get_shared(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, false); +} + /** * devm_reset_control_get_optional_exclusive - resource managed * reset_control_get_optional_exclusive() @@ -414,6 +689,26 @@ static inline struct reset_control *devm_reset_control_get_optional_exclusive( return __devm_reset_control_get(dev, id, 0, false, true, true); } +/** + * devm_reset_control_bulk_get_optional_exclusive - resource managed + * reset_control_bulk_get_optional_exclusive() + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Managed reset_control_bulk_get_optional_exclusive(). For reset controllers + * returned from this function, reset_control_put() is called automatically on + * driver detach. + * + * See reset_control_bulk_get_optional_exclusive() for more information. + */ +static inline int __must_check +devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, true); +} + /** * devm_reset_control_get_optional_shared - resource managed * reset_control_get_optional_shared() @@ -432,6 +727,26 @@ static inline struct reset_control *devm_reset_control_get_optional_shared( return __devm_reset_control_get(dev, id, 0, true, true, false); } +/** + * devm_reset_control_bulk_get_optional_shared - resource managed + * reset_control_bulk_get_optional_shared() + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Managed reset_control_bulk_get_optional_shared(). For reset controllers + * returned from this function, reset_control_put() is called automatically on + * driver detach. + * + * See reset_control_bulk_get_optional_shared() for more information. + */ +static inline int __must_check +devm_reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, true, false); +} + /** * devm_reset_control_get_exclusive_by_index - resource managed * reset_control_get_exclusive() -- cgit v1.2.3 From 1066cfbdfa3f5c401870fad577fe63d1171a5bcd Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Wed, 10 Mar 2021 16:39:52 -0800 Subject: regmap-irq: Extend sub-irq to support non-fixed reg strides Qualcomm's MFD chips have a top level interrupt status register and sub-irqs (peripherals). When a bit in the main status register goes high, it means that the peripheral corresponding to that bit has an unserviced interrupt. If the bit is not set, this means that the corresponding peripheral does not. Commit a2d21848d9211d ("regmap: regmap-irq: Add main status register support") introduced the sub-irq logic that is currently applied only when reading status registers, but not for any other functions like acking or masking. Extend the use of sub-irq to all other functions, with two caveats regarding the specification of offsets: - Each member of the sub_reg_offsets array should be of length 1 - The specified offsets should be the unequal strides for each sub-irq device. In QCOM's case, all the *_base registers are to be configured to the base addresses of the first sub-irq group, with offsets of each subsequent group calculated as a difference from these addresses. Continuing from the example mentioned in the cover letter: /* * Address of MISC_INT_MASK = 0x1011 * Address of TEMP_ALARM_INT_MASK = 0x2011 * Address of GPIO01_INT_MASK = 0x3011 * * Calculate offsets as: * offset_0 = 0x1011 - 0x1011 = 0 (to access MISC's * registers) * offset_1 = 0x2011 - 0x1011 = 0x1000 * offset_2 = 0x3011 - 0x1011 = 0x2000 */ static unsigned int sub_unit0_offsets[] = {0}; static unsigned int sub_unit1_offsets[] = {0x1000}; static unsigned int sub_unit2_offsets[] = {0x2000}; static struct regmap_irq_sub_irq_map chip_sub_irq_offsets[] = { REGMAP_IRQ_MAIN_REG_OFFSET(sub_unit0_offsets), REGMAP_IRQ_MAIN_REG_OFFSET(sub_unit0_offsets), REGMAP_IRQ_MAIN_REG_OFFSET(sub_unit0_offsets), }; static struct regmap_irq_chip chip_irq_chip = { --------8<-------- .not_fixed_stride = true, .mask_base = MISC_INT_MASK, .type_base = MISC_INT_TYPE, .ack_base = MISC_INT_ACK, .sub_reg_offsets = chip_sub_irq_offsets, --------8<-------- }; Signed-off-by: Guru Das Srinagesh Link: https://lore.kernel.org/r/526562423eaa58b4075362083f561841f1d6956c.1615423027.git.gurus@codeaurora.org Signed-off-by: Mark Brown --- include/linux/regmap.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 2cc4ecd36298..18910bd809f7 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1378,6 +1378,9 @@ struct regmap_irq_sub_irq_map { * status_base. Should contain num_regs arrays. * Can be provided for chips with more complex mapping than * 1.st bit to 1.st sub-reg, 2.nd bit to 2.nd sub-reg, ... + * When used with not_fixed_stride, each one-element array + * member contains offset calculated as address from each + * peripheral to first peripheral. * @num_main_regs: Number of 'main status' irq registers for chips which have * main_status set. * @@ -1404,6 +1407,9 @@ struct regmap_irq_sub_irq_map { * @clear_on_unmask: For chips with interrupts cleared on read: read the status * registers before unmasking interrupts to clear any bits * set when they were masked. + * @not_fixed_stride: Used when chip peripherals are not laid out with fixed + * stride. Must be used with sub_reg_offsets containing the + * offsets to each peripheral. * @runtime_pm: Hold a runtime PM lock on the device when accessing it. * * @num_regs: Number of registers in each control bank. @@ -1450,6 +1456,7 @@ struct regmap_irq_chip { bool type_invert:1; bool type_in_mask:1; bool clear_on_unmask:1; + bool not_fixed_stride:1; int num_regs; -- cgit v1.2.3 From 8ccbed2476f2a615d5045a7c5c7b459db7dd9263 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 8 Jan 2021 12:55:49 +0200 Subject: thunderbolt: Do not re-establish XDomain DMA paths automatically This step is actually not needed. The service drivers themselves will handle this once they have negotiated the service up and running again with the remote side. Also dropping this makes it easier to add support for multiple DMA tunnels over a single XDomain connection. Signed-off-by: Mika Westerberg --- include/linux/thunderbolt.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 659a0a810fa1..7ec977161f5c 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -185,7 +185,6 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); * @link_speed: Speed of the link in Gb/s * @link_width: Width of the link (1 or 2) * @is_unplugged: The XDomain is unplugged - * @resume: The XDomain is being resumed * @needs_uuid: If the XDomain does not have @remote_uuid it will be * queried first * @transmit_path: HopID which the remote end expects us to transmit @@ -231,7 +230,6 @@ struct tb_xdomain { unsigned int link_speed; unsigned int link_width; bool is_unplugged; - bool resume; bool needs_uuid; u16 transmit_path; u16 transmit_ring; -- cgit v1.2.3 From 7d3084c0b77c6c417a16fc1c5bf3bc3149d20fab Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 8 Jan 2021 14:38:24 +0200 Subject: thunderbolt: Add tb_property_copy_dir() This function takes a deep copy of the properties. We need this in order to support more dynamic properties per XDomain connection as required by the USB4 inter-domain service spec. Signed-off-by: Mika Westerberg --- include/linux/thunderbolt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 7ec977161f5c..003a9ad29168 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -146,6 +146,7 @@ struct tb_property_dir *tb_property_parse_dir(const u32 *block, size_t block_len); ssize_t tb_property_format_dir(const struct tb_property_dir *dir, u32 *block, size_t block_len); +struct tb_property_dir *tb_property_copy_dir(const struct tb_property_dir *dir); struct tb_property_dir *tb_property_create_dir(const uuid_t *uuid); void tb_property_free_dir(struct tb_property_dir *dir); int tb_property_add_immediate(struct tb_property_dir *parent, const char *key, -- cgit v1.2.3 From 46b494f286812a88caba28dd0810cf3a55747431 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 8 Jan 2021 14:57:19 +0200 Subject: thunderbolt: Add support for maxhopid XDomain property USB4 inter-domain spec mandates that the compatible hosts expose a new property "maxhopid" that tells the connection manager on the other side what is the maximum supported input HopID over the connection. Since this is depend on the lane adapter the cable is connected it needs to be filled in dynamically. For this reason we take a copy of the global properties and fill then for each XDomain connection upon first connect, and then keep updating it if the generation changes as services are being added/removed. We also take advantage of this copy to fill in the hostname. We also expose this maxhopid as an attribute under each XDomain device. While there drop kernel-doc entry for property_lock which seems to be left there when the structure was originally introduced. Signed-off-by: Mika Westerberg --- include/linux/thunderbolt.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 003a9ad29168..3e0ce654d60c 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -180,6 +180,8 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); * @route: Route string the other domain can be reached * @vendor: Vendor ID of the remote domain * @device: Device ID of the demote domain + * @local_max_hopid: Maximum input HopID of this host + * @remote_max_hopid: Maximum input HopID of the remote host * @lock: Lock to serialize access to the following fields of this structure * @vendor_name: Name of the vendor (or %NULL if not known) * @device_name: Name of the device (or %NULL if not known) @@ -193,9 +195,11 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); * @receive_path: HopID which we expect the remote end to transmit * @receive_ring: Local ring (hop) where incoming packets arrive * @service_ids: Used to generate IDs for the services - * @properties: Properties exported by the remote domain - * @property_block_gen: Generation of @properties - * @properties_lock: Lock protecting @properties. + * @local_property_block: Local block of properties + * @local_property_block_gen: Generation of @local_property_block + * @local_property_block_len: Length of the @local_property_block in dwords + * @remote_properties: Properties exported by the remote domain + * @remote_property_block_gen: Generation of @remote_properties * @get_uuid_work: Work used to retrieve @remote_uuid * @uuid_retries: Number of times left @remote_uuid is requested before * giving up @@ -225,6 +229,8 @@ struct tb_xdomain { u64 route; u16 vendor; u16 device; + unsigned int local_max_hopid; + unsigned int remote_max_hopid; struct mutex lock; const char *vendor_name; const char *device_name; @@ -237,8 +243,11 @@ struct tb_xdomain { u16 receive_path; u16 receive_ring; struct ida service_ids; - struct tb_property_dir *properties; - u32 property_block_gen; + u32 *local_property_block; + u32 local_property_block_gen; + u32 local_property_block_len; + struct tb_property_dir *remote_properties; + u32 remote_property_block_gen; struct delayed_work get_uuid_work; int uuid_retries; struct delayed_work get_properties_work; -- cgit v1.2.3 From 180b0689425c6fb2b35e69a3316ee38371a782df Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 8 Jan 2021 16:25:39 +0200 Subject: thunderbolt: Allow multiple DMA tunnels over a single XDomain connection Currently we have had an artificial limitation of a single DMA tunnel per XDomain connection. However, hardware wise there is no such limit and software based connection manager can take advantage of all the DMA rings available on the host to establish tunnels. For this reason make the tb_xdomain_[enable|disable]_paths() to take the DMA ring and HopID as parameter instead of storing them in the struct tb_xdomain. We also add API functions to allocate input and output HopIDs of the XDomain connection that the service drivers can use instead of hard-coding. Also convert the two existing service drivers over to this API. Signed-off-by: Mika Westerberg --- include/linux/thunderbolt.h | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 3e0ce654d60c..e7c96c37174f 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -190,11 +190,9 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); * @is_unplugged: The XDomain is unplugged * @needs_uuid: If the XDomain does not have @remote_uuid it will be * queried first - * @transmit_path: HopID which the remote end expects us to transmit - * @transmit_ring: Local ring (hop) where outgoing packets are pushed - * @receive_path: HopID which we expect the remote end to transmit - * @receive_ring: Local ring (hop) where incoming packets arrive * @service_ids: Used to generate IDs for the services + * @in_hopids: Input HopIDs for DMA tunneling + * @out_hopids; Output HopIDs for DMA tunneling * @local_property_block: Local block of properties * @local_property_block_gen: Generation of @local_property_block * @local_property_block_len: Length of the @local_property_block in dwords @@ -238,11 +236,9 @@ struct tb_xdomain { unsigned int link_width; bool is_unplugged; bool needs_uuid; - u16 transmit_path; - u16 transmit_ring; - u16 receive_path; - u16 receive_ring; struct ida service_ids; + struct ida in_hopids; + struct ida out_hopids; u32 *local_property_block; u32 local_property_block_gen; u32 local_property_block_len; @@ -260,10 +256,22 @@ struct tb_xdomain { int tb_xdomain_lane_bonding_enable(struct tb_xdomain *xd); void tb_xdomain_lane_bonding_disable(struct tb_xdomain *xd); -int tb_xdomain_enable_paths(struct tb_xdomain *xd, u16 transmit_path, - u16 transmit_ring, u16 receive_path, - u16 receive_ring); -int tb_xdomain_disable_paths(struct tb_xdomain *xd); +int tb_xdomain_alloc_in_hopid(struct tb_xdomain *xd, int hopid); +void tb_xdomain_release_in_hopid(struct tb_xdomain *xd, int hopid); +int tb_xdomain_alloc_out_hopid(struct tb_xdomain *xd, int hopid); +void tb_xdomain_release_out_hopid(struct tb_xdomain *xd, int hopid); +int tb_xdomain_enable_paths(struct tb_xdomain *xd, int transmit_path, + int transmit_ring, int receive_path, + int receive_ring); +int tb_xdomain_disable_paths(struct tb_xdomain *xd, int transmit_path, + int transmit_ring, int receive_path, + int receive_ring); + +static inline int tb_xdomain_disable_all_paths(struct tb_xdomain *xd) +{ + return tb_xdomain_disable_paths(xd, -1, -1, -1, -1); +} + struct tb_xdomain *tb_xdomain_find_by_uuid(struct tb *tb, const uuid_t *uuid); struct tb_xdomain *tb_xdomain_find_by_route(struct tb *tb, u64 route); -- cgit v1.2.3 From 53e043b2b432ef2294efec04dd8a88d96c024624 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 15 Mar 2021 12:56:56 +0100 Subject: io_uring: remove structures from include/linux/io_uring.h Link: https://lore.kernel.org/r/8c1d14f3748105f4caeda01716d47af2fa41d11c.1615809009.git.metze@samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 9761a0ec9f95..79cde9906be0 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -5,31 +5,6 @@ #include #include -struct io_wq_work_node { - struct io_wq_work_node *next; -}; - -struct io_wq_work_list { - struct io_wq_work_node *first; - struct io_wq_work_node *last; -}; - -struct io_uring_task { - /* submission side */ - struct xarray xa; - struct wait_queue_head wait; - void *last; - void *io_wq; - struct percpu_counter inflight; - atomic_t in_idle; - bool sqpoll; - - spinlock_t task_lock; - struct io_wq_work_list task_list; - unsigned long task_state; - struct callback_head task_work; -}; - #if defined(CONFIG_IO_URING) struct sock *io_uring_get_socket(struct file *file); void __io_uring_task_cancel(void); -- cgit v1.2.3 From 2e8496f31d0be8f43849b2980b069f3a9805d047 Mon Sep 17 00:00:00 2001 From: Richard Gong Date: Tue, 9 Feb 2021 16:20:27 -0600 Subject: firmware: stratix10-svc: reset COMMAND_RECONFIG_FLAG_PARTIAL to 0 Clean up COMMAND_RECONFIG_FLAG_PARTIAL flag by resetting it to 0, which aligns with the firmware settings. Fixes: 36847f9e3e56 ("firmware: stratix10-svc: correct reconfig flag and timeout values") Signed-off-by: Richard Gong Reviewed-by: Tom Rix Signed-off-by: Moritz Fischer --- include/linux/firmware/intel/stratix10-svc-client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index ebc295647581..19781b0f6429 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -56,7 +56,7 @@ * COMMAND_RECONFIG_FLAG_PARTIAL: * Set to FPGA configuration type (full or partial). */ -#define COMMAND_RECONFIG_FLAG_PARTIAL 1 +#define COMMAND_RECONFIG_FLAG_PARTIAL 0 /* * Timeout settings for service clients: -- cgit v1.2.3 From b47e330231acbf4506b049643145cc64268a1940 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 16 Mar 2021 12:41:03 -0400 Subject: tracing: Pass buffer of event to trigger operations The ring_buffer_event_time_stamp() is going to be updated to extract the time stamp for the event without needing it to be set to have absolute values for all events. But to do so, it needs the buffer that the event is on as the buffer saves information for the event before it is committed to the buffer. If the trace buffer is disabled, a temporary buffer is used, and there's no access to this buffer from the current histogram triggers, even though it is passed to the trace event code. Pass the buffer that the event is on all the way down to the histogram triggers. Link: https://lkml.kernel.org/r/20210316164113.542448131@goodmis.org Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 28e7af1406f2..8cba64ce23a4 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -640,7 +640,8 @@ enum event_trigger_type { extern int filter_match_preds(struct event_filter *filter, void *rec); extern enum event_trigger_type -event_triggers_call(struct trace_event_file *file, void *rec, +event_triggers_call(struct trace_event_file *file, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event); extern void event_triggers_post_call(struct trace_event_file *file, @@ -664,7 +665,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file) if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { if (eflags & EVENT_FILE_FL_TRIGGER_MODE) - event_triggers_call(file, NULL, NULL); + event_triggers_call(file, NULL, NULL, NULL); if (eflags & EVENT_FILE_FL_SOFT_DISABLED) return true; if (eflags & EVENT_FILE_FL_PID_FILTER) -- cgit v1.2.3 From efe6196a6bc5bbc84b856316c4687fd24566a95c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 16 Mar 2021 12:41:04 -0400 Subject: ring-buffer: Allow ring_buffer_event_time_stamp() to return time stamp of all events Currently, ring_buffer_event_time_stamp() only returns an accurate time stamp of the event if it has an absolute extended time stamp attached to it. To make it more robust, use the event_stamp() in case the event does not have an absolute value attached to it. This will allow ring_buffer_event_time_stamp() to be used in more cases than just histograms, and it will also allow histograms to not require including absolute values all the time. Link: https://lkml.kernel.org/r/20210316164113.704830885@goodmis.org Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- include/linux/ring_buffer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 136ea0997e6d..057b7ed4fe24 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -61,7 +61,8 @@ enum ring_buffer_type { unsigned ring_buffer_event_length(struct ring_buffer_event *event); void *ring_buffer_event_data(struct ring_buffer_event *event); -u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event); +u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, + struct ring_buffer_event *event); /* * ring_buffer_discard_commit will remove an event that has not -- cgit v1.2.3 From f2616c772c768485de18e7fcb2816bcdcd098339 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 17 Mar 2021 13:34:35 -0400 Subject: seq_buf: Add seq_buf_terminate() API In the case that the seq_buf buffer needs to be printed directly, add a way to make sure that the buffer is safe to read by forcing a nul terminating character at the end of the string, or the last byte of the buffer if the string has overflowed. Signed-off-by: Steven Rostedt (VMware) --- include/linux/seq_buf.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 9d6c28cc4d8f..5b31c5147969 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -71,6 +71,31 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) return min(s->len, s->size); } +/** + * seq_buf_terminate - Make sure buffer is nul terminated + * @s: the seq_buf descriptor to terminate. + * + * This makes sure that the buffer in @s is nul terminated and + * safe to read as a string. + * + * Note, if this is called when the buffer has overflowed, then + * the last byte of the buffer is zeroed, and the len will still + * point passed it. + * + * After this function is called, s->buffer is safe to use + * in string operations. + */ +static inline void seq_buf_terminate(struct seq_buf *s) +{ + if (WARN_ON(s->size == 0)) + return; + + if (seq_buf_buffer_left(s)) + s->buffer[s->len] = 0; + else + s->buffer[s->size - 1] = 0; +} + /** * seq_buf_get_buf - get buffer to write arbitrary data to * @s: the seq_buf handle -- cgit v1.2.3 From 255c04a87f4381849fce9ed81e5efabf78a71a30 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 18 Mar 2021 19:37:43 +0100 Subject: net: embed num_tc in the xps maps The xps cpus/rxqs map is accessed using dev->num_tc, which is used when allocating the map. But later updates of dev->num_tc can lead to having a mismatch between the maps and how they're accessed. In such cases the map values do not make any sense and out of bound accesses can occur (that can be easily seen using KASAN). This patch aims at fixing this by embedding num_tc into the maps, using the value at the time the map is created. This brings two improvements: - The maps can be accessed using the embedded num_tc, so we know for sure we won't have out of bound accesses. - Checks can be made before accessing the maps so we know the values retrieved will make sense. We also update __netif_set_xps_queue to conditionally copy old maps from dev_maps in the new one only if the number of traffic classes from both maps match. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97254c089eb2..c38534c55ea1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -771,9 +771,15 @@ struct xps_map { /* * This structure holds all XPS maps for device. Maps are indexed by CPU. + * + * We keep track of the number of traffic classes used when the struct is + * allocated, in num_tc. This will be used to navigate the maps, to ensure we're + * not crossing its upper bound, as the original dev->num_tc can be updated in + * the meantime. */ struct xps_dev_maps { struct rcu_head rcu; + s16 num_tc; struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */ }; -- cgit v1.2.3 From 5478fcd0f48322e04ae6c173ad3a1959e066dc83 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 18 Mar 2021 19:37:44 +0100 Subject: net: embed nr_ids in the xps maps Embed nr_ids (the number of cpu for the xps cpus map, and the number of rxqs for the xps cpus map) in dev_maps. That will help not accessing out of bound memory if those values change after dev_maps was allocated. Suggested-by: Alexander Duyck Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c38534c55ea1..09e73f5a8c78 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -772,6 +772,9 @@ struct xps_map { /* * This structure holds all XPS maps for device. Maps are indexed by CPU. * + * We keep track of the number of cpus/rxqs used when the struct is allocated, + * in nr_ids. This will help not accessing out-of-bound memory. + * * We keep track of the number of traffic classes used when the struct is * allocated, in num_tc. This will be used to navigate the maps, to ensure we're * not crossing its upper bound, as the original dev->num_tc can be updated in @@ -779,6 +782,7 @@ struct xps_map { */ struct xps_dev_maps { struct rcu_head rcu; + unsigned int nr_ids; s16 num_tc; struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */ }; -- cgit v1.2.3 From 044ab86d431b59b88966457dbb62679f274ec442 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 18 Mar 2021 19:37:46 +0100 Subject: net: move the xps maps to an array Move the xps maps (xps_cpus_map and xps_rxqs_map) to an array in net_device. That will simplify a lot the code removing the need for lots of if/else conditionals as the correct map will be available using its offset in the array. This should not modify the xps maps behaviour in any way. Suggested-by: Alexander Duyck Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 09e73f5a8c78..4940509999be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -754,6 +754,13 @@ struct rx_queue_attribute { const char *buf, size_t len); }; +/* XPS map type and offset of the xps map within net_device->xps_maps[]. */ +enum xps_map_type { + XPS_CPUS = 0, + XPS_RXQS, + XPS_MAPS_MAX, +}; + #ifdef CONFIG_XPS /* * This structure holds an XPS map which can be of variable length. The @@ -1773,8 +1780,7 @@ enum netdev_ml_priv_type { * @tx_queue_len: Max frames per queue allowed * @tx_global_lock: XXX: need comments on this one * @xdp_bulkq: XDP device bulk queue - * @xps_cpus_map: all CPUs map for XPS device - * @xps_rxqs_map: all RXQs map for XPS device + * @xps_maps: all CPUs/RXQs maps for XPS device * * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for @@ -2070,8 +2076,7 @@ struct net_device { struct xdp_dev_bulk_queue __percpu *xdp_bulkq; #ifdef CONFIG_XPS - struct xps_dev_maps __rcu *xps_cpus_map; - struct xps_dev_maps __rcu *xps_rxqs_map; + struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; #endif #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; @@ -3701,7 +3706,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index); int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, - u16 index, bool is_rxqs_map); + u16 index, enum xps_map_type type); /** * netif_attr_test_mask - Test a CPU or Rx queue set in a mask @@ -3796,7 +3801,7 @@ static inline int netif_set_xps_queue(struct net_device *dev, static inline int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, - u16 index, bool is_rxqs_map) + u16 index, enum xps_map_type type) { return 0; } -- cgit v1.2.3 From 76da35dc99afb460b9c335182ba6a3e7ff924186 Mon Sep 17 00:00:00 2001 From: "Wong, Vee Khee" Date: Wed, 17 Mar 2021 09:32:47 +0800 Subject: stmmac: intel: Add PSE and PCH PTP clock source selection Intel mGbE variant implemented in EHL and TGL can be set to select different clock frequency based on GPO bits in MAC_GPIO_STATUS register. We introduce a new "void (*ptp_clk_freq_config)(void *priv)" in platform data so that if a platform is required to configure the frequency of clock source, in this case Intel mGBE does, the platform-specific configuration of the PTP clock setting is done when stmmac_ptp_register() is called. Signed-off-by: Wong, Vee Khee Signed-off-by: Voon Weifeng Co-developed-by: Ong Boon Leong Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 51004ebd0540..10abc80b601e 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -181,6 +181,7 @@ struct plat_stmmacenet_data { void (*fix_mac_speed)(void *priv, unsigned int speed); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); + void (*ptp_clk_freq_config)(void *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); struct mac_device_info *(*setup)(void *priv); -- cgit v1.2.3 From 035e9f471691a16c32b389c8b2f236043a2a50d7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 16 Mar 2021 20:26:48 -0700 Subject: scsi: sbitmap: Silence a debug kernel warning triggered by sbitmap_put() All sbitmap code uses implied preemption protection to update sb->alloc_hint except sbitmap_put(). Using implied preemption protection is safe since the value of sb->alloc_hint only affects performance of sbitmap allocations but not their correctness. Change this_cpu_ptr() in sbitmap_put() into raw_cpu_ptr() to suppress the following kernel warning that appears with preemption debugging enabled (CONFIG_DEBUG_PREEMPT): BUG: using smp_processor_id() in preemptible [00000000] code: scsi_eh_0/152 caller is debug_smp_processor_id+0x17/0x20 CPU: 1 PID: 152 Comm: scsi_eh_0 Tainted: G W 5.12.0-rc1-dbg+ #6 Call Trace: show_stack+0x52/0x58 dump_stack+0xaf/0xf3 check_preemption_disabled+0xce/0xd0 debug_smp_processor_id+0x17/0x20 scsi_device_unbusy+0x13a/0x1c0 [scsi_mod] scsi_finish_command+0x4d/0x290 [scsi_mod] scsi_eh_flush_done_q+0x1e7/0x280 [scsi_mod] ata_scsi_port_error_handler+0x592/0x750 [libata] ata_scsi_error+0x1a0/0x1f0 [libata] scsi_error_handler+0x19e/0x330 [scsi_mod] kthread+0x222/0x250 ret_from_fork+0x1f/0x30 Link: https://lore.kernel.org/r/20210317032648.9080-1-bvanassche@acm.org Fixes: c548e62bcf6a ("scsi: sbitmap: Move allocation hint into sbitmap") Cc: Hannes Reinecke Cc: Omar Sandoval Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/linux/sbitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 3087e1f15fdd..2713e689ad66 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -324,7 +324,7 @@ static inline void sbitmap_put(struct sbitmap *sb, unsigned int bitnr) sbitmap_deferred_clear_bit(sb, bitnr); if (likely(sb->alloc_hint && !sb->round_robin && bitnr < sb->depth)) - *this_cpu_ptr(sb->alloc_hint) = bitnr; + *raw_cpu_ptr(sb->alloc_hint) = bitnr; } static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) -- cgit v1.2.3 From 73f620951b2b594bdc38722c0d647c3b3312af7a Mon Sep 17 00:00:00 2001 From: Claire Chang Date: Thu, 18 Mar 2021 17:14:22 +0100 Subject: swiotlb: move global variables into a new io_tlb_mem structure Added a new struct, io_tlb_mem, as the IO TLB memory pool descriptor and moved relevant global variables into that struct. This will be useful later to allow for restricted DMA pool. Signed-off-by: Claire Chang [hch: rebased] Signed-off-by: Christoph Hellwig Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 0696bdc8072e..5ec5378b17c3 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -6,6 +6,7 @@ #include #include #include +#include struct device; struct page; @@ -61,11 +62,49 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, #ifdef CONFIG_SWIOTLB extern enum swiotlb_force swiotlb_force; -extern phys_addr_t io_tlb_start, io_tlb_end; + +/** + * struct io_tlb_mem - IO TLB Memory Pool Descriptor + * + * @start: The start address of the swiotlb memory pool. Used to do a quick + * range check to see if the memory was in fact allocated by this + * API. + * @end: The end address of the swiotlb memory pool. Used to do a quick + * range check to see if the memory was in fact allocated by this + * API. + * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and + * @end. This is command line adjustable via setup_io_tlb_npages. + * @used: The number of used IO TLB block. + * @list: The free list describing the number of free entries available + * from each index. + * @index: The index to start searching in the next round. + * @orig_addr: The original address corresponding to a mapped entry. + * @alloc_size: Size of the allocated buffer. + * @lock: The lock to protect the above data structures in the map and + * unmap calls. + * @debugfs: The dentry to debugfs. + * @late_alloc: %true if allocated using the page allocator + */ +struct io_tlb_mem { + phys_addr_t start; + phys_addr_t end; + unsigned long nslabs; + unsigned long used; + unsigned int *list; + unsigned int index; + phys_addr_t *orig_addr; + size_t *alloc_size; + spinlock_t lock; + struct dentry *debugfs; + bool late_alloc; +}; +extern struct io_tlb_mem io_tlb_default_mem; static inline bool is_swiotlb_buffer(phys_addr_t paddr) { - return paddr >= io_tlb_start && paddr < io_tlb_end; + struct io_tlb_mem *mem = &io_tlb_default_mem; + + return paddr >= mem->start && paddr < mem->end; } void __init swiotlb_exit(void); -- cgit v1.2.3 From 2d29960af0bee8cc6731b9bd3964850c9e7a6840 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Mar 2021 17:14:23 +0100 Subject: swiotlb: dynamically allocate io_tlb_default_mem Instead of allocating ->list and ->orig_addr separately just do one dynamic allocation for the actual io_tlb_mem structure. This simplifies a lot of the initialization code, and also allows to just check io_tlb_default_mem to see if swiotlb is in use. Signed-off-by: Christoph Hellwig Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 5ec5378b17c3..63f7a63f61d0 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -90,28 +90,30 @@ struct io_tlb_mem { phys_addr_t end; unsigned long nslabs; unsigned long used; - unsigned int *list; unsigned int index; - phys_addr_t *orig_addr; - size_t *alloc_size; spinlock_t lock; struct dentry *debugfs; bool late_alloc; + struct io_tlb_slot { + phys_addr_t orig_addr; + size_t alloc_size; + unsigned int list; + } slots[]; }; -extern struct io_tlb_mem io_tlb_default_mem; +extern struct io_tlb_mem *io_tlb_default_mem; static inline bool is_swiotlb_buffer(phys_addr_t paddr) { - struct io_tlb_mem *mem = &io_tlb_default_mem; + struct io_tlb_mem *mem = io_tlb_default_mem; - return paddr >= mem->start && paddr < mem->end; + return mem && paddr >= mem->start && paddr < mem->end; } void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); size_t swiotlb_max_mapping_size(struct device *dev); bool is_swiotlb_active(void); -void __init swiotlb_adjust_size(unsigned long new_size); +void __init swiotlb_adjust_size(unsigned long size); #else #define swiotlb_force SWIOTLB_NO_FORCE static inline bool is_swiotlb_buffer(phys_addr_t paddr) @@ -135,7 +137,7 @@ static inline bool is_swiotlb_active(void) return false; } -static inline void swiotlb_adjust_size(unsigned long new_size) +static inline void swiotlb_adjust_size(unsigned long size) { } #endif /* CONFIG_SWIOTLB */ -- cgit v1.2.3 From 2cbc2776efe4faed0e17c48ae076aa03a0fcc61f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Mar 2021 17:14:24 +0100 Subject: swiotlb: remove swiotlb_nr_tbl All callers just use it to check if swiotlb is active at all, for which they can just use is_swiotlb_active. In the longer run drivers need to stop using is_swiotlb_active as well, but let's do the simple step first. Signed-off-by: Christoph Hellwig Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 63f7a63f61d0..216854a5e513 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -37,7 +37,6 @@ enum swiotlb_force { extern void swiotlb_init(int verbose); int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); -extern unsigned long swiotlb_nr_tbl(void); unsigned long swiotlb_size_or_default(void); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); extern int swiotlb_late_init_with_default_size(size_t default_size); -- cgit v1.2.3 From fb98cc0b3af2ba4d87301dff2b381b12eee35d7d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 08:33:19 +0100 Subject: efi: use 32-bit alignment for efi_guid_t literals Commit 494c704f9af0 ("efi: Use 32-bit alignment for efi_guid_t") updated the type definition of efi_guid_t to ensure that it always appears sufficiently aligned (the UEFI spec is ambiguous about this, but given the fact that its EFI_GUID type is defined in terms of a struct carrying a uint32_t, the natural alignment is definitely >= 32 bits). However, we missed the EFI_GUID() macro which is used to instantiate efi_guid_t literals: that macro is still based on the guid_t type, which does not have a minimum alignment at all. This results in warnings such as In file included from drivers/firmware/efi/mokvar-table.c:35: include/linux/efi.h:1093:34: warning: passing 1-byte aligned argument to 4-byte aligned parameter 2 of 'get_var' may result in an unaligned pointer access [-Walign-mismatch] status = get_var(L"SecureBoot", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, ^ include/linux/efi.h:1101:24: warning: passing 1-byte aligned argument to 4-byte aligned parameter 2 of 'get_var' may result in an unaligned pointer access [-Walign-mismatch] get_var(L"SetupMode", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, &setupmode); The distinction only matters on CPUs that do not support misaligned loads fully, but 32-bit ARM's load-multiple instructions fall into that category, and these are likely to be emitted by the compiler that built the firmware for loading word-aligned 128-bit GUIDs from memory So re-implement the initializer in terms of our own efi_guid_t type, so that the alignment becomes a property of the literal's type. Fixes: 494c704f9af0 ("efi: Use 32-bit alignment for efi_guid_t") Reported-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/1327 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 8710f5710c1d..6b5d36babfcc 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -72,8 +72,10 @@ typedef void *efi_handle_t; */ typedef guid_t efi_guid_t __aligned(__alignof__(u32)); -#define EFI_GUID(a,b,c,d0,d1,d2,d3,d4,d5,d6,d7) \ - GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) +#define EFI_GUID(a, b, c, d...) (efi_guid_t){ { \ + (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, d } } /* * Generic EFI table header -- cgit v1.2.3 From 5261ced47f8e89173c3b015f6152a05f11a418c3 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 18 Mar 2021 13:28:12 -0400 Subject: locking/ww_mutex: Remove DEFINE_WW_MUTEX() macro The current DEFINE_WW_MUTEX() macro fails to properly set up the lockdep key of the ww_mutexes causing potential circular locking dependency splat. Though it is possible to add more macro magic to make it work, but the result is rather ugly. Since locktorture was the only user of DEFINE_WW_MUTEX() and the previous commit has just removed its use. It is easier to just remove the macro to force future users of ww_mutexes to use ww_mutex_init() for initialization. Suggested-by: Peter Zijlstra Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210318172814.4400-4-longman@redhat.com --- include/linux/ww_mutex.h | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 6ecf2a0220db..b77f39f319ad 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -48,39 +48,26 @@ struct ww_acquire_ctx { #endif }; -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) \ - , .ww_class = class -#else -# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) -#endif - #define __WW_CLASS_INITIALIZER(ww_class, _is_wait_die) \ { .stamp = ATOMIC_LONG_INIT(0) \ , .acquire_name = #ww_class "_acquire" \ , .mutex_name = #ww_class "_mutex" \ , .is_wait_die = _is_wait_die } -#define __WW_MUTEX_INITIALIZER(lockname, class) \ - { .base = __MUTEX_INITIALIZER(lockname.base) \ - __WW_CLASS_MUTEX_INITIALIZER(lockname, class) } - #define DEFINE_WD_CLASS(classname) \ struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 1) #define DEFINE_WW_CLASS(classname) \ struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 0) -#define DEFINE_WW_MUTEX(mutexname, ww_class) \ - struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class) - /** * ww_mutex_init - initialize the w/w mutex * @lock: the mutex to be initialized * @ww_class: the w/w class the mutex should belong to * * Initialize the w/w mutex to unlocked state and associate it with the given - * class. + * class. Static define macro for w/w mutex is not provided and this function + * is the only way to properly initialize the w/w mutex. * * It is not allowed to initialize an already locked mutex. */ -- cgit v1.2.3 From 84fcfbdadbfdd86c9a43a52703203e05fe7efd92 Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Fri, 12 Mar 2021 10:19:12 +0800 Subject: dma-mapping: remove a pointless empty line in dma_alloc_coherent Signed-off-by: Wang Qing Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 954847f9a3e0..e9d19b974f26 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -435,7 +435,6 @@ static inline void dma_sync_sgtable_for_device(struct device *dev, static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { - return dma_alloc_attrs(dev, size, dma_handle, gfp, (gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0); } -- cgit v1.2.3 From 919067cc845f323a80b6fe987b64238bd82d309e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Mar 2021 10:39:33 -0700 Subject: net: add CONFIG_PCPU_DEV_REFCNT I was working on a syzbot issue, claiming one device could not be dismantled because its refcount was -1 unregister_netdevice: waiting for sit0 to become free. Usage count = -1 It would be nice if syzbot could trigger a warning at the time this reference count became negative. This patch adds CONFIG_PCPU_DEV_REFCNT options which defaults to per cpu variables (as before this patch) on SMP builds. v2: free_dev label in alloc_netdev_mqs() is moved to avoid a compiler warning (-Wunused-label), as reported by kernel test robot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4940509999be..8f003955c485 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2092,7 +2092,12 @@ struct net_device { u32 proto_down_reason; struct list_head todo_list; + +#ifdef CONFIG_PCPU_DEV_REFCNT int __percpu *pcpu_refcnt; +#else + refcount_t dev_refcnt; +#endif struct list_head link_watch_list; @@ -4044,7 +4049,11 @@ void netdev_run_todo(void); */ static inline void dev_put(struct net_device *dev) { +#ifdef CONFIG_PCPU_DEV_REFCNT this_cpu_dec(*dev->pcpu_refcnt); +#else + refcount_dec(&dev->dev_refcnt); +#endif } /** @@ -4055,7 +4064,11 @@ static inline void dev_put(struct net_device *dev) */ static inline void dev_hold(struct net_device *dev) { +#ifdef CONFIG_PCPU_DEV_REFCNT this_cpu_inc(*dev->pcpu_refcnt); +#else + refcount_inc(&dev->dev_refcnt); +#endif } /* Carrier loss detection, dial on demand. The functions netif_carrier_on -- cgit v1.2.3 From f60a85cad677c4f9bb4cadd764f1d106c38c7cf8 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Wed, 17 Mar 2021 11:09:15 +0800 Subject: bpf: Fix umd memory leak in copy_process() The syzbot reported a memleak as follows: BUG: memory leak unreferenced object 0xffff888101b41d00 (size 120): comm "kworker/u4:0", pid 8, jiffies 4294944270 (age 12.780s) backtrace: [] alloc_pid+0x66/0x560 [] copy_process+0x1465/0x25e0 [] kernel_clone+0xf3/0x670 [] kernel_thread+0x61/0x80 [] call_usermodehelper_exec_work [] call_usermodehelper_exec_work+0xc4/0x120 [] process_one_work+0x2c9/0x600 [] worker_thread+0x59/0x5d0 [] kthread+0x178/0x1b0 [] ret_from_fork+0x1f/0x30 unreferenced object 0xffff888110ef5c00 (size 232): comm "kworker/u4:0", pid 8414, jiffies 4294944270 (age 12.780s) backtrace: [] kmem_cache_zalloc [] __alloc_file+0x1f/0xf0 [] alloc_empty_file+0x69/0x120 [] alloc_file+0x33/0x1b0 [] alloc_file_pseudo+0xb2/0x140 [] create_pipe_files+0x138/0x2e0 [] umd_setup+0x33/0x220 [] call_usermodehelper_exec_async+0xb4/0x1b0 [] ret_from_fork+0x1f/0x30 After the UMD process exits, the pipe_to_umh/pipe_from_umh and tgid need to be released. Fixes: d71fa5c9763c ("bpf: Add kernel module with user mode driver that populates bpffs.") Reported-by: syzbot+44908bb56d2bfe56b28e@syzkaller.appspotmail.com Signed-off-by: Zqiang Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210317030915.2865-1-qiang.zhang@windriver.com --- include/linux/usermode_driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usermode_driver.h b/include/linux/usermode_driver.h index 073a9e0ec07d..ad970416260d 100644 --- a/include/linux/usermode_driver.h +++ b/include/linux/usermode_driver.h @@ -14,5 +14,6 @@ struct umd_info { int umd_load_blob(struct umd_info *info, const void *data, size_t len); int umd_unload_blob(struct umd_info *info); int fork_usermode_driver(struct umd_info *info); +void umd_cleanup_helper(struct umd_info *info); #endif /* __LINUX_USERMODE_DRIVER_H__ */ -- cgit v1.2.3 From a2bbe66493ee380eb25e080e7fcdd1278a847f7e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 7 Jul 2019 09:57:53 -0400 Subject: constify dentry argument of dentry_path()/dentry_path_raw() Signed-off-by: Al Viro --- include/linux/dcache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c1e48014106f..4ecde5d8250c 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -300,8 +300,8 @@ char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *__d_path(const struct path *, const struct path *, char *, int); extern char *d_absolute_path(const struct path *, char *, int); extern char *d_path(const struct path *, char *, int); -extern char *dentry_path_raw(struct dentry *, char *, int); -extern char *dentry_path(struct dentry *, char *, int); +extern char *dentry_path_raw(const struct dentry *, char *, int); +extern char *dentry_path(const struct dentry *, char *, int); /* Allocation counts.. */ -- cgit v1.2.3 From 4eed7f5a8334a179f40b2c78c1ead572b9dd04a0 Mon Sep 17 00:00:00 2001 From: LI Qingwu Date: Fri, 19 Mar 2021 11:02:36 +0000 Subject: power: supply: bq27xxx: Add support for BQ78Z100 Add support for TI BQ78Z100, I2C interface gas gauge. It provides a fully integrated safety protection and authentication for 1 to 2-series cell Li-Ion and Li-Polymer battery packs. The patch was tested with BQ78Z100 equipment. CASE I: Discharging: POWER_SUPPLY_NAME=bq78z100-0 POWER_SUPPLY_STATUS=Discharging POWER_SUPPLY_PRESENT=1 POWER_SUPPLY_VOLTAGE_NOW=3386000 POWER_SUPPLY_CURRENT_NOW=-5000 POWER_SUPPLY_CAPACITY=27 POWER_SUPPLY_CAPACITY_LEVEL=Normal POWER_SUPPLY_TEMP=269 POWER_SUPPLY_TIME_TO_EMPTY_NOW=1249920 POWER_SUPPLY_TECHNOLOGY=Li-ion POWER_SUPPLY_CHARGE_FULL=6494000 POWER_SUPPLY_CHARGE_NOW=1736000 POWER_SUPPLY_CHARGE_FULL_DESIGN=6000000 POWER_SUPPLY_CYCLE_COUNT=1 POWER_SUPPLY_POWER_AVG=-20000 POWER_SUPPLY_HEALTH=Good POWER_SUPPLY_MANUFACTURER=Texas Instruments CASE II : No discharging current: POWER_SUPPLY_NAME=bq78z100-0 POWER_SUPPLY_STATUS=Not charging POWER_SUPPLY_PRESENT=1 POWER_SUPPLY_VOLTAGE_NOW=3386000 POWER_SUPPLY_CURRENT_NOW=0 POWER_SUPPLY_CAPACITY=27 POWER_SUPPLY_CAPACITY_LEVEL=Normal POWER_SUPPLY_TEMP=270 POWER_SUPPLY_TECHNOLOGY=Li-ion POWER_SUPPLY_CHARGE_FULL=6494000 POWER_SUPPLY_CHARGE_NOW=1734000 POWER_SUPPLY_CHARGE_FULL_DESIGN=6000000 POWER_SUPPLY_CYCLE_COUNT=1 POWER_SUPPLY_POWER_AVG=0 POWER_SUPPLY_HEALTH=Good POWER_SUPPLY_MANUFACTURER=Texas Instruments Signed-off-by: LI Qingwu Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 8d5f4f40fb41..a1aa68141d0b 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -33,6 +33,7 @@ enum bq27xxx_chip { BQ27Z561, BQ28Z610, BQ34Z100, + BQ78Z100, }; struct bq27xxx_device_info; -- cgit v1.2.3 From 3b03706fa621ce31a3e9ef6307020fde4e6aae16 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 18 Mar 2021 13:38:50 +0100 Subject: sched: Fix various typos Fix ~42 single-word typos in scheduler code comments. We have accumulated a few fun ones over the years. :-) Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: linux-kernel@vger.kernel.org --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index cf245bc237e7..05572e2140ad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1097,7 +1097,7 @@ struct task_struct { #ifdef CONFIG_CPUSETS /* Protected by ->alloc_lock: */ nodemask_t mems_allowed; - /* Seqence number to catch updates: */ + /* Sequence number to catch updates: */ seqcount_spinlock_t mems_allowed_seq; int cpuset_mem_spread_rotor; int cpuset_slab_spread_rotor; -- cgit v1.2.3 From e2db7592be8e83df47519116621411e1056b21c7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 22 Mar 2021 02:35:05 +0100 Subject: locking: Fix typos in comments Fix ~16 single-word typos in locking code comments. Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Paul E. McKenney Cc: Will Deacon Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 2 +- include/linux/rwsem.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 17805aac0e85..09ac2e8348d2 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -155,7 +155,7 @@ extern void lockdep_set_selftest_task(struct task_struct *task); extern void lockdep_init_task(struct task_struct *task); /* - * Split the recrursion counter in two to readily detect 'off' vs recursion. + * Split the recursion counter in two to readily detect 'off' vs recursion. */ #define LOCKDEP_RECURSION_BITS 16 #define LOCKDEP_OFF (1U << LOCKDEP_RECURSION_BITS) diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 4c715be48717..a66038d88878 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -110,7 +110,7 @@ do { \ /* * This is the same regardless of which rwsem implementation that is being used. - * It is just a heuristic meant to be called by somebody alreadying holding the + * It is just a heuristic meant to be called by somebody already holding the * rwsem to see if somebody from an incompatible type is wanting access to the * lock. */ -- cgit v1.2.3 From 97258ce902d1e1c396a4d7c38f6ae7085adb73c5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 22 Mar 2021 03:55:50 +0100 Subject: entry: Fix typos in comments Fix 3 single-word typos in the generic syscall entry code. Signed-off-by: Ingo Molnar Cc: Borislav Petkov Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/entry-common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 883acef895bc..2e2b8d6140ed 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -360,7 +360,7 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs); * * This is a combination of syscall_exit_to_user_mode_work() (1,2) and * exit_to_user_mode(). This function is preferred unless there is a - * compelling architectural reason to use the seperate functions. + * compelling architectural reason to use the separate functions. */ void syscall_exit_to_user_mode(struct pt_regs *regs); @@ -381,7 +381,7 @@ void irqentry_enter_from_user_mode(struct pt_regs *regs); * irqentry_exit_to_user_mode - Interrupt exit work * @regs: Pointer to current's pt_regs * - * Invoked with interrupts disbled and fully valid regs. Returns with all + * Invoked with interrupts disabled and fully valid regs. Returns with all * work handled, interrupts disabled such that the caller can immediately * switch to user mode. Called from architecture specific interrupt * handling code. -- cgit v1.2.3 From a359f757965aafd0f58570de95dc6bc06cf12a9c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 22 Mar 2021 04:21:30 +0100 Subject: irq: Fix typos in comments Fix ~36 single-word typos in the IRQ, irqchip and irqdomain code comments. Signed-off-by: Ingo Molnar Cc: Thomas Gleixner Cc: Marc Zyngier Cc: Borislav Petkov Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/irq.h | 4 ++-- include/linux/irqdesc.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 2efde6a79b7e..bee82809107c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -116,7 +116,7 @@ enum { * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to * support stacked irqchips, which indicates skipping - * all descendent irqchips. + * all descendant irqchips. */ enum { IRQ_SET_MASK_OK = 0, @@ -302,7 +302,7 @@ static inline bool irqd_is_level_type(struct irq_data *d) /* * Must only be called of irqchip.irq_set_affinity() or low level - * hieararchy domain allocation functions. + * hierarchy domain allocation functions. */ static inline void irqd_set_single_target(struct irq_data *d) { diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 891b323266df..df4651250785 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -32,7 +32,7 @@ struct pt_regs; * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts * @threads_handled: stats field for deferred spurious detection of threaded handlers - * @threads_handled_last: comparator field for deferred spurious detection of theraded handlers + * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers * @lock: locking for SMP * @affinity_hint: hint to user space for preferred irq affinity * @affinity_notify: context for notification of affinity changes -- cgit v1.2.3 From 4c38f2df71c8e33c0b64865992d693f5022eeaad Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 Jun 2020 15:49:40 +0530 Subject: cpufreq: CPPC: Add support for frequency invariance The Frequency Invariance Engine (FIE) is providing a frequency scaling correction factor that helps achieve more accurate load-tracking. Normally, this scaling factor can be obtained directly with the help of the cpufreq drivers as they know the exact frequency the hardware is running at. But that isn't the case for CPPC cpufreq driver. Another way of obtaining that is using the arch specific counter support, which is already present in kernel, but that hardware is optional for platforms. This patch updates the CPPC driver to register itself with the topology core to provide its own implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which gets called by the scheduler on every tick. Note that the arch specific counters have higher priority than CPPC counters, if available, though the CPPC driver doesn't need to have any special handling for that. On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we reach here from hard-irq context), which then schedules a normal work item and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable based on the counter updates since the last tick. To allow platforms to disable this CPPC counter-based frequency invariance support, this is all done under CONFIG_ACPI_CPPC_CPUFREQ_FIE, which is enabled by default. This also exports sched_setattr_nocheck() as the CPPC driver can be built as a module. Cc: linux-acpi@vger.kernel.org Reviewed-by: Ionela Voinescu Tested-by: Ionela Voinescu Tested-by: Vincent Guittot Acked-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar --- include/linux/arch_topology.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 11e555cfaecb..f180240dc95f 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -37,6 +37,7 @@ bool topology_scale_freq_invariant(void); enum scale_freq_source { SCALE_FREQ_SOURCE_CPUFREQ = 0, SCALE_FREQ_SOURCE_ARCH, + SCALE_FREQ_SOURCE_CPPC, }; struct scale_freq_data { -- cgit v1.2.3 From 60250052a1a6e3d605d736fea240226a42e4b3ee Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 15 Mar 2021 20:44:45 +0100 Subject: media: camera-mx2: Remove unused header file The imx27/imx25 camera driver has been removed a long time ago, so get rid of this unused header file. Signed-off-by: Fabio Estevam Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/platform_data/media/camera-mx2.h | 31 -------------------------- 1 file changed, 31 deletions(-) delete mode 100644 include/linux/platform_data/media/camera-mx2.h (limited to 'include/linux') diff --git a/include/linux/platform_data/media/camera-mx2.h b/include/linux/platform_data/media/camera-mx2.h deleted file mode 100644 index 8cfa76b6e1e1..000000000000 --- a/include/linux/platform_data/media/camera-mx2.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * mx2-cam.h - i.MX27/i.MX25 camera driver header file - * - * Copyright (C) 2003, Intel Corporation - * Copyright (C) 2008, Sascha Hauer - * Copyright (C) 2010, Baruch Siach - */ - -#ifndef __MACH_MX2_CAM_H_ -#define __MACH_MX2_CAM_H_ - -#define MX2_CAMERA_EXT_VSYNC (1 << 1) -#define MX2_CAMERA_CCIR (1 << 2) -#define MX2_CAMERA_CCIR_INTERLACE (1 << 3) -#define MX2_CAMERA_HSYNC_HIGH (1 << 4) -#define MX2_CAMERA_GATED_CLOCK (1 << 5) -#define MX2_CAMERA_INV_DATA (1 << 6) -#define MX2_CAMERA_PCLK_SAMPLE_RISING (1 << 7) - -/** - * struct mx2_camera_platform_data - optional platform data for mx2_camera - * @flags: any combination of MX2_CAMERA_* - * @clk: clock rate of the csi block / 2 - */ -struct mx2_camera_platform_data { - unsigned long flags; - unsigned long clk; -}; - -#endif /* __MACH_MX2_CAM_H_ */ -- cgit v1.2.3 From 3f5b610b45a36049392883f3754c616ad11b7c7e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 15 Mar 2021 20:44:46 +0100 Subject: media: camera-mx3: Remove unused header file The imx3 camera driver has been removed a long time ago, so get rid of this unused header file. Signed-off-by: Fabio Estevam Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/platform_data/media/camera-mx3.h | 43 -------------------------- 1 file changed, 43 deletions(-) delete mode 100644 include/linux/platform_data/media/camera-mx3.h (limited to 'include/linux') diff --git a/include/linux/platform_data/media/camera-mx3.h b/include/linux/platform_data/media/camera-mx3.h deleted file mode 100644 index 781c004e5596..000000000000 --- a/include/linux/platform_data/media/camera-mx3.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * mx3_camera.h - i.MX3x camera driver header file - * - * Copyright (C) 2008, Guennadi Liakhovetski, DENX Software Engineering, - */ - -#ifndef _MX3_CAMERA_H_ -#define _MX3_CAMERA_H_ - -#include - -#define MX3_CAMERA_CLK_SRC 1 -#define MX3_CAMERA_EXT_VSYNC 2 -#define MX3_CAMERA_DP 4 -#define MX3_CAMERA_PCP 8 -#define MX3_CAMERA_HSP 0x10 -#define MX3_CAMERA_VSP 0x20 -#define MX3_CAMERA_DATAWIDTH_4 0x40 -#define MX3_CAMERA_DATAWIDTH_8 0x80 -#define MX3_CAMERA_DATAWIDTH_10 0x100 -#define MX3_CAMERA_DATAWIDTH_15 0x200 - -#define MX3_CAMERA_DATAWIDTH_MASK (MX3_CAMERA_DATAWIDTH_4 | MX3_CAMERA_DATAWIDTH_8 | \ - MX3_CAMERA_DATAWIDTH_10 | MX3_CAMERA_DATAWIDTH_15) - -struct v4l2_async_subdev; - -/** - * struct mx3_camera_pdata - i.MX3x camera platform data - * @flags: MX3_CAMERA_* flags - * @mclk_10khz: master clock frequency in 10kHz units - * @dma_dev: IPU DMA device to match against in channel allocation - */ -struct mx3_camera_pdata { - unsigned long flags; - unsigned long mclk_10khz; - struct device *dma_dev; - struct v4l2_async_subdev **asd; /* Flat array, arranged in groups */ - int *asd_sizes; /* 0-terminated array of asd group sizes */ -}; - -#endif -- cgit v1.2.3 From 9666cec380d60808eb86d3be4caf84faeebe3081 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 7 Dec 2020 14:45:56 +0100 Subject: pwm: Drop function pwmchip_add_with_polarity() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pwmchip_add() only calls pwmchip_add_with_polarity() and nothing else. All other users of pwmchip_add_with_polarity() are gone. So drop pwmchip_add_with_polarity() and move the code instead to pwmchip_add(). The initial assignment to pwm->state.polarity is dropped. In every correct usage of the PWM API this value is overwritten later anyhow. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index e4d84d4db293..8f4eefd129aa 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -392,8 +392,6 @@ int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, int pwm_set_chip_data(struct pwm_device *pwm, void *data); void *pwm_get_chip_data(struct pwm_device *pwm); -int pwmchip_add_with_polarity(struct pwm_chip *chip, - enum pwm_polarity polarity); int pwmchip_add(struct pwm_chip *chip); int pwmchip_remove(struct pwm_chip *chip); struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, -- cgit v1.2.3 From 6558b667a7297418b8951ba54da68d551035ecc5 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 2 Mar 2021 15:51:03 +0800 Subject: soundwire: add override addr ops Platform firmware may have incorrect _ADR values causing the driver probes to fail. Add the override_ops, which when configured will allow for quirks based on DMI etc to override the addr values. Co-developed-by: Bard Liao Signed-off-by: Bard Liao Signed-off-by: Vinod Koul Reviewed-by: Rander Wang Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20210302075105.11515-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index d08039d65825..f0a3895e8faf 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -804,6 +804,7 @@ struct sdw_defer { /** * struct sdw_master_ops - Master driver ops * @read_prop: Read Master properties + * @override_adr: Override value read from firmware (quirk for buggy firmware) * @xfer_msg: Transfer message callback * @xfer_msg_defer: Defer version of transfer message callback * @reset_page_addr: Reset the SCP page address registers @@ -813,7 +814,8 @@ struct sdw_defer { */ struct sdw_master_ops { int (*read_prop)(struct sdw_bus *bus); - + u64 (*override_adr) + (struct sdw_bus *bus, u64 addr); enum sdw_command_response (*xfer_msg) (struct sdw_bus *bus, struct sdw_msg *msg); enum sdw_command_response (*xfer_msg_defer) -- cgit v1.2.3 From 5bb643c39b97c440b2981c69f05596c7ea868b73 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 2 Mar 2021 16:27:18 +0800 Subject: soundwire: add master quirks for bus clash and parity Currently quirks are only allowed for Slave devices. This patch describes the need for two quirks at the Master level. a) bus clash The SoundWire specification allows a Slave device to report a bus clash with the in-band interrupt mechanism when it detects a conflict while driving a bitSlot it owns. This can be a symptom of an electrical conflict or a programming error, and it's vital to detect reliably. Unfortunately, on some platforms, bus clashes are randomly reported by Slave devices after a bus reset, with an interrupt status set even before the bus clash interrupt is enabled. These initial spurious interrupts are not relevant and should optionally be filtered out, while leaving the interrupt mechanism enabled to detect 'true' issues. This patch suggests the addition of a Master level quirk to discard such interrupts. The quirk should in theory have been added at the Slave level, but since the problem was detected with different generations of Slave devices it's hard to point to a specific IP. The problem might also be board-dependent and hence dealing with a Master quirk is simpler. b) parity Additional tests on a new platform with the Maxim 98373 amplifier showed a rare case where the parity interrupt is also thrown on startup, at the same time as bus clashes. This issue only seems to happen infrequently and was only observed during suspend-resume stress tests while audio is streaming. We could make the problem go away by adding a Slave-level quirk, but there is no evidence that the issue is actually a Slave problem: the parity is provided by the Master, which could also set an invalid parity in corner cases. BugLink: https://github.com/thesofproject/linux/issues/2578 BugLink: https://github.com/thesofproject/linux/issues/2533 Co-developed-by: Pierre-Louis Bossart Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20210302082720.12322-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index f0a3895e8faf..eaa1486bdca9 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -405,6 +405,7 @@ struct sdw_slave_prop { * command * @mclk_freq: clock reference passed to SoundWire Master, in Hz. * @hw_disabled: if true, the Master is not functional, typically due to pin-mux + * @quirks: bitmask identifying optional behavior beyond the scope of the MIPI specification */ struct sdw_master_prop { u32 revision; @@ -421,8 +422,29 @@ struct sdw_master_prop { u32 err_threshold; u32 mclk_freq; bool hw_disabled; + u64 quirks; }; +/* Definitions for Master quirks */ + +/* + * In a number of platforms bus clashes are reported after a hardware + * reset but without any explanations or evidence of a real problem. + * The following quirk will discard all initial bus clash interrupts + * but will leave the detection on should real bus clashes happen + */ +#define SDW_MASTER_QUIRKS_CLEAR_INITIAL_CLASH BIT(0) + +/* + * Some Slave devices have known issues with incorrect parity errors + * reported after a hardware reset. However during integration unexplained + * parity errors can be reported by Slave devices, possibly due to electrical + * issues at the Master level. + * The following quirk will discard all initial parity errors but will leave + * the detection on should real parity errors happen. + */ +#define SDW_MASTER_QUIRKS_CLEAR_INITIAL_PARITY BIT(1) + int sdw_master_read_prop(struct sdw_bus *bus); int sdw_slave_read_prop(struct sdw_slave *slave); -- cgit v1.2.3 From 458025f6c16188a9f28219578448fac24b2cd487 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Mar 2021 11:53:00 +0100 Subject: vgaarb: avoid -Wempty-body warnings Building with W=1 shows a few warnings for an empty macro: drivers/gpu/drm/qxl/qxl_drv.c: In function 'qxl_pci_probe': drivers/gpu/drm/qxl/qxl_drv.c:131:50: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] 131 | vga_put(pdev, VGA_RSRC_LEGACY_IO); | ^ drivers/gpu/drm/qxl/qxl_drv.c: In function 'qxl_pci_remove': drivers/gpu/drm/qxl/qxl_drv.c:159:50: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] 159 | vga_put(pdev, VGA_RSRC_LEGACY_IO); Change this to an inline function to make it more robust and avoid the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210322105307.1291840-2-arnd@kernel.org --- include/linux/vgaarb.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index fc6dfeba04a5..dc6ddce92066 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -112,7 +112,9 @@ static inline int vga_get_uninterruptible(struct pci_dev *pdev, #if defined(CONFIG_VGA_ARB) extern void vga_put(struct pci_dev *pdev, unsigned int rsrc); #else -#define vga_put(pdev, rsrc) +static inline void vga_put(struct pci_dev *pdev, unsigned int rsrc) +{ +} #endif -- cgit v1.2.3 From bddfdbcddbe267519cd36aeb115fdf8620980111 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Oct 2020 15:53:42 -0400 Subject: NFSD: Extract the svcxdr_init_encode() helper NFSD initializes an encode xdr_stream only after the RPC layer has already inserted the RPC Reply header. Thus it behaves differently than xdr_init_encode does, which assumes the passed-in xdr_buf is entirely devoid of content. nfs4proc.c has this server-side stream initialization helper, but it is visible only to the NFSv4 code. Move this helper to a place that can be accessed by NFSv2 and NFSv3 server XDR functions. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 31ee3b6047c3..e91d51ea028b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -248,6 +248,7 @@ struct svc_rqst { size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; struct xdr_stream rq_arg_stream; + struct xdr_stream rq_res_stream; struct page *rq_scratch_page; struct xdr_buf rq_res; struct page *rq_pages[RPCSVC_MAXPAGES + 1]; @@ -574,4 +575,28 @@ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); } +/** + * svcxdr_init_encode - Prepare an xdr_stream for svc Reply encoding + * @rqstp: controlling server RPC transaction context + * + */ +static inline void svcxdr_init_encode(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *resv = buf->head; + + xdr_reset_scratch_buffer(xdr); + + xdr->buf = buf; + xdr->iov = resv; + xdr->p = resv->iov_base + resv->iov_len; + xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + buf->len = resv->iov_len; + xdr->page_ptr = buf->pages - 1; + buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages); + buf->buflen -= rqstp->rq_auth_slack; + xdr->rqst = NULL; +} + #endif /* SUNRPC_SVC_H */ -- cgit v1.2.3 From cc9bcdad7773c295375e66c892c7ac00524706f2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 22 Oct 2020 15:23:50 -0400 Subject: NFSD: Update the NFSv3 READ3res encode to use struct xdr_stream Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2bc75c167f00..9dda7171b7b4 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -394,6 +394,26 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) return len; } +/** + * xdr_stream_encode_bool - Encode a "not present" list item + * @xdr: pointer to xdr_stream + * @n: boolean value to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n) +{ + const size_t len = XDR_UNIT; + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = n ? xdr_one : xdr_zero; + return len; +} + /** * xdr_stream_encode_u32 - Encode a 32-bit integer * @xdr: pointer to xdr_stream -- cgit v1.2.3 From ded04a587f6ceaaba3caefad4021f2212b46c9ff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 6 Nov 2020 13:15:09 -0500 Subject: NFSD: Update the NFSv3 PATHCONF3res encoder to use struct xdr_stream Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 9dda7171b7b4..a965cbc136ad 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -395,7 +395,21 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) } /** - * xdr_stream_encode_bool - Encode a "not present" list item + * xdr_encode_bool - Encode a boolean item + * @p: address in a buffer into which to encode + * @n: boolean value to encode + * + * Return value: + * Address of item following the encoded boolean + */ +static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) +{ + *p = n ? xdr_one : xdr_zero; + return p++; +} + +/** + * xdr_stream_encode_bool - Encode a boolean item * @xdr: pointer to xdr_stream * @n: boolean value to encode * @@ -410,7 +424,7 @@ static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n) if (unlikely(!p)) return -EMSGSIZE; - *p = n ? xdr_one : xdr_zero; + xdr_encode_bool(p, n); return len; } -- cgit v1.2.3 From 8edc0648880a151026fe625fa1b76772b5766f68 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Nov 2020 14:55:05 -0500 Subject: NFSD: Add an xdr_stream-based encoder for NFSv2/3 ACLs Signed-off-by: Chuck Lever --- include/linux/nfsacl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h index 0ba99c513649..8e76a79cdc6a 100644 --- a/include/linux/nfsacl.h +++ b/include/linux/nfsacl.h @@ -41,5 +41,8 @@ nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, extern bool nfs_stream_decode_acl(struct xdr_stream *xdr, unsigned int *aclcnt, struct posix_acl **pacl); +extern bool +nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode, + struct posix_acl *acl, int encode_entries, int typeflag); #endif /* __LINUX_NFSACL_H */ -- cgit v1.2.3 From 417c0fc24dd4dbd60d94fa8deb36bf1176930e06 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 12 Mar 2021 09:36:02 +0100 Subject: mfd/power: ab8500: Push data to power supply code The global definition of platform data for the battery management code has no utility after the OF conversion, move the to be a local file in drivers/power/supply and stop defining the platform data in drivers/power/supply/ab8500_bmdata.c and broadcast to the kernel only to have it assigned as platform data to the MFD cells and then picked back into the same subsystem that defined it in the first place. This kills off a layer of indirection. Signed-off-by: Linus Walleij Acked-by: Sebastian Reichel Signed-off-by: Lee Jones --- include/linux/mfd/abx500/ab8500-bm.h | 476 ----------------------------------- 1 file changed, 476 deletions(-) delete mode 100644 include/linux/mfd/abx500/ab8500-bm.h (limited to 'include/linux') diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h deleted file mode 100644 index 903e94c189d8..000000000000 --- a/include/linux/mfd/abx500/ab8500-bm.h +++ /dev/null @@ -1,476 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright ST-Ericsson 2012. - * - * Author: Arun Murthy - */ - -#ifndef _AB8500_BM_H -#define _AB8500_BM_H - -#include -#include - -/* - * System control 2 register offsets. - * bank = 0x02 - */ -#define AB8500_MAIN_WDOG_CTRL_REG 0x01 -#define AB8500_LOW_BAT_REG 0x03 -#define AB8500_BATT_OK_REG 0x04 -/* - * USB/ULPI register offsets - * Bank : 0x5 - */ -#define AB8500_USB_LINE_STAT_REG 0x80 -#define AB8500_USB_LINE_CTRL2_REG 0x82 -#define AB8500_USB_LINK1_STAT_REG 0x94 - -/* - * Charger / status register offfsets - * Bank : 0x0B - */ -#define AB8500_CH_STATUS1_REG 0x00 -#define AB8500_CH_STATUS2_REG 0x01 -#define AB8500_CH_USBCH_STAT1_REG 0x02 -#define AB8500_CH_USBCH_STAT2_REG 0x03 -#define AB8540_CH_USBCH_STAT3_REG 0x04 -#define AB8500_CH_STAT_REG 0x05 - -/* - * Charger / control register offfsets - * Bank : 0x0B - */ -#define AB8500_CH_VOLT_LVL_REG 0x40 -#define AB8500_CH_VOLT_LVL_MAX_REG 0x41 /*Only in Cut2.0*/ -#define AB8500_CH_OPT_CRNTLVL_REG 0x42 -#define AB8500_CH_OPT_CRNTLVL_MAX_REG 0x43 /*Only in Cut2.0*/ -#define AB8500_CH_WD_TIMER_REG 0x50 -#define AB8500_CHARG_WD_CTRL 0x51 -#define AB8500_BTEMP_HIGH_TH 0x52 -#define AB8500_LED_INDICATOR_PWM_CTRL 0x53 -#define AB8500_LED_INDICATOR_PWM_DUTY 0x54 -#define AB8500_BATT_OVV 0x55 -#define AB8500_CHARGER_CTRL 0x56 -#define AB8500_BAT_CTRL_CURRENT_SOURCE 0x60 /*Only in Cut2.0*/ - -/* - * Charger / main control register offsets - * Bank : 0x0B - */ -#define AB8500_MCH_CTRL1 0x80 -#define AB8500_MCH_CTRL2 0x81 -#define AB8500_MCH_IPT_CURLVL_REG 0x82 -#define AB8500_CH_WD_REG 0x83 - -/* - * Charger / USB control register offsets - * Bank : 0x0B - */ -#define AB8500_USBCH_CTRL1_REG 0xC0 -#define AB8500_USBCH_CTRL2_REG 0xC1 -#define AB8500_USBCH_IPT_CRNTLVL_REG 0xC2 -#define AB8540_USB_PP_MODE_REG 0xC5 -#define AB8540_USB_PP_CHR_REG 0xC6 - -/* - * Gas Gauge register offsets - * Bank : 0x0C - */ -#define AB8500_GASG_CC_CTRL_REG 0x00 -#define AB8500_GASG_CC_ACCU1_REG 0x01 -#define AB8500_GASG_CC_ACCU2_REG 0x02 -#define AB8500_GASG_CC_ACCU3_REG 0x03 -#define AB8500_GASG_CC_ACCU4_REG 0x04 -#define AB8500_GASG_CC_SMPL_CNTRL_REG 0x05 -#define AB8500_GASG_CC_SMPL_CNTRH_REG 0x06 -#define AB8500_GASG_CC_SMPL_CNVL_REG 0x07 -#define AB8500_GASG_CC_SMPL_CNVH_REG 0x08 -#define AB8500_GASG_CC_CNTR_AVGOFF_REG 0x09 -#define AB8500_GASG_CC_OFFSET_REG 0x0A -#define AB8500_GASG_CC_NCOV_ACCU 0x10 -#define AB8500_GASG_CC_NCOV_ACCU_CTRL 0x11 -#define AB8500_GASG_CC_NCOV_ACCU_LOW 0x12 -#define AB8500_GASG_CC_NCOV_ACCU_MED 0x13 -#define AB8500_GASG_CC_NCOV_ACCU_HIGH 0x14 - -/* - * Interrupt register offsets - * Bank : 0x0E - */ -#define AB8500_IT_SOURCE2_REG 0x01 -#define AB8500_IT_SOURCE21_REG 0x14 - -/* - * RTC register offsets - * Bank: 0x0F - */ -#define AB8500_RTC_BACKUP_CHG_REG 0x0C -#define AB8500_RTC_CC_CONF_REG 0x01 -#define AB8500_RTC_CTRL_REG 0x0B -#define AB8500_RTC_CTRL1_REG 0x11 - -/* - * OTP register offsets - * Bank : 0x15 - */ -#define AB8500_OTP_CONF_15 0x0E - -/* GPADC constants from AB8500 spec, UM0836 */ -#define ADC_RESOLUTION 1024 -#define ADC_CH_MAIN_MIN 0 -#define ADC_CH_MAIN_MAX 20030 -#define ADC_CH_VBUS_MIN 0 -#define ADC_CH_VBUS_MAX 20030 -#define ADC_CH_VBAT_MIN 2300 -#define ADC_CH_VBAT_MAX 4800 -#define ADC_CH_BKBAT_MIN 0 -#define ADC_CH_BKBAT_MAX 3200 - -/* Main charge i/p current */ -#define MAIN_CH_IP_CUR_0P9A 0x80 -#define MAIN_CH_IP_CUR_1P0A 0x90 -#define MAIN_CH_IP_CUR_1P1A 0xA0 -#define MAIN_CH_IP_CUR_1P2A 0xB0 -#define MAIN_CH_IP_CUR_1P3A 0xC0 -#define MAIN_CH_IP_CUR_1P4A 0xD0 -#define MAIN_CH_IP_CUR_1P5A 0xE0 - -/* ChVoltLevel */ -#define CH_VOL_LVL_3P5 0x00 -#define CH_VOL_LVL_4P0 0x14 -#define CH_VOL_LVL_4P05 0x16 -#define CH_VOL_LVL_4P1 0x1B -#define CH_VOL_LVL_4P15 0x20 -#define CH_VOL_LVL_4P2 0x25 -#define CH_VOL_LVL_4P6 0x4D - -/* ChOutputCurrentLevel */ -#define CH_OP_CUR_LVL_0P1 0x00 -#define CH_OP_CUR_LVL_0P2 0x01 -#define CH_OP_CUR_LVL_0P3 0x02 -#define CH_OP_CUR_LVL_0P4 0x03 -#define CH_OP_CUR_LVL_0P5 0x04 -#define CH_OP_CUR_LVL_0P6 0x05 -#define CH_OP_CUR_LVL_0P7 0x06 -#define CH_OP_CUR_LVL_0P8 0x07 -#define CH_OP_CUR_LVL_0P9 0x08 -#define CH_OP_CUR_LVL_1P4 0x0D -#define CH_OP_CUR_LVL_1P5 0x0E -#define CH_OP_CUR_LVL_1P6 0x0F -#define CH_OP_CUR_LVL_2P 0x3F - -/* BTEMP High thermal limits */ -#define BTEMP_HIGH_TH_57_0 0x00 -#define BTEMP_HIGH_TH_52 0x01 -#define BTEMP_HIGH_TH_57_1 0x02 -#define BTEMP_HIGH_TH_62 0x03 - -/* current is mA */ -#define USB_0P1A 100 -#define USB_0P2A 200 -#define USB_0P3A 300 -#define USB_0P4A 400 -#define USB_0P5A 500 - -#define LOW_BAT_3P1V 0x20 -#define LOW_BAT_2P3V 0x00 -#define LOW_BAT_RESET 0x01 -#define LOW_BAT_ENABLE 0x01 - -/* Backup battery constants */ -#define BUP_ICH_SEL_50UA 0x00 -#define BUP_ICH_SEL_150UA 0x04 -#define BUP_ICH_SEL_300UA 0x08 -#define BUP_ICH_SEL_700UA 0x0C - -enum bup_vch_sel { - BUP_VCH_SEL_2P5V, - BUP_VCH_SEL_2P6V, - BUP_VCH_SEL_2P8V, - BUP_VCH_SEL_3P1V, - /* - * Note that the following 5 values 2.7v, 2.9v, 3.0v, 3.2v, 3.3v - * are only available on ab8540. You can't choose these 5 - * voltage on ab8500/ab8505/ab9540. - */ - BUP_VCH_SEL_2P7V, - BUP_VCH_SEL_2P9V, - BUP_VCH_SEL_3P0V, - BUP_VCH_SEL_3P2V, - BUP_VCH_SEL_3P3V, -}; - -#define BUP_VCH_RANGE 0x02 -#define VBUP33_VRTCN 0x01 - -/* Battery OVV constants */ -#define BATT_OVV_ENA 0x02 -#define BATT_OVV_TH_3P7 0x00 -#define BATT_OVV_TH_4P75 0x01 - -/* A value to indicate over voltage */ -#define BATT_OVV_VALUE 4750 - -/* VBUS OVV constants */ -#define VBUS_OVV_SELECT_MASK 0x78 -#define VBUS_OVV_SELECT_5P6V 0x00 -#define VBUS_OVV_SELECT_5P7V 0x08 -#define VBUS_OVV_SELECT_5P8V 0x10 -#define VBUS_OVV_SELECT_5P9V 0x18 -#define VBUS_OVV_SELECT_6P0V 0x20 -#define VBUS_OVV_SELECT_6P1V 0x28 -#define VBUS_OVV_SELECT_6P2V 0x30 -#define VBUS_OVV_SELECT_6P3V 0x38 - -#define VBUS_AUTO_IN_CURR_LIM_ENA 0x04 - -/* Fuel Gauge constants */ -#define RESET_ACCU 0x02 -#define READ_REQ 0x01 -#define CC_DEEP_SLEEP_ENA 0x02 -#define CC_PWR_UP_ENA 0x01 -#define CC_SAMPLES_40 0x28 -#define RD_NCONV_ACCU_REQ 0x01 -#define CC_CALIB 0x08 -#define CC_INTAVGOFFSET_ENA 0x10 -#define CC_MUXOFFSET 0x80 -#define CC_INT_CAL_N_AVG_MASK 0x60 -#define CC_INT_CAL_SAMPLES_16 0x40 -#define CC_INT_CAL_SAMPLES_8 0x20 -#define CC_INT_CAL_SAMPLES_4 0x00 - -/* RTC constants */ -#define RTC_BUP_CH_ENA 0x10 - -/* BatCtrl Current Source Constants */ -#define BAT_CTRL_7U_ENA 0x01 -#define BAT_CTRL_20U_ENA 0x02 -#define BAT_CTRL_18U_ENA 0x01 -#define BAT_CTRL_16U_ENA 0x02 -#define BAT_CTRL_CMP_ENA 0x04 -#define FORCE_BAT_CTRL_CMP_HIGH 0x08 -#define BAT_CTRL_PULL_UP_ENA 0x10 - -/* Battery type */ -#define BATTERY_UNKNOWN 00 - -/* Registers for pcut feature in ab8505 and ab9540 */ -#define AB8505_RTC_PCUT_CTL_STATUS_REG 0x12 -#define AB8505_RTC_PCUT_TIME_REG 0x13 -#define AB8505_RTC_PCUT_MAX_TIME_REG 0x14 -#define AB8505_RTC_PCUT_FLAG_TIME_REG 0x15 -#define AB8505_RTC_PCUT_RESTART_REG 0x16 -#define AB8505_RTC_PCUT_DEBOUNCE_REG 0x17 - -/* USB Power Path constants for ab8540 */ -#define BUS_VSYS_VOL_SELECT_MASK 0x06 -#define BUS_VSYS_VOL_SELECT_3P6V 0x00 -#define BUS_VSYS_VOL_SELECT_3P325V 0x02 -#define BUS_VSYS_VOL_SELECT_3P9V 0x04 -#define BUS_VSYS_VOL_SELECT_4P3V 0x06 -#define BUS_POWER_PATH_MODE_ENA 0x01 -#define BUS_PP_PRECHG_CURRENT_MASK 0x0E -#define BUS_POWER_PATH_PRECHG_ENA 0x01 - -/** - * struct res_to_temp - defines one point in a temp to res curve. To - * be used in battery packs that combines the identification resistor with a - * NTC resistor. - * @temp: battery pack temperature in Celsius - * @resist: NTC resistor net total resistance - */ -struct res_to_temp { - int temp; - int resist; -}; - -/** - * struct batres_vs_temp - defines one point in a temp vs battery internal - * resistance curve. - * @temp: battery pack temperature in Celsius - * @resist: battery internal reistance in mOhm - */ -struct batres_vs_temp { - int temp; - int resist; -}; - -/* Forward declaration */ -struct ab8500_fg; - -/** - * struct ab8500_fg_parameters - Fuel gauge algorithm parameters, in seconds - * if not specified - * @recovery_sleep_timer: Time between measurements while recovering - * @recovery_total_time: Total recovery time - * @init_timer: Measurement interval during startup - * @init_discard_time: Time we discard voltage measurement at startup - * @init_total_time: Total init time during startup - * @high_curr_time: Time current has to be high to go to recovery - * @accu_charging: FG accumulation time while charging - * @accu_high_curr: FG accumulation time in high current mode - * @high_curr_threshold: High current threshold, in mA - * @lowbat_threshold: Low battery threshold, in mV - * @battok_falling_th_sel0 Threshold in mV for battOk signal sel0 - * Resolution in 50 mV step. - * @battok_raising_th_sel1 Threshold in mV for battOk signal sel1 - * Resolution in 50 mV step. - * @user_cap_limit Capacity reported from user must be within this - * limit to be considered as sane, in percentage - * points. - * @maint_thres This is the threshold where we stop reporting - * battery full while in maintenance, in per cent - * @pcut_enable: Enable power cut feature in ab8505 - * @pcut_max_time: Max time threshold - * @pcut_flag_time: Flagtime threshold - * @pcut_max_restart: Max number of restarts - * @pcut_debunce_time: Sets battery debounce time - */ -struct ab8500_fg_parameters { - int recovery_sleep_timer; - int recovery_total_time; - int init_timer; - int init_discard_time; - int init_total_time; - int high_curr_time; - int accu_charging; - int accu_high_curr; - int high_curr_threshold; - int lowbat_threshold; - int battok_falling_th_sel0; - int battok_raising_th_sel1; - int user_cap_limit; - int maint_thres; - bool pcut_enable; - u8 pcut_max_time; - u8 pcut_flag_time; - u8 pcut_max_restart; - u8 pcut_debunce_time; -}; - -/** - * struct ab8500_charger_maximization - struct used by the board config. - * @use_maxi: Enable maximization for this battery type - * @maxi_chg_curr: Maximum charger current allowed - * @maxi_wait_cycles: cycles to wait before setting charger current - * @charger_curr_step delta between two charger current settings (mA) - */ -struct ab8500_maxim_parameters { - bool ena_maxi; - int chg_curr; - int wait_cycles; - int charger_curr_step; -}; - -/** - * struct ab8500_bm_capacity_levels - ab8500 capacity level data - * @critical: critical capacity level in percent - * @low: low capacity level in percent - * @normal: normal capacity level in percent - * @high: high capacity level in percent - * @full: full capacity level in percent - */ -struct ab8500_bm_capacity_levels { - int critical; - int low; - int normal; - int high; - int full; -}; - -/** - * struct ab8500_bm_charger_parameters - Charger specific parameters - * @usb_volt_max: maximum allowed USB charger voltage in mV - * @usb_curr_max: maximum allowed USB charger current in mA - * @ac_volt_max: maximum allowed AC charger voltage in mV - * @ac_curr_max: maximum allowed AC charger current in mA - */ -struct ab8500_bm_charger_parameters { - int usb_volt_max; - int usb_curr_max; - int ac_volt_max; - int ac_curr_max; -}; - -/** - * struct ab8500_bm_data - ab8500 battery management data - * @temp_under under this temp, charging is stopped - * @temp_low between this temp and temp_under charging is reduced - * @temp_high between this temp and temp_over charging is reduced - * @temp_over over this temp, charging is stopped - * @temp_interval_chg temperature measurement interval in s when charging - * @temp_interval_nochg temperature measurement interval in s when not charging - * @main_safety_tmr_h safety timer for main charger - * @usb_safety_tmr_h safety timer for usb charger - * @bkup_bat_v voltage which we charge the backup battery with - * @bkup_bat_i current which we charge the backup battery with - * @no_maintenance indicates that maintenance charging is disabled - * @capacity_scaling indicates whether capacity scaling is to be used - * @adc_therm placement of thermistor, batctrl or battemp adc - * @chg_unknown_bat flag to enable charging of unknown batteries - * @enable_overshoot flag to enable VBAT overshoot control - * @fg_res resistance of FG resistor in 0.1mOhm - * @n_btypes number of elements in array bat_type - * @batt_id index of the identified battery in array bat_type - * @interval_charging charge alg cycle period time when charging (sec) - * @interval_not_charging charge alg cycle period time when not charging (sec) - * @temp_hysteresis temperature hysteresis - * @gnd_lift_resistance Battery ground to phone ground resistance (mOhm) - * @maxi: maximization parameters - * @cap_levels capacity in percent for the different capacity levels - * @bat_type table of supported battery types - * @chg_params charger parameters - * @fg_params fuel gauge parameters - */ -struct ab8500_bm_data { - int temp_under; - int temp_low; - int temp_high; - int temp_over; - int temp_interval_chg; - int temp_interval_nochg; - int main_safety_tmr_h; - int usb_safety_tmr_h; - int bkup_bat_v; - int bkup_bat_i; - bool no_maintenance; - bool capacity_scaling; - bool chg_unknown_bat; - bool enable_overshoot; - enum abx500_adc_therm adc_therm; - int fg_res; - int n_btypes; - int batt_id; - int interval_charging; - int interval_not_charging; - int temp_hysteresis; - int gnd_lift_resistance; - const struct ab8500_maxim_parameters *maxi; - const struct ab8500_bm_capacity_levels *cap_levels; - const struct ab8500_bm_charger_parameters *chg_params; - const struct ab8500_fg_parameters *fg_params; -}; - -struct ab8500_btemp; -struct ab8500_gpadc; -struct ab8500_fg; - -#ifdef CONFIG_AB8500_BM -extern struct abx500_bm_data ab8500_bm_data; - -void ab8500_charger_usb_state_changed(u8 bm_usb_state, u16 mA); -struct ab8500_btemp *ab8500_btemp_get(void); -int ab8500_btemp_get_batctrl_temp(struct ab8500_btemp *btemp); -int ab8500_btemp_get_temp(struct ab8500_btemp *btemp); -struct ab8500_fg *ab8500_fg_get(void); -int ab8500_fg_inst_curr_blocking(struct ab8500_fg *dev); -int ab8500_fg_inst_curr_start(struct ab8500_fg *di); -int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res); -int ab8500_fg_inst_curr_started(struct ab8500_fg *di); -int ab8500_fg_inst_curr_done(struct ab8500_fg *di); - -#else -static struct abx500_bm_data ab8500_bm_data; -#endif -#endif /* _AB8500_BM_H */ -- cgit v1.2.3 From a65aa0ce23bd3bc29c9f3a6a90fe032e1dbd8f34 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 12 Mar 2021 09:36:03 +0100 Subject: mfd/power: ab8500: Push algorithm to power supply code The charging algorithm header is only used locally in the power supply subsystem so push this down into drivers/power/supply and rename from the confusing "ux500_chargalg.h" to "ab8500-chargalg.h" for clarity: it is only used with the AB8500. This is another remnant of non-DT code needing to pass data from boardfiles, which we don't do anymore. Signed-off-by: Linus Walleij Acked-by: Sebastian Reichel Signed-off-by: Lee Jones --- include/linux/mfd/abx500/ux500_chargalg.h | 51 ------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 include/linux/mfd/abx500/ux500_chargalg.h (limited to 'include/linux') diff --git a/include/linux/mfd/abx500/ux500_chargalg.h b/include/linux/mfd/abx500/ux500_chargalg.h deleted file mode 100644 index 9b97d284d0ce..000000000000 --- a/include/linux/mfd/abx500/ux500_chargalg.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2012 - * Author: Johan Gardsmark for ST-Ericsson. - */ - -#ifndef _UX500_CHARGALG_H -#define _UX500_CHARGALG_H - -#include - -/* - * Valid only for supplies of type: - * - POWER_SUPPLY_TYPE_MAINS, - * - POWER_SUPPLY_TYPE_USB, - * because only them store as drv_data pointer to struct ux500_charger. - */ -#define psy_to_ux500_charger(x) power_supply_get_drvdata(psy) - -/* Forward declaration */ -struct ux500_charger; - -struct ux500_charger_ops { - int (*enable) (struct ux500_charger *, int, int, int); - int (*check_enable) (struct ux500_charger *, int, int); - int (*kick_wd) (struct ux500_charger *); - int (*update_curr) (struct ux500_charger *, int); -}; - -/** - * struct ux500_charger - power supply ux500 charger sub class - * @psy power supply base class - * @ops ux500 charger operations - * @max_out_volt maximum output charger voltage in mV - * @max_out_curr maximum output charger current in mA - * @enabled indicates if this charger is used or not - * @external external charger unit (pm2xxx) - */ -struct ux500_charger { - struct power_supply *psy; - struct ux500_charger_ops ops; - int max_out_volt; - int max_out_curr; - int wdt_refresh; - bool enabled; - bool external; -}; - -extern struct blocking_notifier_head charger_notifier_list; - -#endif -- cgit v1.2.3 From ee0975c3089e1c3357ccc3ada7a94a95b61e708c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 12 Mar 2021 09:36:04 +0100 Subject: mfd/power: ab8500: Push data to power supply code There is a slew of defines, structs and enums and even a function call only relevant for the charging code that still lives in . Push it down to the "ab8500-bm.h" header in the power supply subsystem where it is actually used. Signed-off-by: Linus Walleij Acked-by: Sebastian Reichel Signed-off-by: Lee Jones --- include/linux/mfd/abx500.h | 276 --------------------------------------------- 1 file changed, 276 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index 23040b6f1615..7f07cfe44753 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -28,282 +28,6 @@ struct abx500_init_settings { u8 setting; }; -/* Battery driver related data */ -/* - * ADC for the battery thermistor. - * When using the ABx500_ADC_THERM_BATCTRL the battery ID resistor is combined - * with a NTC resistor to both identify the battery and to measure its - * temperature. Different phone manufactures uses different techniques to both - * identify the battery and to read its temperature. - */ -enum abx500_adc_therm { - ABx500_ADC_THERM_BATCTRL, - ABx500_ADC_THERM_BATTEMP, -}; - -/** - * struct abx500_res_to_temp - defines one point in a temp to res curve. To - * be used in battery packs that combines the identification resistor with a - * NTC resistor. - * @temp: battery pack temperature in Celsius - * @resist: NTC resistor net total resistance - */ -struct abx500_res_to_temp { - int temp; - int resist; -}; - -/** - * struct abx500_v_to_cap - Table for translating voltage to capacity - * @voltage: Voltage in mV - * @capacity: Capacity in percent - */ -struct abx500_v_to_cap { - int voltage; - int capacity; -}; - -/* Forward declaration */ -struct abx500_fg; - -/** - * struct abx500_fg_parameters - Fuel gauge algorithm parameters, in seconds - * if not specified - * @recovery_sleep_timer: Time between measurements while recovering - * @recovery_total_time: Total recovery time - * @init_timer: Measurement interval during startup - * @init_discard_time: Time we discard voltage measurement at startup - * @init_total_time: Total init time during startup - * @high_curr_time: Time current has to be high to go to recovery - * @accu_charging: FG accumulation time while charging - * @accu_high_curr: FG accumulation time in high current mode - * @high_curr_threshold: High current threshold, in mA - * @lowbat_threshold: Low battery threshold, in mV - * @overbat_threshold: Over battery threshold, in mV - * @battok_falling_th_sel0 Threshold in mV for battOk signal sel0 - * Resolution in 50 mV step. - * @battok_raising_th_sel1 Threshold in mV for battOk signal sel1 - * Resolution in 50 mV step. - * @user_cap_limit Capacity reported from user must be within this - * limit to be considered as sane, in percentage - * points. - * @maint_thres This is the threshold where we stop reporting - * battery full while in maintenance, in per cent - * @pcut_enable: Enable power cut feature in ab8505 - * @pcut_max_time: Max time threshold - * @pcut_flag_time: Flagtime threshold - * @pcut_max_restart: Max number of restarts - * @pcut_debounce_time: Sets battery debounce time - */ -struct abx500_fg_parameters { - int recovery_sleep_timer; - int recovery_total_time; - int init_timer; - int init_discard_time; - int init_total_time; - int high_curr_time; - int accu_charging; - int accu_high_curr; - int high_curr_threshold; - int lowbat_threshold; - int overbat_threshold; - int battok_falling_th_sel0; - int battok_raising_th_sel1; - int user_cap_limit; - int maint_thres; - bool pcut_enable; - u8 pcut_max_time; - u8 pcut_flag_time; - u8 pcut_max_restart; - u8 pcut_debounce_time; -}; - -/** - * struct abx500_charger_maximization - struct used by the board config. - * @use_maxi: Enable maximization for this battery type - * @maxi_chg_curr: Maximum charger current allowed - * @maxi_wait_cycles: cycles to wait before setting charger current - * @charger_curr_step delta between two charger current settings (mA) - */ -struct abx500_maxim_parameters { - bool ena_maxi; - int chg_curr; - int wait_cycles; - int charger_curr_step; -}; - -/** - * struct abx500_battery_type - different batteries supported - * @name: battery technology - * @resis_high: battery upper resistance limit - * @resis_low: battery lower resistance limit - * @charge_full_design: Maximum battery capacity in mAh - * @nominal_voltage: Nominal voltage of the battery in mV - * @termination_vol: max voltage upto which battery can be charged - * @termination_curr battery charging termination current in mA - * @recharge_cap battery capacity limit that will trigger a new - * full charging cycle in the case where maintenan- - * -ce charging has been disabled - * @normal_cur_lvl: charger current in normal state in mA - * @normal_vol_lvl: charger voltage in normal state in mV - * @maint_a_cur_lvl: charger current in maintenance A state in mA - * @maint_a_vol_lvl: charger voltage in maintenance A state in mV - * @maint_a_chg_timer_h: charge time in maintenance A state - * @maint_b_cur_lvl: charger current in maintenance B state in mA - * @maint_b_vol_lvl: charger voltage in maintenance B state in mV - * @maint_b_chg_timer_h: charge time in maintenance B state - * @low_high_cur_lvl: charger current in temp low/high state in mA - * @low_high_vol_lvl: charger voltage in temp low/high state in mV' - * @battery_resistance: battery inner resistance in mOhm. - * @n_r_t_tbl_elements: number of elements in r_to_t_tbl - * @r_to_t_tbl: table containing resistance to temp points - * @n_v_cap_tbl_elements: number of elements in v_to_cap_tbl - * @v_to_cap_tbl: Voltage to capacity (in %) table - * @n_batres_tbl_elements number of elements in the batres_tbl - * @batres_tbl battery internal resistance vs temperature table - */ -struct abx500_battery_type { - int name; - int resis_high; - int resis_low; - int charge_full_design; - int nominal_voltage; - int termination_vol; - int termination_curr; - int recharge_cap; - int normal_cur_lvl; - int normal_vol_lvl; - int maint_a_cur_lvl; - int maint_a_vol_lvl; - int maint_a_chg_timer_h; - int maint_b_cur_lvl; - int maint_b_vol_lvl; - int maint_b_chg_timer_h; - int low_high_cur_lvl; - int low_high_vol_lvl; - int battery_resistance; - int n_temp_tbl_elements; - const struct abx500_res_to_temp *r_to_t_tbl; - int n_v_cap_tbl_elements; - const struct abx500_v_to_cap *v_to_cap_tbl; - int n_batres_tbl_elements; - const struct batres_vs_temp *batres_tbl; -}; - -/** - * struct abx500_bm_capacity_levels - abx500 capacity level data - * @critical: critical capacity level in percent - * @low: low capacity level in percent - * @normal: normal capacity level in percent - * @high: high capacity level in percent - * @full: full capacity level in percent - */ -struct abx500_bm_capacity_levels { - int critical; - int low; - int normal; - int high; - int full; -}; - -/** - * struct abx500_bm_charger_parameters - Charger specific parameters - * @usb_volt_max: maximum allowed USB charger voltage in mV - * @usb_curr_max: maximum allowed USB charger current in mA - * @ac_volt_max: maximum allowed AC charger voltage in mV - * @ac_curr_max: maximum allowed AC charger current in mA - */ -struct abx500_bm_charger_parameters { - int usb_volt_max; - int usb_curr_max; - int ac_volt_max; - int ac_curr_max; -}; - -/** - * struct abx500_bm_data - abx500 battery management data - * @temp_under under this temp, charging is stopped - * @temp_low between this temp and temp_under charging is reduced - * @temp_high between this temp and temp_over charging is reduced - * @temp_over over this temp, charging is stopped - * @temp_now present battery temperature - * @temp_interval_chg temperature measurement interval in s when charging - * @temp_interval_nochg temperature measurement interval in s when not charging - * @main_safety_tmr_h safety timer for main charger - * @usb_safety_tmr_h safety timer for usb charger - * @bkup_bat_v voltage which we charge the backup battery with - * @bkup_bat_i current which we charge the backup battery with - * @no_maintenance indicates that maintenance charging is disabled - * @capacity_scaling indicates whether capacity scaling is to be used - * @abx500_adc_therm placement of thermistor, batctrl or battemp adc - * @chg_unknown_bat flag to enable charging of unknown batteries - * @enable_overshoot flag to enable VBAT overshoot control - * @auto_trig flag to enable auto adc trigger - * @fg_res resistance of FG resistor in 0.1mOhm - * @n_btypes number of elements in array bat_type - * @batt_id index of the identified battery in array bat_type - * @interval_charging charge alg cycle period time when charging (sec) - * @interval_not_charging charge alg cycle period time when not charging (sec) - * @temp_hysteresis temperature hysteresis - * @gnd_lift_resistance Battery ground to phone ground resistance (mOhm) - * @n_chg_out_curr number of elements in array chg_output_curr - * @n_chg_in_curr number of elements in array chg_input_curr - * @chg_output_curr charger output current level map - * @chg_input_curr charger input current level map - * @maxi maximization parameters - * @cap_levels capacity in percent for the different capacity levels - * @bat_type table of supported battery types - * @chg_params charger parameters - * @fg_params fuel gauge parameters - */ -struct abx500_bm_data { - int temp_under; - int temp_low; - int temp_high; - int temp_over; - int temp_now; - int temp_interval_chg; - int temp_interval_nochg; - int main_safety_tmr_h; - int usb_safety_tmr_h; - int bkup_bat_v; - int bkup_bat_i; - bool autopower_cfg; - bool ac_enabled; - bool usb_enabled; - bool no_maintenance; - bool capacity_scaling; - bool chg_unknown_bat; - bool enable_overshoot; - bool auto_trig; - enum abx500_adc_therm adc_therm; - int fg_res; - int n_btypes; - int batt_id; - int interval_charging; - int interval_not_charging; - int temp_hysteresis; - int gnd_lift_resistance; - int n_chg_out_curr; - int n_chg_in_curr; - int *chg_output_curr; - int *chg_input_curr; - const struct abx500_maxim_parameters *maxi; - const struct abx500_bm_capacity_levels *cap_levels; - struct abx500_battery_type *bat_type; - const struct abx500_bm_charger_parameters *chg_params; - const struct abx500_fg_parameters *fg_params; -}; - -enum { - NTC_EXTERNAL = 0, - NTC_INTERNAL, -}; - -int ab8500_bm_of_probe(struct device *dev, - struct device_node *np, - struct abx500_bm_data *bm); - int abx500_set_register_interruptible(struct device *dev, u8 bank, u8 reg, u8 value); int abx500_get_register_interruptible(struct device *dev, u8 bank, u8 reg, -- cgit v1.2.3 From 7abb18bd7567480e34f46d3512369ec49499064e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 25 Feb 2021 16:10:38 -0800 Subject: rcu: Provide polling interfaces for Tree RCU grace periods There is a need for a non-blocking polling interface for RCU grace periods, so this commit supplies start_poll_synchronize_rcu() and poll_state_synchronize_rcu() for this purpose. Note that the existing get_state_synchronize_rcu() may be used if future grace periods are inevitable (perhaps due to a later call_rcu() invocation). The new start_poll_synchronize_rcu() is to be used if future grace periods might not otherwise happen. Finally, poll_state_synchronize_rcu() provides a lockless check for a grace period having elapsed since the corresponding call to either of the get_state_synchronize_rcu() or start_poll_synchronize_rcu(). As with get_state_synchronize_rcu(), the return value from either get_state_synchronize_rcu() or start_poll_synchronize_rcu() is passed in to a later call to either poll_state_synchronize_rcu() or the existing (might_sleep) cond_synchronize_rcu(). [ paulmck: Remove redundant smp_mb() per Frederic Weisbecker feedback. ] [ Update poll_state_synchronize_rcu() docbook per Frederic Weisbecker feedback. ] Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcutree.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index df578b73960f..b89b54130f49 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -41,6 +41,8 @@ void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); unsigned long get_state_synchronize_rcu(void); +unsigned long start_poll_synchronize_rcu(void); +bool poll_state_synchronize_rcu(unsigned long oldstate); void cond_synchronize_rcu(unsigned long oldstate); void rcu_idle_enter(void); -- cgit v1.2.3 From 2d669ceb69c276f7637cf760287ca4187add082e Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 16 Mar 2021 13:36:02 +0900 Subject: dm table: Fix zoned model check and zone sectors check Commit 24f6b6036c9e ("dm table: fix zoned iterate_devices based device capability checks") triggered dm table load failure when dm-zoned device is set up for zoned block devices and a regular device for cache. The commit inverted logic of two callback functions for iterate_devices: device_is_zoned_model() and device_matches_zone_sectors(). The logic of device_is_zoned_model() was inverted then all destination devices of all targets in dm table are required to have the expected zoned model. This is fine for dm-linear, dm-flakey and dm-crypt on zoned block devices since each target has only one destination device. However, this results in failure for dm-zoned with regular cache device since that target has both regular block device and zoned block devices. As for device_matches_zone_sectors(), the commit inverted the logic to require all zoned block devices in each target have the specified zone_sectors. This check also fails for regular block device which does not have zones. To avoid the check failures, fix the zone model check and the zone sectors check. For zone model check, introduce the new feature flag DM_TARGET_MIXED_ZONED_MODEL, and set it to dm-zoned target. When the target has this flag, allow it to have destination devices with any zoned model. For zone sectors check, skip the check if the destination device is not a zoned block device. Also add comments and improve an error message to clarify expectations to the two checks. Fixes: 24f6b6036c9e ("dm table: fix zoned iterate_devices based device capability checks") Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Damien Le Moal Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 7f4ac87c0b32..5c641f930caf 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -253,7 +253,11 @@ struct target_type { #define dm_target_passes_integrity(type) ((type)->features & DM_TARGET_PASSES_INTEGRITY) /* - * Indicates that a target supports host-managed zoned block devices. + * Indicates support for zoned block devices: + * - DM_TARGET_ZONED_HM: the target also supports host-managed zoned + * block devices but does not support combining different zoned models. + * - DM_TARGET_MIXED_ZONED_MODEL: the target supports combining multiple + * devices with different zoned models. */ #ifdef CONFIG_BLK_DEV_ZONED #define DM_TARGET_ZONED_HM 0x00000040 @@ -275,6 +279,15 @@ struct target_type { #define DM_TARGET_PASSES_CRYPTO 0x00000100 #define dm_target_passes_crypto(type) ((type)->features & DM_TARGET_PASSES_CRYPTO) +#ifdef CONFIG_BLK_DEV_ZONED +#define DM_TARGET_MIXED_ZONED_MODEL 0x00000200 +#define dm_target_supports_mixed_zoned_model(type) \ + ((type)->features & DM_TARGET_MIXED_ZONED_MODEL) +#else +#define DM_TARGET_MIXED_ZONED_MODEL 0x00000000 +#define dm_target_supports_mixed_zoned_model(type) (false) +#endif + struct dm_target { struct dm_table *table; struct target_type *type; -- cgit v1.2.3 From c558d47596867ff1082fd7475b63670f63f7f5cf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Mar 2021 18:32:30 -0500 Subject: svcrdma: Maintain a Receive water mark Post more Receives when the number of pending Receives drops below a water mark. The batch mechanism is disabled if the underlying device cannot support a reasonably-sized Receive Queue. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 1e76ed688044..722fc7c48725 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -94,6 +94,8 @@ struct svcxprt_rdma { spinlock_t sc_rw_ctxt_lock; struct list_head sc_rw_ctxts; + u32 sc_pending_recvs; + u32 sc_recv_batch; struct list_head sc_rq_dto_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; -- cgit v1.2.3 From e844d307d46cfa7e09cdb671941bfd5f1be86773 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 20 Feb 2021 18:53:40 -0500 Subject: svcrdma: Add a "deferred close" helper Refactor a bit of commonly used logic so that every site that wants a close deferred to an nfsd thread does all the right things (set_bit(XPT_CLOSE) then enqueue). Also, once XPT_CLOSE is set on a transport, it is never cleared. If XPT_CLOSE is already set, then the close is already being handled and the enqueue can be skipped. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 92455e0d5244..34dacadfe517 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -143,6 +143,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen); void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *xprt); void svc_age_temp_xprts_now(struct svc_serv *, struct sockaddr *); +void svc_xprt_deferred_close(struct svc_xprt *xprt); static inline void svc_xprt_get(struct svc_xprt *xprt) { -- cgit v1.2.3 From 2a1e4f21d84184f7ff5768ee3d3d0c30b1135867 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 13 Jan 2021 13:57:18 -0500 Subject: svcrdma: Normalize Send page handling Currently svc_rdma_sendto() migrates xdr_buf pages into a separate page list and NULLs out a bunch of entries in rq_pages while the pages are under I/O. The Send completion handler then frees those pages later. Instead, let's wait for the Send completion, then handle page releasing in the nfsd thread. I'd like to avoid the cost of 250+ put_page() calls in the Send completion handler, which is single- threaded. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 722fc7c48725..5841978550c6 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -160,6 +160,7 @@ struct svc_rdma_send_ctxt { struct ib_send_wr sc_send_wr; struct ib_cqe sc_cqe; + struct completion sc_done; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; void *sc_xprt_buf; -- cgit v1.2.3 From 579900670ac770a547ff607a60c02c56a7d27bd7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 28 Jan 2021 16:47:56 -0500 Subject: svcrdma: Remove unused sc_pages field Clean up. This significantly reduces the size of struct svc_rdma_send_ctxt. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 5841978550c6..6e621e1f56b8 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -164,9 +164,8 @@ struct svc_rdma_send_ctxt { struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; void *sc_xprt_buf; - int sc_page_count; int sc_cur_sge_no; - struct page *sc_pages[RPCSVC_MAXPAGES]; + struct ib_sge sc_sges[]; }; -- cgit v1.2.3 From 7dcfbd86adc45f6d6b37278efd22530cf80ab474 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 29 Jan 2021 13:04:04 -0500 Subject: SUNRPC: Export svc_xprt_received() Prepare svc_xprt_received() to be called from transport code instead of from generic RPC server code. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 34dacadfe517..571f605bc91e 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -130,6 +130,7 @@ void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, int svc_create_xprt(struct svc_serv *, const char *, struct net *, const int, const unsigned short, int, const struct cred *); +void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_do_enqueue(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); -- cgit v1.2.3 From 1f7ea1cd6a3748427512ccc9582e18cd9efea966 Mon Sep 17 00:00:00 2001 From: Qi Zhang Date: Tue, 9 Mar 2021 11:08:04 +0800 Subject: ice: Enable FDIR Configure for AVF The virtual channel is going to be extended to support FDIR and RSS configure from AVF. New data structures and OP codes will be added, the patch enable the FDIR part. To support above advanced AVF feature, we need to figure out what kind of data structure should be passed from VF to PF to describe an FDIR rule or RSS config rule. The common part of the requirement is we need a data structure to represent the input set selection of a rule's hash key. An input set selection is a group of fields be selected from one or more network protocol layers that could be identified as a specific flow. For example, select dst IP address from an IPv4 header combined with dst port from the TCP header as the input set for an IPv4/TCP flow. The patch adds a new data structure virtchnl_proto_hdrs to abstract a network protocol headers group which is composed of layers of network protocol header(virtchnl_proto_hdr). A protocol header contains a 32 bits mask (field_selector) to describe which fields are selected as input sets, as well as a header type (enum virtchnl_proto_hdr_type). Each bit is mapped to a field in enum virtchnl_proto_hdr_field guided by its header type. +------------+-----------+------------------------------+ | | Proto Hdr | Header Type A | | | +------------------------------+ | | | BIT 31 | ... | BIT 1 | BIT 0 | | |-----------+------------------------------+ |Proto Hdrs | Proto Hdr | Header Type B | | | +------------------------------+ | | | BIT 31 | ... | BIT 1 | BIT 0 | | |-----------+------------------------------+ | | Proto Hdr | Header Type C | | | +------------------------------+ | | | BIT 31 | ... | BIT 1 | BIT 0 | | |-----------+------------------------------+ | | .... | +-------------------------------------------------------+ All fields in enum virtchnl_proto_hdr_fields are grouped with header type and the value of the first field of a header type is always 32 aligned. enum proto_hdr_type { header_type_A = 0; header_type_B = 1; .... } enum proto_hdr_field { /* header type A */ header_A_field_0 = 0, header_A_field_1 = 1, header_A_field_2 = 2, header_A_field_3 = 3, /* header type B */ header_B_field_0 = 32, // = header_type_B << 5 header_B_field_0 = 33, header_B_field_0 = 34 header_B_field_0 = 35, .... }; So we have: proto_hdr_type = proto_hdr_field / 32 bit offset = proto_hdr_field % 32 To simply the protocol header's operations, couple help macros are added. For example, to select src IP and dst port as input set for an IPv4/UDP flow. we have: struct virtchnl_proto_hdr hdr[2]; VIRTCHNL_SET_PROTO_HDR_TYPE(&hdr[0], IPV4) VIRTCHNL_ADD_PROTO_HDR_FIELD(&hdr[0], IPV4, SRC) VIRTCHNL_SET_PROTO_HDR_TYPE(&hdr[1], UDP) VIRTCHNL_ADD_PROTO_HDR_FIELD(&hdr[1], UDP, DST) The byte array is used to store the protocol header of a training package. The byte array must be network order. The patch added virtual channel support for iAVF FDIR add/validate/delete filter. iAVF FDIR is Flow Director for Intel Adaptive Virtual Function which can direct Ethernet packets to the queues of the Network Interface Card. Add/delete command is adding or deleting one rule for each virtual channel message, while validate command is just verifying if this rule is valid without any other operations. To add or delete one rule, driver needs to config TCAM and Profile, build training packets which contains the input set value, and send the training packets through FDIR Tx queue. In addition, driver needs to manage the software context to avoid adding duplicated rules, deleting non-existent rule, input set conflicts and other invalid cases. NOTE: Supported pattern/actions and their parse functions are not be included in this patch, they will be added in a separate one. Signed-off-by: Jeff Guo Signed-off-by: Yahui Cao Signed-off-by: Simei Su Signed-off-by: Beilei Xing Signed-off-by: Qi Zhang Tested-by: Chen Bo Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 278 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 40bad71865ea..47482049f640 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -136,6 +136,9 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_CHANNELS = 31, VIRTCHNL_OP_ADD_CLOUD_FILTER = 32, VIRTCHNL_OP_DEL_CLOUD_FILTER = 33, + /* opcode 34 - 46 are reserved */ + VIRTCHNL_OP_ADD_FDIR_FILTER = 47, + VIRTCHNL_OP_DEL_FDIR_FILTER = 48, }; /* These macros are used to generate compilation errors if a structure/union @@ -247,6 +250,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 #define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000 +#define VIRTCHNL_VF_OFFLOAD_FDIR_PF 0X10000000 /* Define below the capability flags that are not offloads */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED 0x00000080 @@ -559,6 +563,11 @@ enum virtchnl_action { /* action types */ VIRTCHNL_ACTION_DROP = 0, VIRTCHNL_ACTION_TC_REDIRECT, + VIRTCHNL_ACTION_PASSTHRU, + VIRTCHNL_ACTION_QUEUE, + VIRTCHNL_ACTION_Q_REGION, + VIRTCHNL_ACTION_MARK, + VIRTCHNL_ACTION_COUNT, }; enum virtchnl_flow_type { @@ -668,6 +677,269 @@ enum virtchnl_vfr_states { VIRTCHNL_VFR_VFACTIVE, }; +#define VIRTCHNL_MAX_NUM_PROTO_HDRS 32 +#define PROTO_HDR_SHIFT 5 +#define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT) +#define PROTO_HDR_FIELD_MASK ((1UL << PROTO_HDR_SHIFT) - 1) + +/* VF use these macros to configure each protocol header. + * Specify which protocol headers and protocol header fields base on + * virtchnl_proto_hdr_type and virtchnl_proto_hdr_field. + * @param hdr: a struct of virtchnl_proto_hdr + * @param hdr_type: ETH/IPV4/TCP, etc + * @param field: SRC/DST/TEID/SPI, etc + */ +#define VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, field) \ + ((hdr)->field_selector |= BIT((field) & PROTO_HDR_FIELD_MASK)) +#define VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, field) \ + ((hdr)->field_selector &= ~BIT((field) & PROTO_HDR_FIELD_MASK)) +#define VIRTCHNL_TEST_PROTO_HDR_FIELD(hdr, val) \ + ((hdr)->field_selector & BIT((val) & PROTO_HDR_FIELD_MASK)) +#define VIRTCHNL_GET_PROTO_HDR_FIELD(hdr) ((hdr)->field_selector) + +#define VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \ + (VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, \ + VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field)) +#define VIRTCHNL_DEL_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \ + (VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, \ + VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field)) + +#define VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, hdr_type) \ + ((hdr)->type = VIRTCHNL_PROTO_HDR_ ## hdr_type) +#define VIRTCHNL_GET_PROTO_HDR_TYPE(hdr) \ + (((hdr)->type) >> PROTO_HDR_SHIFT) +#define VIRTCHNL_TEST_PROTO_HDR_TYPE(hdr, val) \ + ((hdr)->type == ((val) >> PROTO_HDR_SHIFT)) +#define VIRTCHNL_TEST_PROTO_HDR(hdr, val) \ + (VIRTCHNL_TEST_PROTO_HDR_TYPE((hdr), (val)) && \ + VIRTCHNL_TEST_PROTO_HDR_FIELD((hdr), (val))) + +/* Protocol header type within a packet segment. A segment consists of one or + * more protocol headers that make up a logical group of protocol headers. Each + * logical group of protocol headers encapsulates or is encapsulated using/by + * tunneling or encapsulation protocols for network virtualization. + */ +enum virtchnl_proto_hdr_type { + VIRTCHNL_PROTO_HDR_NONE, + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_S_VLAN, + VIRTCHNL_PROTO_HDR_C_VLAN, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_IPV6, + VIRTCHNL_PROTO_HDR_TCP, + VIRTCHNL_PROTO_HDR_UDP, + VIRTCHNL_PROTO_HDR_SCTP, + VIRTCHNL_PROTO_HDR_GTPU_IP, + VIRTCHNL_PROTO_HDR_GTPU_EH, + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN, + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP, + VIRTCHNL_PROTO_HDR_PPPOE, + VIRTCHNL_PROTO_HDR_L2TPV3, + VIRTCHNL_PROTO_HDR_ESP, + VIRTCHNL_PROTO_HDR_AH, + VIRTCHNL_PROTO_HDR_PFCP, +}; + +/* Protocol header field within a protocol header. */ +enum virtchnl_proto_hdr_field { + /* ETHER */ + VIRTCHNL_PROTO_HDR_ETH_SRC = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ETH), + VIRTCHNL_PROTO_HDR_ETH_DST, + VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE, + /* S-VLAN */ + VIRTCHNL_PROTO_HDR_S_VLAN_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_S_VLAN), + /* C-VLAN */ + VIRTCHNL_PROTO_HDR_C_VLAN_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_C_VLAN), + /* IPV4 */ + VIRTCHNL_PROTO_HDR_IPV4_SRC = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV4), + VIRTCHNL_PROTO_HDR_IPV4_DST, + VIRTCHNL_PROTO_HDR_IPV4_DSCP, + VIRTCHNL_PROTO_HDR_IPV4_TTL, + VIRTCHNL_PROTO_HDR_IPV4_PROT, + /* IPV6 */ + VIRTCHNL_PROTO_HDR_IPV6_SRC = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV6), + VIRTCHNL_PROTO_HDR_IPV6_DST, + VIRTCHNL_PROTO_HDR_IPV6_TC, + VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT, + VIRTCHNL_PROTO_HDR_IPV6_PROT, + /* TCP */ + VIRTCHNL_PROTO_HDR_TCP_SRC_PORT = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_TCP), + VIRTCHNL_PROTO_HDR_TCP_DST_PORT, + /* UDP */ + VIRTCHNL_PROTO_HDR_UDP_SRC_PORT = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_UDP), + VIRTCHNL_PROTO_HDR_UDP_DST_PORT, + /* SCTP */ + VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_SCTP), + VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, + /* GTPU_IP */ + VIRTCHNL_PROTO_HDR_GTPU_IP_TEID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_IP), + /* GTPU_EH */ + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_EH), + VIRTCHNL_PROTO_HDR_GTPU_EH_QFI, + /* PPPOE */ + VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PPPOE), + /* L2TPV3 */ + VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_L2TPV3), + /* ESP */ + VIRTCHNL_PROTO_HDR_ESP_SPI = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ESP), + /* AH */ + VIRTCHNL_PROTO_HDR_AH_SPI = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_AH), + /* PFCP */ + VIRTCHNL_PROTO_HDR_PFCP_S_FIELD = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PFCP), + VIRTCHNL_PROTO_HDR_PFCP_SEID, +}; + +struct virtchnl_proto_hdr { + enum virtchnl_proto_hdr_type type; + u32 field_selector; /* a bit mask to select field for header type */ + u8 buffer[64]; + /** + * binary buffer in network order for specific header type. + * For example, if type = VIRTCHNL_PROTO_HDR_IPV4, a IPv4 + * header is expected to be copied into the buffer. + */ +}; + +VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_proto_hdr); + +struct virtchnl_proto_hdrs { + u8 tunnel_level; + /** + * specify where protocol header start from. + * 0 - from the outer layer + * 1 - from the first inner layer + * 2 - from the second inner layer + * .... + **/ + int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ + struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs); + +/* action configuration for FDIR */ +struct virtchnl_filter_action { + enum virtchnl_action type; + union { + /* used for queue and qgroup action */ + struct { + u16 index; + u8 region; + } queue; + /* used for count action */ + struct { + /* share counter ID with other flow rules */ + u8 shared; + u32 id; /* counter ID */ + } count; + /* used for mark action */ + u32 mark_id; + u8 reserve[32]; + } act_conf; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_filter_action); + +#define VIRTCHNL_MAX_NUM_ACTIONS 8 + +struct virtchnl_filter_action_set { + /* action number must be less then VIRTCHNL_MAX_NUM_ACTIONS */ + int count; + struct virtchnl_filter_action actions[VIRTCHNL_MAX_NUM_ACTIONS]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(292, virtchnl_filter_action_set); + +/* pattern and action for FDIR rule */ +struct virtchnl_fdir_rule { + struct virtchnl_proto_hdrs proto_hdrs; + struct virtchnl_filter_action_set action_set; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(2604, virtchnl_fdir_rule); + +/* Status returned to VF after VF requests FDIR commands + * VIRTCHNL_FDIR_SUCCESS + * VF FDIR related request is successfully done by PF + * The request can be OP_ADD/DEL. + * + * VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE + * OP_ADD_FDIR_FILTER request is failed due to no Hardware resource. + * + * VIRTCHNL_FDIR_FAILURE_RULE_EXIST + * OP_ADD_FDIR_FILTER request is failed due to the rule is already existed. + * + * VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT + * OP_ADD_FDIR_FILTER request is failed due to conflict with existing rule. + * + * VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST + * OP_DEL_FDIR_FILTER request is failed due to this rule doesn't exist. + * + * VIRTCHNL_FDIR_FAILURE_RULE_INVALID + * OP_ADD_FDIR_FILTER request is failed due to parameters validation + * or HW doesn't support. + * + * VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT + * OP_ADD/DEL_FDIR_FILTER request is failed due to timing out + * for programming. + */ +enum virtchnl_fdir_prgm_status { + VIRTCHNL_FDIR_SUCCESS = 0, + VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE, + VIRTCHNL_FDIR_FAILURE_RULE_EXIST, + VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT, + VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST, + VIRTCHNL_FDIR_FAILURE_RULE_INVALID, + VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT, +}; + +/* VIRTCHNL_OP_ADD_FDIR_FILTER + * VF sends this request to PF by filling out vsi_id, + * validate_only and rule_cfg. PF will return flow_id + * if the request is successfully done and return add_status to VF. + */ +struct virtchnl_fdir_add { + u16 vsi_id; /* INPUT */ + /* + * 1 for validating a fdir rule, 0 for creating a fdir rule. + * Validate and create share one ops: VIRTCHNL_OP_ADD_FDIR_FILTER. + */ + u16 validate_only; /* INPUT */ + u32 flow_id; /* OUTPUT */ + struct virtchnl_fdir_rule rule_cfg; /* INPUT */ + enum virtchnl_fdir_prgm_status status; /* OUTPUT */ +}; + +VIRTCHNL_CHECK_STRUCT_LEN(2616, virtchnl_fdir_add); + +/* VIRTCHNL_OP_DEL_FDIR_FILTER + * VF sends this request to PF by filling out vsi_id + * and flow_id. PF will return del_status to VF. + */ +struct virtchnl_fdir_del { + u16 vsi_id; /* INPUT */ + u16 pad; + u32 flow_id; /* INPUT */ + enum virtchnl_fdir_prgm_status status; /* OUTPUT */ +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); + /** * virtchnl_vc_validate_vf_msg * @ver: Virtchnl version info @@ -828,6 +1100,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DEL_CLOUD_FILTER: valid_len = sizeof(struct virtchnl_filter); break; + case VIRTCHNL_OP_ADD_FDIR_FILTER: + valid_len = sizeof(struct virtchnl_fdir_add); + break; + case VIRTCHNL_OP_DEL_FDIR_FILTER: + valid_len = sizeof(struct virtchnl_fdir_del); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: -- cgit v1.2.3 From 69c4a42d72eb9b41e1c6e4bc9ab7f3650bf35f62 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 26 Feb 2021 22:37:55 -0500 Subject: lsm,selinux: add new hook to compare new mount to an existing mount Add a new hook that takes an existing super block and a new mount with new options and determines if new options confict with an existing mount or not. A filesystem can use this new hook to determine if it can share the an existing superblock with a new superblock for the new mount. Signed-off-by: Olga Kornievskaia Acked-by: Anna Schumaker [PM: tweak the subject line, fix tab/space problems] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 + include/linux/lsm_hooks.h | 6 ++++++ include/linux/security.h | 8 ++++++++ 3 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 477a597db013..1b61bc5dc215 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -62,6 +62,7 @@ LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts) LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts) +LSM_HOOK(int, 0, sb_mnt_opts_compat, struct super_block *sb, void *mnt_opts) LSM_HOOK(int, 0, sb_remount, struct super_block *sb, void *mnt_opts) LSM_HOOK(int, 0, sb_kern_mount, struct super_block *sb) LSM_HOOK(int, 0, sb_show_options, struct seq_file *m, struct super_block *sb) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index fb7f3193753d..97bb36d7e994 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -142,6 +142,12 @@ * @orig the original mount data copied from userspace. * @copy copied data which will be passed to the security module. * Returns 0 if the copy was successful. + * @sb_mnt_opts_compat: + * Determine if the new mount options in @mnt_opts are allowed given + * the existing mounted filesystem at @sb. + * @sb superblock being compared + * @mnt_opts new mount options + * Return 0 if options are compatible. * @sb_remount: * Extracts security system specific mount options and verifies no changes * are being made to those options. diff --git a/include/linux/security.h b/include/linux/security.h index 8aeebd6646dc..f1e5833bfedc 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -294,6 +294,7 @@ int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); void security_free_mnt_opts(void **mnt_opts); int security_sb_eat_lsm_opts(char *options, void **mnt_opts); +int security_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts); int security_sb_remount(struct super_block *sb, void *mnt_opts); int security_sb_kern_mount(struct super_block *sb); int security_sb_show_options(struct seq_file *m, struct super_block *sb); @@ -646,6 +647,13 @@ static inline int security_sb_remount(struct super_block *sb, return 0; } +static inline int security_sb_mnt_opts_compat(struct super_block *sb, + void *mnt_opts) +{ + return 0; +} + + static inline int security_sb_kern_mount(struct super_block *sb) { return 0; -- cgit v1.2.3 From ec1ade6a0448e3bfb07bb905aca1bc18836220c7 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 19 Feb 2021 17:22:33 -0500 Subject: nfs: account for selinux security context when deciding to share superblock Keep track of whether or not there were LSM security context options passed during mount (ie creation of the superblock). Then, while deciding if the superblock can be shared for the new mount, check if the newly passed in LSM security context options are compatible with the existing superblock's ones by calling security_sb_mnt_opts_compat(). Previously, with selinux enabled, NFS wasn't able to do the following 2mounts: mount -o vers=4.2,sec=sys,context=system_u:object_r:root_t:s0 :/ /mnt mount -o vers=4.2,sec=sys,context=system_u:object_r:swapfile_t:s0 :/scratch /scratch 2nd mount would fail with "mount.nfs: an incorrect mount option was specified" and var log messages would have: "SElinux: mount invalid. Same superblock, different security settings for.." Signed-off-by: Olga Kornievskaia [PM: tweak subject line] Signed-off-by: Paul Moore --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6f76b32a0238..a28d71b45b5f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -256,6 +256,7 @@ struct nfs_server { /* User namespace info */ const struct cred *cred; + bool has_sec_mnt_opts; }; /* Server capabilities */ -- cgit v1.2.3 From 4ebd7651bfc8992ba05b355a8036cb7fd0e8d7de Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 19 Feb 2021 14:26:21 -0500 Subject: lsm: separate security_task_getsecid() into subjective and objective variants Of the three LSMs that implement the security_task_getsecid() LSM hook, all three LSMs provide the task's objective security credentials. This turns out to be unfortunate as most of the hook's callers seem to expect the task's subjective credentials, although a small handful of callers do correctly expect the objective credentials. This patch is the first step towards fixing the problem: it splits the existing security_task_getsecid() hook into two variants, one for the subjective creds, one for the objective creds. void security_task_getsecid_subj(struct task_struct *p, u32 *secid); void security_task_getsecid_obj(struct task_struct *p, u32 *secid); While this patch does fix all of the callers to use the correct variant, in order to keep this patch focused on the callers and to ease review, the LSMs continue to use the same implementation for both hooks. The net effect is that this patch should not change the behavior of the kernel in any way, it will be up to the latter LSM specific patches in this series to change the hook implementations and return the correct credentials. Acked-by: Mimi Zohar (IMA) Acked-by: Casey Schaufler Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/cred.h | 2 +- include/linux/lsm_hook_defs.h | 5 ++++- include/linux/lsm_hooks.h | 12 +++++++++--- include/linux/security.h | 10 ++++++++-- 4 files changed, 22 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 4c6350503697..ac0e5f97d7d8 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -140,7 +140,7 @@ struct cred { struct key *request_key_auth; /* assumed request_key authority */ #endif #ifdef CONFIG_SECURITY - void *security; /* subjective LSM security */ + void *security; /* LSM security */ #endif struct user_struct *user; /* real user ID subscription */ struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */ diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 1b61bc5dc215..61f04f7dc1a4 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -204,7 +204,10 @@ LSM_HOOK(int, 0, task_fix_setgid, struct cred *new, const struct cred * old, LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid) LSM_HOOK(int, 0, task_getpgid, struct task_struct *p) LSM_HOOK(int, 0, task_getsid, struct task_struct *p) -LSM_HOOK(void, LSM_RET_VOID, task_getsecid, struct task_struct *p, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, task_getsecid_subj, + struct task_struct *p, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, task_getsecid_obj, + struct task_struct *p, u32 *secid) LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice) LSM_HOOK(int, 0, task_setioprio, struct task_struct *p, int ioprio) LSM_HOOK(int, 0, task_getioprio, struct task_struct *p) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 97bb36d7e994..ba2ccd950833 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -713,9 +713,15 @@ * @p. * @p contains the task_struct for the process. * Return 0 if permission is granted. - * @task_getsecid: - * Retrieve the security identifier of the process @p. - * @p contains the task_struct for the process and place is into @secid. + * @task_getsecid_subj: + * Retrieve the subjective security identifier of the task_struct in @p + * and return it in @secid. Special care must be taken to ensure that @p + * is the either the "current" task, or the caller has exclusive access + * to @p. + * In case of failure, @secid will be set to zero. + * @task_getsecid_obj: + * Retrieve the objective security identifier of the task_struct in @p + * and return it in @secid. * In case of failure, @secid will be set to zero. * * @task_setnice: diff --git a/include/linux/security.h b/include/linux/security.h index f1e5833bfedc..9aeda3f9e838 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -415,7 +415,8 @@ int security_task_fix_setgid(struct cred *new, const struct cred *old, int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); int security_task_getsid(struct task_struct *p); -void security_task_getsecid(struct task_struct *p, u32 *secid); +void security_task_getsecid_subj(struct task_struct *p, u32 *secid); +void security_task_getsecid_obj(struct task_struct *p, u32 *secid); int security_task_setnice(struct task_struct *p, int nice); int security_task_setioprio(struct task_struct *p, int ioprio); int security_task_getioprio(struct task_struct *p); @@ -1106,7 +1107,12 @@ static inline int security_task_getsid(struct task_struct *p) return 0; } -static inline void security_task_getsecid(struct task_struct *p, u32 *secid) +static inline void security_task_getsecid_subj(struct task_struct *p, u32 *secid) +{ + *secid = 0; +} + +static inline void security_task_getsecid_obj(struct task_struct *p, u32 *secid) { *secid = 0; } -- cgit v1.2.3 From f57bac3c33e761fdd78fef159fdc677056c706d0 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Sun, 21 Mar 2021 22:48:49 +0900 Subject: netdev: add netdev_queue_set_dql_min_limit() Add a function to set the dynamic queue limit minimum value. Some specific drivers might have legitimate reasons to configure dql.min_limit to a given value. Typically, this is the case when the PDU of the protocol is smaller than the packet size to used to carry those frames to the device. Concrete example: a CAN (Control Area Network) device with an USB 2.0 interface. The PDU of classical CAN protocol are roughly 16 bytes but the USB packet size (which is used to carry the CAN frames to the device) might be up to 512 bytes. Wen small traffic burst occurs, BQL algorithm is not able to immediately adjust and this would result in having to send many small USB packets (i.e packet of 16 bytes for each CAN frame). Filling up the USB packet with CAN frames is relatively fast (small latency issue) but the gain of not having to send several small USB packets is huge (big throughput increase). In this case, forcing dql.min_limit to a given value that would allow to stuff the USB packet is always a win. This function is to be used by network drivers which are able to prove through a rationale and through empirical tests on several environment (with other applications, heavy context switching, virtualization...), that they constantly reach better performances with a specific predefined dql.min_limit value with no noticeable latency impact. Signed-off-by: Vincent Mailhol Signed-off-by: David S. Miller --- include/linux/netdevice.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8f003955c485..33b8ea08996e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3446,6 +3446,24 @@ netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue) return dev_queue->state & QUEUE_STATE_DRV_XOFF_OR_FROZEN; } +/** + * netdev_queue_set_dql_min_limit - set dql minimum limit + * @dev_queue: pointer to transmit queue + * @min_limit: dql minimum limit + * + * Forces xmit_more() to return true until the minimum threshold + * defined by @min_limit is reached (or until the tx queue is + * empty). Warning: to be use with care, misuse will impact the + * latency. + */ +static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue, + unsigned int min_limit) +{ +#ifdef CONFIG_BQL + dev_queue->dql.min_limit = min_limit; +#endif +} + /** * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write * @dev_queue: pointer to transmit queue -- cgit v1.2.3 From 405a129f59384c474343d6261a2e0a75650d29a8 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Mon, 22 Mar 2021 08:25:16 +0530 Subject: linux/qed: Mundane spelling fixes throughout the file s/unrequired/"not required"/ s/consme/consume/ .....two different places s/accros/across/ Signed-off-by: Bhaskar Chowdhury Acked-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index e339b48de32d..f34dbd0db795 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -19,7 +19,7 @@ enum qed_chain_mode { /* Each Page contains a next pointer at its end */ QED_CHAIN_MODE_NEXT_PTR, - /* Chain is a single page (next ptr) is unrequired */ + /* Chain is a single page (next ptr) is not required */ QED_CHAIN_MODE_SINGLE, /* Page pointers are located in a side list */ @@ -56,13 +56,13 @@ struct qed_chain_pbl_u32 { }; struct qed_chain_u16 { - /* Cyclic index of next element to produce/consme */ + /* Cyclic index of next element to produce/consume */ u16 prod_idx; u16 cons_idx; }; struct qed_chain_u32 { - /* Cyclic index of next element to produce/consme */ + /* Cyclic index of next element to produce/consume */ u32 prod_idx; u32 cons_idx; }; @@ -270,7 +270,7 @@ static inline dma_addr_t qed_chain_get_pbl_phys(const struct qed_chain *chain) /** * @brief qed_chain_advance_page - * - * Advance the next element accros pages for a linked chain + * Advance the next element across pages for a linked chain * * @param p_chain * @param p_next_elem -- cgit v1.2.3 From 744b8376632208137fe4acc9967b93e2970732a3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 22 Mar 2021 13:31:48 +0200 Subject: net: move the ptype_all and ptype_base declarations to include/linux/netdevice.h ptype_all and ptype_base are declared in net/core/dev.c as non-static, because they are used by net-procfs.c too. However, a "make W=1" build complains that there was no previous declaration of ptype_all and ptype_base in a header file, so this way of declaring things constitutes a violation of coding style. Let's move the extern declarations of ptype_all and ptype_base to the linux/netdevice.h file, which is included by net-procfs.c too. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 33b8ea08996e..e4a503288d9b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5336,6 +5336,9 @@ do { \ #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) +extern struct list_head ptype_all __read_mostly; +extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; + extern struct net_device *blackhole_netdev; #endif /* _LINUX_NETDEVICE_H */ -- cgit v1.2.3 From 4bf07f6562a01a488877e05267808da7147f44a5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 22 Mar 2021 22:39:03 +0100 Subject: timekeeping, clocksource: Fix various typos in comments Fix ~56 single-word typos in timekeeping & clocksource code comments. Signed-off-by: Ingo Molnar Cc: Thomas Gleixner Cc: John Stultz Cc: Stephen Boyd Cc: Daniel Lezcano Cc: linux-kernel@vger.kernel.org --- include/linux/clocksource.h | 2 +- include/linux/timex.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 86d143db6523..a247b089ca78 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -70,7 +70,7 @@ struct module; * @mark_unstable: Optional function to inform the clocksource driver that * the watchdog marked the clocksource unstable * @tick_stable: Optional function called periodically from the watchdog - * code to provide stable syncrhonization points + * code to provide stable synchronization points * @wd_list: List head to enqueue into the watchdog list (internal) * @cs_last: Last clocksource value for clocksource watchdog * @wd_last: Last watchdog value corresponding to @cs_last diff --git a/include/linux/timex.h b/include/linux/timex.h index 9c2e54faf9b7..059b18eb1f1f 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -133,7 +133,7 @@ /* * kernel variables - * Note: maximum error = NTP synch distance = dispersion + delay / 2; + * Note: maximum error = NTP sync distance = dispersion + delay / 2; * estimated error = NTP dispersion. */ extern unsigned long tick_usec; /* USER_HZ period (usec) */ -- cgit v1.2.3 From add2d73631070c951b0de81a01d1463a15cfbd47 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Mar 2021 11:21:45 -0700 Subject: net: set initial device refcount to 1 When adding CONFIG_PCPU_DEV_REFCNT, I forgot that the initial net device refcount was 0. When CONFIG_PCPU_DEV_REFCNT is not set, this means the first dev_hold() triggers an illegal refcount operation (addition on 0) refcount_t: addition on 0; use-after-free. WARNING: CPU: 0 PID: 1 at lib/refcount.c:25 refcount_warn_saturate+0x128/0x1a4 Fix is to change initial (and final) refcount to be 1. Also add a missing kerneldoc piece, as reported by Stephen Rothwell. Fixes: 919067cc845f ("net: add CONFIG_PCPU_DEV_REFCNT") Signed-off-by: Eric Dumazet Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e4a503288d9b..7005ad80e8d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1792,6 +1792,7 @@ enum netdev_ml_priv_type { * * @proto_down_reason: reason a netdev interface is held down * @pcpu_refcnt: Number of references to this device + * @dev_refcnt: Number of references to this device * @todo_list: Delayed register/unregister * @link_watch_list: XXX: need comments on this one * -- cgit v1.2.3 From 1ab568e92bf8f6a359c977869dc546a23a6b5f13 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Mon, 22 Mar 2021 19:51:13 +0100 Subject: net: dsa: hellcreek: Report switch name and ID Report the driver name, ASIC ID and the switch name via devlink. This is a useful information for user space tooling. Signed-off-by: Kurt Kanzenbach Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/platform_data/hirschmann-hellcreek.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/hirschmann-hellcreek.h b/include/linux/platform_data/hirschmann-hellcreek.h index 388846766bb2..6a000df5541f 100644 --- a/include/linux/platform_data/hirschmann-hellcreek.h +++ b/include/linux/platform_data/hirschmann-hellcreek.h @@ -12,6 +12,7 @@ #include struct hellcreek_platform_data { + const char *name; /* Switch name */ int num_ports; /* Amount of switch ports */ int is_100_mbits; /* Is it configured to 100 or 1000 mbit/s */ int qbv_support; /* Qbv support on front TSN ports */ -- cgit v1.2.3 From 5dd5f9347a927c169205f7385e5cf4e18c41e21a Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Sun, 21 Mar 2021 01:42:40 +0530 Subject: driver core: Trivial typo fix s/subsytem/subsystem/ Acked-by: Randy Dunlap Signed-off-by: Bhaskar Chowdhury Link: https://lore.kernel.org/r/20210320201240.23745-1-unixbhaskar@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ba660731bd25..a8ce0dc3b758 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -49,7 +49,7 @@ struct dev_iommu; /** * struct subsys_interface - interfaces to device functions * @name: name of the device function - * @subsys: subsytem of the devices to attach to + * @subsys: subsystem of the devices to attach to * @node: the list of functions registered at the subsystem * @add_dev: device hookup to device function handler * @remove_dev: device hookup to device function handler -- cgit v1.2.3 From 1bd66c1a32ca8e5148eaba2675321637e89a49af Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 20 Mar 2021 13:26:21 +0100 Subject: fs: document mapping helpers Document new helpers we introduced this cycle. Link: https://lore.kernel.org/r/20210320122623.599086-2-christian.brauner@ubuntu.com Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- include/linux/fs.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ec8f3ddf4a6a..33873531ffa6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1574,36 +1574,84 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); } +/** + * kuid_into_mnt - map a kuid down into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * @kuid: kuid to be mapped + * + * Return: @kuid mapped according to @mnt_userns. + * If @kuid has no mapping INVALID_UID is returned. + */ static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns, kuid_t kuid) { return make_kuid(mnt_userns, __kuid_val(kuid)); } +/** + * kgid_into_mnt - map a kgid down into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * @kgid: kgid to be mapped + * + * Return: @kgid mapped according to @mnt_userns. + * If @kgid has no mapping INVALID_GID is returned. + */ static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, kgid_t kgid) { return make_kgid(mnt_userns, __kgid_val(kgid)); } +/** + * i_uid_into_mnt - map an inode's i_uid down into a mnt_userns + * @mnt_userns: user namespace of the mount the inode was found from + * @inode: inode to map + * + * Return: the inode's i_uid mapped down according to @mnt_userns. + * If the inode's i_uid has no mapping INVALID_UID is returned. + */ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { return kuid_into_mnt(mnt_userns, inode->i_uid); } +/** + * i_gid_into_mnt - map an inode's i_gid down into a mnt_userns + * @mnt_userns: user namespace of the mount the inode was found from + * @inode: inode to map + * + * Return: the inode's i_gid mapped down according to @mnt_userns. + * If the inode's i_gid has no mapping INVALID_GID is returned. + */ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, const struct inode *inode) { return kgid_into_mnt(mnt_userns, inode->i_gid); } +/** + * kuid_from_mnt - map a kuid up into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * @kuid: kuid to be mapped + * + * Return: @kuid mapped up according to @mnt_userns. + * If @kuid has no mapping INVALID_UID is returned. + */ static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns, kuid_t kuid) { return KUIDT_INIT(from_kuid(mnt_userns, kuid)); } +/** + * kgid_from_mnt - map a kgid up into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * @kgid: kgid to be mapped + * + * Return: @kgid mapped up according to @mnt_userns. + * If @kgid has no mapping INVALID_GID is returned. + */ static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, kgid_t kgid) { -- cgit v1.2.3 From a65e58e791a1690da8de731c8391816a22f5555c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 20 Mar 2021 13:26:22 +0100 Subject: fs: document and rename fsid helpers Vivek pointed out that the fs{g,u}id_into_mnt() naming scheme can be misleading as it could be understood as implying they do the exact same thing as i_{g,u}id_into_mnt(). The original motivation for this naming scheme was to signal to callers that the helpers will always take care to map the k{g,u}id such that the ownership is expressed in terms of the mnt_users. Get rid of the confusion by renaming those helpers to something more sensible. Al suggested mapped_fs{g,u}id() which seems a really good fit. Usually filesystems don't need to bother with these helpers directly only in some cases where they allocate objects that carry {g,u}ids which are either filesystem specific (e.g. xfs quota objects) or don't have a clean set of helpers as inodes have. Link: https://lore.kernel.org/r/20210320122623.599086-3-christian.brauner@ubuntu.com Inspired-by: Vivek Goyal Cc: Christoph Hellwig Cc: Darrick J. Wong Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- include/linux/fs.h | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 33873531ffa6..e34967829183 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1658,12 +1658,36 @@ static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, return KGIDT_INIT(from_kgid(mnt_userns, kgid)); } -static inline kuid_t fsuid_into_mnt(struct user_namespace *mnt_userns) +/** + * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * + * Use this helper to initialize a new vfs or filesystem object based on + * the caller's fsuid. A common example is initializing the i_uid field of + * a newly allocated inode triggered by a creation event such as mkdir or + * O_CREAT. Other examples include the allocation of quotas for a specific + * user. + * + * Return: the caller's current fsuid mapped up according to @mnt_userns. + */ +static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) { return kuid_from_mnt(mnt_userns, current_fsuid()); } -static inline kgid_t fsgid_into_mnt(struct user_namespace *mnt_userns) +/** + * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns + * @mnt_userns: user namespace of the relevant mount + * + * Use this helper to initialize a new vfs or filesystem object based on + * the caller's fsgid. A common example is initializing the i_gid field of + * a newly allocated inode triggered by a creation event such as mkdir or + * O_CREAT. Other examples include the allocation of quotas for a specific + * user. + * + * Return: the caller's current fsgid mapped up according to @mnt_userns. + */ +static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) { return kgid_from_mnt(mnt_userns, current_fsgid()); } -- cgit v1.2.3 From 8e5389132ab429604c1a2459b52f0c849a71cc61 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 20 Mar 2021 13:26:23 +0100 Subject: fs: introduce fsuidgid_has_mapping() helper Don't open-code the checks and instead move them into a clean little helper we can call. This also reduces the risk that if we ever change something we forget to change all locations. Link: https://lore.kernel.org/r/20210320122623.599086-4-christian.brauner@ubuntu.com Inspired-by: Vivek Goyal Cc: Christoph Hellwig Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- include/linux/fs.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e34967829183..e9e7e799425e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1692,6 +1692,26 @@ static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) return kgid_from_mnt(mnt_userns, current_fsgid()); } +/** + * fsuidgid_has_mapping() - check whether caller's fsuid/fsgid is mapped + * @sb: the superblock we want a mapping in + * @mnt_userns: user namespace of the relevant mount + * + * Check whether the caller's fsuid and fsgid have a valid mapping in the + * s_user_ns of the superblock @sb. If the caller is on an idmapped mount map + * the caller's fsuid and fsgid according to the @mnt_userns first. + * + * Return: true if fsuid and fsgid is mapped, false if not. + */ +static inline bool fsuidgid_has_mapping(struct super_block *sb, + struct user_namespace *mnt_userns) +{ + struct user_namespace *s_user_ns = sb->s_user_ns; + + return kuid_has_mapping(s_user_ns, mapped_fsuid(mnt_userns)) && + kgid_has_mapping(s_user_ns, mapped_fsgid(mnt_userns)); +} + extern struct timespec64 current_time(struct inode *inode); /* -- cgit v1.2.3 From db998553cf11dd697485ac6142adbb35d21fff10 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 20 Mar 2021 13:26:24 +0100 Subject: fs: introduce two inode i_{u,g}id initialization helpers Give filesystem two little helpers that do the right thing when initializing the i_uid and i_gid fields on idmapped and non-idmapped mounts. Filesystems shouldn't have to be concerned with too many details. Link: https://lore.kernel.org/r/20210320122623.599086-5-christian.brauner@ubuntu.com Inspired-by: Vivek Goyal Cc: Christoph Hellwig Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- include/linux/fs.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e9e7e799425e..3eaf5f27a0e4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1692,6 +1692,34 @@ static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) return kgid_from_mnt(mnt_userns, current_fsgid()); } +/** + * inode_fsuid_set - initialize inode's i_uid field with callers fsuid + * @inode: inode to initialize + * @mnt_userns: user namespace of the mount the inode was found from + * + * Initialize the i_uid field of @inode. If the inode was found/created via + * an idmapped mount map the caller's fsuid according to @mnt_users. + */ +static inline void inode_fsuid_set(struct inode *inode, + struct user_namespace *mnt_userns) +{ + inode->i_uid = mapped_fsuid(mnt_userns); +} + +/** + * inode_fsgid_set - initialize inode's i_gid field with callers fsgid + * @inode: inode to initialize + * @mnt_userns: user namespace of the mount the inode was found from + * + * Initialize the i_gid field of @inode. If the inode was found/created via + * an idmapped mount map the caller's fsgid according to @mnt_users. + */ +static inline void inode_fsgid_set(struct inode *inode, + struct user_namespace *mnt_userns) +{ + inode->i_gid = mapped_fsgid(mnt_userns); +} + /** * fsuidgid_has_mapping() - check whether caller's fsuid/fsgid is mapped * @sb: the superblock we want a mapping in -- cgit v1.2.3 From 39015399a849843ff8f840b68d16c6ff7c58e0f0 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 4 Feb 2021 18:00:55 +0000 Subject: fs: turn some comments into kernel-doc While reviewing ./include/linux/fs.h, I noticed that three comments can actually be turned into kernel-doc comments. This allows to check the consistency between the descriptions and the functions' signatures in case they may change in the future. A quick validation with the consistency check: ./scripts/kernel-doc -none include/linux/fs.h currently reports no issues in this file. Link: https://lore.kernel.org/r/20210204180059.28360-2-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Acked-by: Christian Brauner Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ec8f3ddf4a6a..644ccef39014 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1739,7 +1739,7 @@ static inline void sb_start_pagefault(struct super_block *sb) __sb_start_write(sb, SB_FREEZE_PAGEFAULT); } -/* +/** * sb_start_intwrite - get write access to a superblock for internal fs purposes * @sb: the super we write to * @@ -3161,7 +3161,7 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, void inode_dio_wait(struct inode *inode); -/* +/** * inode_dio_begin - signal start of a direct I/O requests * @inode: inode the direct I/O happens on * @@ -3173,7 +3173,7 @@ static inline void inode_dio_begin(struct inode *inode) atomic_inc(&inode->i_dio_count); } -/* +/** * inode_dio_end - signal finish of a direct I/O requests * @inode: inode the direct I/O happens on * -- cgit v1.2.3 From 92cb01c74ef13ca01e1af836236b140634967b82 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 1 Mar 2021 13:45:19 +0100 Subject: fs: update kernel-doc for vfs_rename() Commit 9fe61450972d ("namei: introduce struct renamedata") introduces a new struct for vfs_rename() and makes the vfs_rename() kernel-doc argument description out of sync. Move the description of arguments for vfs_rename() to a new kernel-doc for the struct renamedata to make these descriptions checkable against the actual implementation. Link: https://lore.kernel.org/r/20210204180059.28360-3-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Acked-by: Christian Brauner Signed-off-by: Christian Brauner --- include/linux/fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 644ccef39014..e83c0bbc6454 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1782,6 +1782,17 @@ int vfs_rmdir(struct user_namespace *, struct inode *, struct dentry *); int vfs_unlink(struct user_namespace *, struct inode *, struct dentry *, struct inode **); +/** + * struct renamedata - contains all information required for renaming + * @old_mnt_userns: old user namespace of the mount the inode was found from + * @old_dir: parent of source + * @old_dentry: source + * @new_mnt_userns: new user namespace of the mount the inode was found from + * @new_dir: parent of destination + * @new_dentry: destination + * @delegated_inode: returns an inode needing a delegation break + * @flags: rename flags + */ struct renamedata { struct user_namespace *old_mnt_userns; struct inode *old_dir; -- cgit v1.2.3 From 291da9d4a9eb3a1cb0610b7f4480f5b52b1825e7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Mar 2021 09:46:13 +0100 Subject: locking/mutex: Fix non debug version of mutex_lock_io_nested() If CONFIG_DEBUG_LOCK_ALLOC=n then mutex_lock_io_nested() maps to mutex_lock() which is clearly wrong because mutex_lock() lacks the io_schedule_prepare()/finish() invocations. Map it to mutex_lock_io(). Fixes: f21860bac05b ("locking/mutex, sched/wait: Fix the mutex_lock_io_nested() define") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/878s6fshii.fsf@nanos.tec.linutronix.de --- include/linux/mutex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 0cd631a19727..515cff77a4f4 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -185,7 +185,7 @@ extern void mutex_lock_io(struct mutex *lock); # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) -# define mutex_lock_io_nested(lock, subclass) mutex_lock(lock) +# define mutex_lock_io_nested(lock, subclass) mutex_lock_io(lock) #endif /* -- cgit v1.2.3 From 2d5ba37461013253d2ff0a3641b727fd32ea97a9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 23 Feb 2021 18:44:53 +0100 Subject: usb: ehci: add spurious flag to disable overcurrent checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds an ignore_oc flag which can be set by EHCI controller not supporting or wanting to disable overcurrent checking. The EHCI platform data in include/linux/usb/ehci_pdriver.h is also augmented to take advantage of this new flag. Signed-off-by: Florian Fainelli Signed-off-by: Álvaro Fernández Rojas Link: https://lore.kernel.org/r/20210223174455.1378-2-noltari@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ehci_pdriver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/ehci_pdriver.h b/include/linux/usb/ehci_pdriver.h index dd742afdc03f..89fc901e778f 100644 --- a/include/linux/usb/ehci_pdriver.h +++ b/include/linux/usb/ehci_pdriver.h @@ -50,6 +50,7 @@ struct usb_ehci_pdata { unsigned no_io_watchdog:1; unsigned reset_on_resume:1; unsigned dma_mask_64:1; + unsigned spurious_oc:1; /* Turn on all power and clocks */ int (*power_on)(struct platform_device *pdev); -- cgit v1.2.3 From aaadc6aea6935e2f36c57056ff756fba0bbc4975 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 18 Mar 2021 16:54:06 +0100 Subject: USB: core: rename usb_driver_claim_interface() data parameter It's been almost twenty years since the interface "private data" pointer was removed in favour of using the driver-data pointer of struct device. Let's rename the driver-data parameter of usb_driver_claim_interface() so that it better reflects how it's used. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210318155406.22399-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 57c1e0ce5eba..b07e90d07ab6 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -841,7 +841,7 @@ extern int usb_free_streams(struct usb_interface *interface, /* used these for multi-interface device registration */ extern int usb_driver_claim_interface(struct usb_driver *driver, - struct usb_interface *iface, void *priv); + struct usb_interface *iface, void *data); /** * usb_interface_claimed - returns true iff an interface is claimed -- cgit v1.2.3 From 0299809be415567366b66f248eed93848b8dc9f3 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 10 Mar 2021 19:42:44 -0800 Subject: usb: core: Track SuperSpeed Plus GenXxY Introduce ssp_rate field to usb_device structure to capture the connected SuperSpeed Plus signaling rate generation and lane count with the corresponding usb_ssp_rate enum. Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/b7805d121e5ae4ad5ae144bd860b6ac04ee47436.1615432770.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index b07e90d07ab6..ddd2f5b2a282 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -560,6 +560,7 @@ struct usb3_lpm_parameters { * @speed: device speed: high/full/low (or error) * @rx_lanes: number of rx lanes in use, USB 3.2 adds dual-lane support * @tx_lanes: number of tx lanes in use, USB 3.2 adds dual-lane support + * @ssp_rate: SuperSpeed Plus phy signaling rate and lane count * @tt: Transaction Translator info; used with low/full speed dev, highspeed hub * @ttport: device port on that tt hub * @toggle: one bit for each endpoint, with ([0] = IN, [1] = OUT) endpoints @@ -636,6 +637,7 @@ struct usb_device { enum usb_device_speed speed; unsigned int rx_lanes; unsigned int tx_lanes; + enum usb_ssp_rate ssp_rate; struct usb_tt *tt; int ttport; -- cgit v1.2.3 From f2db85b64f0af1410ccb8ebcc9d7fa38e99feee9 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 2 Mar 2021 13:11:30 -0800 Subject: driver core: Avoid pointless deferred probe attempts There's no point in adding a device to the deferred probe list if we know for sure that it doesn't have a matching driver. So, check if a device can match with a driver before adding it to the deferred probe list. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20210302211133.2244281-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index a8ce0dc3b758..38a2071cf776 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -439,6 +439,9 @@ struct dev_links_info { * @state_synced: The hardware state of this device has been synced to match * the software state of this device by calling the driver/bus * sync_state() callback. + * @can_match: The device has matched with a driver at least once or it is in + * a bus (like AMBA) which can't check for matching drivers until + * other devices probe successfully. * @dma_coherent: this particular device is dma coherent, even if the * architecture supports non-coherent devices. * @dma_ops_bypass: If set to %true then the dma_ops are bypassed for the @@ -545,6 +548,7 @@ struct device { bool offline:1; bool of_node_reused:1; bool state_synced:1; + bool can_match:1; #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) -- cgit v1.2.3 From 0341ce5443949588e93581b49b934cdde2befbf8 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 23 Mar 2021 15:56:17 +0200 Subject: workqueue: Add resource managed version of delayed work init A few drivers which need a delayed work-queue must cancel work at driver detach. Some of those implement remove() solely for this purpose. Help drivers to avoid unnecessary remove and error-branch implementation by adding managed verision of delayed work initialization. This will also help drivers to avoid mixing manual and devm based unwinding when other resources are handled by devm. Reviewed-by: Hans de Goede Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/51769ea4668198deb798fe47fcfb5f5288d61586.1616506559.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Greg Kroah-Hartman --- include/linux/devm-helpers.h | 53 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/devm-helpers.h (limited to 'include/linux') diff --git a/include/linux/devm-helpers.h b/include/linux/devm-helpers.h new file mode 100644 index 000000000000..f64e0c9f3763 --- /dev/null +++ b/include/linux/devm-helpers.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __LINUX_DEVM_HELPERS_H +#define __LINUX_DEVM_HELPERS_H + +/* + * Functions which do automatically cancel operations or release resources upon + * driver detach. + * + * These should be helpful to avoid mixing the manual and devm-based resource + * management which can be source of annoying, rarely occurring, + * hard-to-reproduce bugs. + * + * Please take into account that devm based cancellation may be performed some + * time after the remove() is ran. + * + * Thus mixing devm and manual resource management can easily cause problems + * when unwinding operations with dependencies. IRQ scheduling a work in a queue + * is typical example where IRQs are often devm-managed and WQs are manually + * cleaned at remove(). If IRQs are not manually freed at remove() (and this is + * often the case when we use devm for IRQs) we have a period of time after + * remove() - and before devm managed IRQs are freed - where new IRQ may fire + * and schedule a work item which won't be cancelled because remove() was + * already ran. + */ + +#include +#include + +static inline void devm_delayed_work_drop(void *res) +{ + cancel_delayed_work_sync(res); +} + +/** + * devm_delayed_work_autocancel - Resource-managed work allocation + * @dev: Device which lifetime work is bound to + * @pdata: work to be cancelled when driver is detached + * + * Initialize work which is automatically cancelled when driver is detached. + * A few drivers need delayed work which must be cancelled before driver + * is detached to avoid accessing removed resources. + * devm_delayed_work_autocancel() can be used to omit the explicit + * cancelleation when driver is detached. + */ +static inline int devm_delayed_work_autocancel(struct device *dev, + struct delayed_work *w, + work_func_t worker) +{ + INIT_DELAYED_WORK(w, worker); + return devm_add_action(dev, devm_delayed_work_drop, w); +} + +#endif -- cgit v1.2.3 From f2cc020d7876de7583feb52ec939a32419cf9468 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Mar 2021 18:49:35 +0100 Subject: tracing: Fix various typos in comments Fix ~59 single-word typos in the tracing code comments, and fix the grammar in a handful of places. Link: https://lore.kernel.org/r/20210322224546.GA1981273@gmail.com Link: https://lkml.kernel.org/r/20210323174935.GA4176821@gmail.com Reviewed-by: Randy Dunlap Signed-off-by: Ingo Molnar Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 4 ++-- include/linux/trace_events.h | 2 +- include/linux/tracepoint.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 86e5028bfa20..a69f363b61bf 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -33,7 +33,7 @@ /* * If the arch's mcount caller does not support all of ftrace's * features, then it must call an indirect function that - * does. Or at least does enough to prevent any unwelcomed side effects. + * does. Or at least does enough to prevent any unwelcome side effects. */ #if !ARCH_SUPPORTS_FTRACE_OPS # define FTRACE_FORCE_LIST_FUNC 1 @@ -389,7 +389,7 @@ DECLARE_PER_CPU(int, disable_stack_tracer); */ static inline void stack_tracer_disable(void) { - /* Preemption or interupts must be disabled */ + /* Preemption or interrupts must be disabled */ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); this_cpu_inc(disable_stack_tracer); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 8cba64ce23a4..36e27c1f42e0 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -206,7 +206,7 @@ static inline unsigned int tracing_gen_ctx_dec(void) trace_ctx = tracing_gen_ctx(); /* - * Subtract one from the preeption counter if preemption is enabled, + * Subtract one from the preemption counter if preemption is enabled, * see trace_event_buffer_reserve()for details. */ if (IS_ENABLED(CONFIG_PREEMPTION)) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 9cfb099da58f..13f65420f188 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -465,7 +465,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * * * * The declared 'local variable' is called '__entry' * * - * * __field(pid_t, prev_prid) is equivalent to a standard declariton: + * * __field(pid_t, prev_prid) is equivalent to a standard declaration: * * * * pid_t prev_pid; * * -- cgit v1.2.3 From 39f985c8f667c80a3d1eb19d31138032fa36b09e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 20 Mar 2021 05:40:38 +0000 Subject: fs/cachefiles: Remove wait_bit_key layout dependency Cachefiles was relying on wait_page_key and wait_bit_key being the same layout, which is fragile. Now that wait_page_key is exposed in the pagemap.h header, we can remove that fragility A comment on the need to maintain structure layout equivalence was added by Linus[1] and that is no longer applicable. Fixes: 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Signed-off-by: David Howells Tested-by: kafs-testing@auristor.com cc: linux-cachefs@redhat.com cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20210320054104.1300774-2-willy@infradead.org/ Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3510ca20ece0150af6b10c77a74ff1b5c198e3e2 [1] --- include/linux/pagemap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 20225b067583..8f4daac6eb4b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -559,7 +559,6 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, return pgoff; } -/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */ struct wait_page_key { struct page *page; int bit_nr; -- cgit v1.2.3 From e5dbd33218bd8d87ab69f730ab90aed5fab7eb26 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 20 Mar 2021 05:40:39 +0000 Subject: mm/writeback: Add wait_on_page_writeback_killable This is the killable version of wait_on_page_writeback. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Signed-off-by: David Howells Tested-by: kafs-testing@auristor.com cc: linux-afs@lists.infradead.org cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20210320054104.1300774-3-willy@infradead.org --- include/linux/pagemap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 8f4daac6eb4b..8c9947fd62f3 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -682,6 +682,7 @@ static inline int wait_on_page_locked_killable(struct page *page) int put_and_wait_on_page_locked(struct page *page, int state); void wait_on_page_writeback(struct page *page); +int wait_on_page_writeback_killable(struct page *page); extern void end_page_writeback(struct page *page); void wait_for_stable_page(struct page *page); -- cgit v1.2.3 From c0e715bbd50e57319f76d0b757dc282893f2d476 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 23 Mar 2021 01:51:42 +0200 Subject: net: bridge: add helper for retrieving the current bridge port STP state It may happen that we have the following topology with DSA or any other switchdev driver with LAG offload: ip link add br0 type bridge stp_state 1 ip link add bond0 type bond ip link set bond0 master br0 ip link set swp0 master bond0 ip link set swp1 master bond0 STP decides that it should put bond0 into the BLOCKING state, and that's that. The ports that are actively listening for the switchdev port attributes emitted for the bond0 bridge port (because they are offloading it) and have the honor of seeing that switchdev port attribute can react to it, so we can program swp0 and swp1 into the BLOCKING state. But if then we do: ip link set swp2 master bond0 then as far as the bridge is concerned, nothing has changed: it still has one bridge port. But this new bridge port will not see any STP state change notification and will remain FORWARDING, which is how the standalone code leaves it in. We need a function in the bridge driver which retrieves the current STP state, such that drivers can synchronize to it when they may have missed switchdev events. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index b979005ea39c..920d3a02cc68 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -136,6 +136,7 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev, __u16 vid); void br_fdb_clear_offload(const struct net_device *dev, u16 vid); bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag); +u8 br_port_get_stp_state(const struct net_device *dev); #else static inline struct net_device * br_fdb_find_port(const struct net_device *br_dev, @@ -154,6 +155,11 @@ br_port_flag_is_set(const struct net_device *dev, unsigned long flag) { return false; } + +static inline u8 br_port_get_stp_state(const struct net_device *dev) +{ + return BR_STATE_DISABLED; +} #endif #endif -- cgit v1.2.3 From f1d42ea10056b9050d1c5b8e19995f66c30aeded Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 23 Mar 2021 01:51:43 +0200 Subject: net: bridge: add helper to retrieve the current ageing time The SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME attribute is only emitted from: sysfs/ioctl/netlink -> br_set_ageing_time -> __set_ageing_time therefore not at bridge port creation time, so: (a) switchdev drivers have to hardcode the initial value for the address ageing time, because they didn't get any notification (b) that hardcoded value can be out of sync, if the user changes the ageing time before enslaving the port to the bridge We need a helper in the bridge, such that switchdev drivers can query the current value of the bridge ageing time when they start offloading it. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 920d3a02cc68..ebd16495459c 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -137,6 +137,7 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev, void br_fdb_clear_offload(const struct net_device *dev, u16 vid); bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag); u8 br_port_get_stp_state(const struct net_device *dev); +clock_t br_get_ageing_time(struct net_device *br_dev); #else static inline struct net_device * br_fdb_find_port(const struct net_device *br_dev, @@ -160,6 +161,11 @@ static inline u8 br_port_get_stp_state(const struct net_device *dev) { return BR_STATE_DISABLED; } + +static inline clock_t br_get_ageing_time(struct net_device *br_dev) +{ + return 0; +} #endif #endif -- cgit v1.2.3 From 4f2673b3a2b6246729a1ff13b8945a040839dbd3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 23 Mar 2021 01:51:44 +0200 Subject: net: bridge: add helper to replay port and host-joined mdb entries I have a system with DSA ports, and udhcpcd is configured to bring interfaces up as soon as they are created. I create a bridge as follows: ip link add br0 type bridge As soon as I create the bridge and udhcpcd brings it up, I also have avahi which automatically starts sending IPv6 packets to advertise some local services, and because of that, the br0 bridge joins the following IPv6 groups due to the code path detailed below: 33:33:ff:6d:c1:9c vid 0 33:33:00:00:00:6a vid 0 33:33:00:00:00:fb vid 0 br_dev_xmit -> br_multicast_rcv -> br_ip6_multicast_add_group -> __br_multicast_add_group -> br_multicast_host_join -> br_mdb_notify This is all fine, but inside br_mdb_notify we have br_mdb_switchdev_host hooked up, and switchdev will attempt to offload the host joined groups to an empty list of ports. Of course nobody offloads them. Then when we add a port to br0: ip link set swp0 master br0 the bridge doesn't replay the host-joined MDB entries from br_add_if, and eventually the host joined addresses expire, and a switchdev notification for deleting it is emitted, but surprise, the original addition was already completely missed. The strategy to address this problem is to replay the MDB entries (both the port ones and the host joined ones) when the new port joins the bridge, similar to what vxlan_fdb_replay does (in that case, its FDB can be populated and only then attached to a bridge that you offload). However there are 2 possibilities: the addresses can be 'pushed' by the bridge into the port, or the port can 'pull' them from the bridge. Considering that in the general case, the new port can be really late to the party, and there may have been many other switchdev ports that already received the initial notification, we would like to avoid delivering duplicate events to them, since they might misbehave. And currently, the bridge calls the entire switchdev notifier chain, whereas for replaying it should just call the notifier block of the new guy. But the bridge doesn't know what is the new guy's notifier block, it just knows where the switchdev notifier chain is. So for simplification, we make this a driver-initiated pull for now, and the notifier block is passed as an argument. To emulate the calling context for mdb objects (deferred and put on the blocking notifier chain), we must iterate under RCU protection through the bridge's mdb entries, queue them, and only call them once we're out of the RCU read-side critical section. There was some opportunity for reuse between br_mdb_switchdev_host_port, br_mdb_notify and the newly added br_mdb_queue_one in how the switchdev mdb object is created, so a helper was created. Suggested-by: Ido Schimmel Signed-off-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index ebd16495459c..f6472969bb44 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -69,6 +69,8 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto); bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto); bool br_multicast_enabled(const struct net_device *dev); bool br_multicast_router(const struct net_device *dev); +int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, + struct notifier_block *nb, struct netlink_ext_ack *extack); #else static inline int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list) @@ -93,6 +95,13 @@ static inline bool br_multicast_router(const struct net_device *dev) { return false; } +static inline int br_mdb_replay(struct net_device *br_dev, + struct net_device *dev, + struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} #endif #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) -- cgit v1.2.3 From 04846f903b53b32d29453e865646309db29f255a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 23 Mar 2021 01:51:45 +0200 Subject: net: bridge: add helper to replay port and local fdb entries When a switchdev port starts offloading a LAG that is already in a bridge and has an FDB entry pointing to it: ip link set bond0 master br0 bridge fdb add dev bond0 00:01:02:03:04:05 master static ip link set swp0 master bond0 the switchdev driver will have no idea that this FDB entry is there, because it missed the switchdev event emitted at its creation. Ido Schimmel pointed this out during a discussion about challenges with switchdev offloading of stacked interfaces between the physical port and the bridge, and recommended to just catch that condition and deny the CHANGEUPPER event: https://lore.kernel.org/netdev/20210210105949.GB287766@shredder.lan/ But in fact, we might need to deal with the hard thing anyway, which is to replay all FDB addresses relevant to this port, because it isn't just static FDB entries, but also local addresses (ones that are not forwarded but terminated by the bridge). There, we can't just say 'oh yeah, there was an upper already so I'm not joining that'. So, similar to the logic for replaying MDB entries, add a function that must be called by individual switchdev drivers and replays local FDB entries as well as ones pointing towards a bridge port. This time, we use the atomic switchdev notifier block, since that's what FDB entries expect for some reason. Reported-by: Ido Schimmel Signed-off-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index f6472969bb44..b564c4486a45 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -147,6 +147,8 @@ void br_fdb_clear_offload(const struct net_device *dev, u16 vid); bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag); u8 br_port_get_stp_state(const struct net_device *dev); clock_t br_get_ageing_time(struct net_device *br_dev); +int br_fdb_replay(struct net_device *br_dev, struct net_device *dev, + struct notifier_block *nb); #else static inline struct net_device * br_fdb_find_port(const struct net_device *br_dev, @@ -175,6 +177,13 @@ static inline clock_t br_get_ageing_time(struct net_device *br_dev) { return 0; } + +static inline int br_fdb_replay(struct net_device *br_dev, + struct net_device *dev, + struct notifier_block *nb) +{ + return -EOPNOTSUPP; +} #endif #endif -- cgit v1.2.3 From 22f67cdfae6aaa7e841ced17207391fb368c8e9e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 23 Mar 2021 01:51:46 +0200 Subject: net: bridge: add helper to replay VLANs installed on port Currently this simple setup with DSA: ip link add br0 type bridge vlan_filtering 1 ip link add bond0 type bond ip link set bond0 master br0 ip link set swp0 master bond0 will not work because the bridge has created the PVID in br_add_if -> nbp_vlan_init, and it has notified switchdev of the existence of VLAN 1, but that was too early, since swp0 was not yet a lower of bond0, so it had no reason to act upon that notification. We need a helper in the bridge to replay the switchdev VLAN objects that were notified since the bridge port creation, because some of them may have been missed. As opposed to the br_mdb_replay function, the vg->vlan_list write side protection is offered by the rtnl_mutex which is sleepable, so we don't need to queue up the objects in atomic context, we can replay them right away. Signed-off-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index b564c4486a45..2cc35038a8ca 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -111,6 +111,8 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid); int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto); int br_vlan_get_info(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo); +int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, + struct notifier_block *nb, struct netlink_ext_ack *extack); #else static inline bool br_vlan_enabled(const struct net_device *dev) { @@ -137,6 +139,14 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid, { return -EINVAL; } + +static inline int br_vlan_replay(struct net_device *br_dev, + struct net_device *dev, + struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} #endif #if IS_ENABLED(CONFIG_BRIDGE) -- cgit v1.2.3 From 5aa3afe107d9099fc0dea2acf82c3e3c8f0f20e2 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 23 Mar 2021 07:49:23 +0100 Subject: net: make unregister netdev warning timeout configurable netdev_wait_allrefs() issues a warning if refcount does not drop to 0 after 10 seconds. While 10 second wait generally should not happen under normal workload in normal environment, it seems to fire falsely very often during fuzzing and/or in qemu emulation (~10x slower). At least it's not possible to understand if it's really a false positive or not. Automated testing generally bumps all timeouts to very high values to avoid flake failures. Add net.core.netdev_unregister_timeout_secs sysctl to make the timeout configurable for automated testing systems. Lowering the timeout may also be useful for e.g. manual bisection. The default value matches the current behavior. Signed-off-by: Dmitry Vyukov Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=211877 Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7005ad80e8d1..5fa66db0cb5d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4661,6 +4661,7 @@ void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; +extern int netdev_unregister_timeout_secs; extern int weight_p; extern int dev_weight_rx_bias; extern int dev_weight_tx_bias; -- cgit v1.2.3 From ee89646619ba07d054348c0240da4b953cd1e72f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 18 Mar 2021 15:03:18 +0200 Subject: pps: clients: gpio: Get rid of legacy platform data Platform data is a legacy interface to supply device properties to the driver. In this case we even don't have in-kernel users for it. Just remove it for good. Acked-by: Rodolfo Giometti Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210318130321.24227-4-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/pps-gpio.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 include/linux/pps-gpio.h (limited to 'include/linux') diff --git a/include/linux/pps-gpio.h b/include/linux/pps-gpio.h deleted file mode 100644 index 7bf49908be06..000000000000 --- a/include/linux/pps-gpio.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * pps-gpio.h -- PPS client for GPIOs - * - * Copyright (C) 2011 James Nuss - */ - -#ifndef _PPS_GPIO_H -#define _PPS_GPIO_H - -struct pps_gpio_platform_data { - struct gpio_desc *gpio_pin; - struct gpio_desc *echo_pin; - bool assert_falling_edge; - bool capture_clear; - unsigned int echo_active_ms; -}; - -#endif /* _PPS_GPIO_H */ -- cgit v1.2.3 From 6bbdc3db76ccc6d9ff1c3d6ad36c8ae5bd67ee1f Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Mon, 22 Mar 2021 05:01:08 +0530 Subject: hv: hyperv.h: a few mundane typo fixes s/sructure/structure/ s/extention/extension/ s/offerred/offered/ s/adversley/adversely/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20210321233108.3885240-1-unixbhaskar@gmail.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f1d74dcf0353..2c18c8e768ef 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -284,7 +284,7 @@ struct vmbus_channel_offer { /* * Pipes: - * The following sructure is an integrated pipe protocol, which + * The following structure is an integrated pipe protocol, which * is implemented on top of standard user-defined data. Pipe * clients have MAX_PIPE_USER_DEFINED_BYTES left for their own * use. @@ -883,11 +883,11 @@ struct vmbus_channel { * Support for sub-channels. For high performance devices, * it will be useful to have multiple sub-channels to support * a scalable communication infrastructure with the host. - * The support for sub-channels is implemented as an extention + * The support for sub-channels is implemented as an extension * to the current infrastructure. * The initial offer is considered the primary channel and this * offer message will indicate if the host supports sub-channels. - * The guest is free to ask for sub-channels to be offerred and can + * The guest is free to ask for sub-channels to be offered and can * open these sub-channels as a normal "primary" channel. However, * all sub-channels will have the same type and instance guids as the * primary channel. Requests sent on a given channel will result in a @@ -951,7 +951,7 @@ struct vmbus_channel { * Clearly, these optimizations improve throughput at the expense of * latency. Furthermore, since the channel is shared for both * control and data messages, control messages currently suffer - * unnecessary latency adversley impacting performance and boot + * unnecessary latency adversely impacting performance and boot * time. To fix this issue, permit tagging the channel as being * in "low latency" mode. In this mode, we will bypass the monitor * mechanism. -- cgit v1.2.3 From 54443ef6f5d10d9c6bb17f1dbeea7eb8d5c9a839 Mon Sep 17 00:00:00 2001 From: JC Kuo Date: Wed, 20 Jan 2021 15:34:01 +0800 Subject: clk: tegra: Add PLLE HW power sequencer control PLLE has a hardware power sequencer logic which is a state machine that can power on/off PLLE without any software intervention. The sequencer has two inputs, one from XUSB UPHY PLL and the other from SATA UPHY PLL. PLLE provides reference clock to XUSB and SATA UPHY PLLs. When both of the downstream PLLs are powered-off, PLLE hardware power sequencer will automatically power off PLLE for power saving. XUSB and SATA UPHY PLLs also have their own hardware power sequencer logic. XUSB UPHY PLL is shared between XUSB SuperSpeed ports and PCIE controllers. The XUSB UPHY PLL hardware power sequencer has inputs from XUSB and PCIE. When all of the XUSB SuperSpeed ports and PCIE controllers are in low power state, XUSB UPHY PLL hardware power sequencer automatically power off PLL and flags idle to PLLE hardware power sequencer. Similar applies to SATA UPHY PLL. PLLE hardware power sequencer has to be enabled after both downstream sequencers are enabled. This commit adds two helper functions: 1. tegra210_plle_hw_sequence_start() for XUSB PADCTL driver to enable PLLE hardware sequencer at proper time. 2. tegra210_plle_hw_sequence_is_enabled() for XUSB PADCTL driver to check whether PLLE hardware sequencer has been enabled or not. Signed-off-by: JC Kuo Acked-by: Thierry Reding Acked-by: Stephen Boyd Signed-off-by: Thierry Reding --- include/linux/clk/tegra.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index eb016fc9cc0b..f7ff722a03dd 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2012-2020, NVIDIA CORPORATION. All rights reserved. */ #ifndef __LINUX_CLK_TEGRA_H_ @@ -123,6 +123,8 @@ static inline void tegra_cpu_clock_resume(void) } #endif +extern int tegra210_plle_hw_sequence_start(void); +extern bool tegra210_plle_hw_sequence_is_enabled(void); extern void tegra210_xusb_pll_hw_control_enable(void); extern void tegra210_xusb_pll_hw_sequence_start(void); extern void tegra210_sata_pll_hw_control_enable(void); -- cgit v1.2.3 From ddb94eafab8b597b05904c8277194ea2d6357fa9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 24 Mar 2021 02:30:32 +0100 Subject: net: resolve forwarding path from virtual netdevice and HW destination address This patch adds dev_fill_forward_path() which resolves the path to reach the real netdevice from the IP forwarding side. This function takes as input the netdevice and the destination hardware address and it walks down the devices calling .ndo_fill_forward_path() for each device until the real device is found. For instance, assuming the following topology: IP forwarding / \ br0 eth0 / \ eth1 eth2 . . . ethX ab:cd:ef:ab:cd:ef where eth1 and eth2 are bridge ports and eth0 provides WAN connectivity. ethX is the interface in another box which is connected to the eth1 bridge port. For packets going through IP forwarding to br0 whose destination MAC address is ab:cd:ef:ab:cd:ef, dev_fill_forward_path() provides the following path: br0 -> eth1 .ndo_fill_forward_path for br0 looks up at the FDB for the bridge port from the destination MAC address to get the bridge port eth1. This information allows to create a fast path that bypasses the classic bridge and IP forwarding paths, so packets go directly from the bridge port eth1 to eth0 (wan interface) and vice versa. fast path .------------------------. / \ | IP forwarding | | / \ \/ | br0 eth0 . / \ -> eth1 eth2 . . . ethX ab:cd:ef:ab:cd:ef Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netdevice.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5fa66db0cb5d..03cff88c7292 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -848,6 +848,27 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); +enum net_device_path_type { + DEV_PATH_ETHERNET = 0, +}; + +struct net_device_path { + enum net_device_path_type type; + const struct net_device *dev; +}; + +#define NET_DEVICE_PATH_STACK_MAX 5 + +struct net_device_path_stack { + int num_paths; + struct net_device_path path[NET_DEVICE_PATH_STACK_MAX]; +}; + +struct net_device_path_ctx { + const struct net_device *dev; + const u8 *daddr; +}; + enum tc_setup_type { TC_SETUP_QDISC_MQPRIO, TC_SETUP_CLSU32, @@ -1282,6 +1303,8 @@ struct netdev_net_notifier { * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev); * If a device is paired with a peer device, return the peer instance. * The caller must be under RCU read context. + * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); + * Get the forwarding path to reach the real device from the HW destination address */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1488,6 +1511,8 @@ struct net_device_ops { int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); + int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, + struct net_device_path *path); }; /** @@ -2870,6 +2895,8 @@ void dev_remove_offload(struct packet_offload *po); int dev_get_iflink(const struct net_device *dev); int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); +int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, + struct net_device_path_stack *stack); struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); -- cgit v1.2.3 From e4417d6950b06fe6c520e937b337daff093220ff Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 24 Mar 2021 02:30:33 +0100 Subject: net: 8021q: resolve forwarding path for vlan devices Add .ndo_fill_forward_path for vlan devices. For instance, assuming the following topology: IP forwarding / \ eth0.100 eth0 | eth0 . . . ethX ab:cd:ef:ab:cd:ef For packets going through IP forwarding to eth0.100 whose destination MAC address is ab:cd:ef:ab:cd:ef, dev_fill_forward_path() provides the following path: eth0.100 -> eth0 Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 03cff88c7292..8823a56744f1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -850,11 +850,18 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, enum net_device_path_type { DEV_PATH_ETHERNET = 0, + DEV_PATH_VLAN, }; struct net_device_path { enum net_device_path_type type; const struct net_device *dev; + union { + struct { + u16 id; + __be16 proto; + } encap; + }; }; #define NET_DEVICE_PATH_STACK_MAX 5 -- cgit v1.2.3 From ec9d16bab615ceda8ac22a7b4d2c7601bbe172cb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 24 Mar 2021 02:30:34 +0100 Subject: net: bridge: resolve forwarding path for bridge devices Add .ndo_fill_forward_path for bridge devices. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8823a56744f1..a24270b0d200 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -851,6 +851,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, enum net_device_path_type { DEV_PATH_ETHERNET = 0, DEV_PATH_VLAN, + DEV_PATH_BRIDGE, }; struct net_device_path { -- cgit v1.2.3 From bcf2766b1377421b7c9259865b25c1b62a7fa686 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 24 Mar 2021 02:30:35 +0100 Subject: net: bridge: resolve forwarding path for VLAN tag actions in bridge devices Depending on the VLAN settings of the bridge and the port, the bridge can either add or remove a tag. When vlan filtering is enabled, the fdb lookup also needs to know the VLAN tag/proto for the destination address To provide this, keep track of the stack of VLAN tags for the path in the lookup context Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netdevice.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a24270b0d200..344d9c0c9b22 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -862,10 +862,20 @@ struct net_device_path { u16 id; __be16 proto; } encap; + struct { + enum { + DEV_PATH_BR_VLAN_KEEP, + DEV_PATH_BR_VLAN_TAG, + DEV_PATH_BR_VLAN_UNTAG, + } vlan_mode; + u16 vlan_id; + __be16 vlan_proto; + } bridge; }; }; #define NET_DEVICE_PATH_STACK_MAX 5 +#define NET_DEVICE_PATH_VLAN_MAX 2 struct net_device_path_stack { int num_paths; @@ -875,6 +885,12 @@ struct net_device_path_stack { struct net_device_path_ctx { const struct net_device *dev; const u8 *daddr; + + int num_vlans; + struct { + u16 id; + __be16 proto; + } vlan[NET_DEVICE_PATH_VLAN_MAX]; }; enum tc_setup_type { -- cgit v1.2.3 From f6efc675c9dd8d93f826b79ae7e33e03301db609 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 24 Mar 2021 02:30:36 +0100 Subject: net: ppp: resolve forwarding path for bridge pppoe devices Pass on the PPPoE session ID, destination hardware address and the real device. Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ include/linux/ppp_channel.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 344d9c0c9b22..dd54f7cc3f12 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -852,6 +852,7 @@ enum net_device_path_type { DEV_PATH_ETHERNET = 0, DEV_PATH_VLAN, DEV_PATH_BRIDGE, + DEV_PATH_PPPOE, }; struct net_device_path { @@ -861,6 +862,7 @@ struct net_device_path { struct { u16 id; __be16 proto; + u8 h_dest[ETH_ALEN]; } encap; struct { enum { diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h index 98966064ee68..91f9a928344e 100644 --- a/include/linux/ppp_channel.h +++ b/include/linux/ppp_channel.h @@ -28,6 +28,9 @@ struct ppp_channel_ops { int (*start_xmit)(struct ppp_channel *, struct sk_buff *); /* Handle an ioctl call that has come in via /dev/ppp. */ int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long); + int (*fill_forward_path)(struct net_device_path_ctx *, + struct net_device_path *, + const struct ppp_channel *); }; struct ppp_channel { -- cgit v1.2.3 From 0994d492a1b78dff96671ccf6ad8294cc2bd909e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 24 Mar 2021 02:30:37 +0100 Subject: net: dsa: resolve forwarding path for dsa slave ports Add .ndo_fill_forward_path for dsa slave port devices Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dd54f7cc3f12..90db74132090 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -853,6 +853,7 @@ enum net_device_path_type { DEV_PATH_VLAN, DEV_PATH_BRIDGE, DEV_PATH_PPPOE, + DEV_PATH_DSA, }; struct net_device_path { @@ -873,6 +874,10 @@ struct net_device_path { u16 vlan_id; __be16 vlan_proto; } bridge; + struct { + int port; + u16 proto; + } dsa; }; }; -- cgit v1.2.3 From 26267bf9bb57d504c785d8659adc8e02b6629c95 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 24 Mar 2021 02:30:48 +0100 Subject: netfilter: flowtable: bridge vlan hardware offload and switchdev The switch might have already added the VLAN tag through PVID hardware offload. Keep this extra VLAN in the flowtable but skip it on egress. Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 90db74132090..02fa1da8cd22 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -870,6 +870,7 @@ struct net_device_path { DEV_PATH_BR_VLAN_KEEP, DEV_PATH_BR_VLAN_TAG, DEV_PATH_BR_VLAN_UNTAG, + DEV_PATH_BR_VLAN_UNTAG_HW, } vlan_mode; u16 vlan_id; __be16 vlan_proto; -- cgit v1.2.3 From b0b8b689d78c9666a991e1cc87c3dad4c261007f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 11:56:24 +0000 Subject: genirq: Allow architectures to override set_handle_irq() fallback Some architectures want to provide the generic set_handle_irq() API, but for structural reasons need to provide their own implementation. For example, arm64 needs to do this to provide uniform set_handle_irq() and set_handle_fiq() registration functions. Make this possible by allowing architectures to provide their own implementation of set_handle_irq when CONFIG_GENERIC_IRQ_MULTI_HANDLER is not selected. Signed-off-by: Marc Zyngier [Mark: expand commit message] Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: James Morse Cc: Thomas Gleixner Cc: Will Deacon Link: https://lore.kernel.org/r/20210315115629.57191-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- include/linux/irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 2efde6a79b7e..9890180b84fd 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1258,11 +1258,13 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)); */ extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; #else +#ifndef set_handle_irq #define set_handle_irq(handle_irq) \ do { \ (void)handle_irq; \ WARN_ON(1); \ } while (0) #endif +#endif #endif /* _LINUX_IRQ_H */ -- cgit v1.2.3 From 341f67e424e572bfc034daa534c6fa667533e6a4 Mon Sep 17 00:00:00 2001 From: Tan Tee Min Date: Tue, 23 Mar 2021 19:07:34 +0800 Subject: net: stmmac: Add hardware supported cross-timestamp Cross timestamping is supported on Integrated Ethernet Controller in Intel SoC such as EHL and TGL with Always Running Timer. The hardware cross-timestamp result is made available to applications through the PTP_SYS_OFFSET_PRECISE ioctl which calls stmmac_getcrosststamp(). Device time is stored in the MAC Auxiliary register. The 64-bit System time (ART timestamp) is stored in registers that are only addressable by using MDIO space. Signed-off-by: Tan Tee Min Co-developed-by: Wong Vee Khee Signed-off-by: Wong Vee Khee Signed-off-by: David S. Miller --- include/linux/stmmac.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 10abc80b601e..5134e802f39a 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -186,6 +186,8 @@ struct plat_stmmacenet_data { void (*exit)(struct platform_device *pdev, void *priv); struct mac_device_info *(*setup)(void *priv); int (*clks_config)(void *priv, bool enabled); + int (*crosststamp)(ktime_t *device, struct system_counterval_t *system, + void *ctx); void *bsp_priv; struct clk *stmmac_clk; struct clk *pclk; @@ -206,5 +208,7 @@ struct plat_stmmacenet_data { u8 vlan_fail_q; unsigned int eee_usecs_rate; struct pci_dev *pdev; + bool has_crossts; + int int_snapshot_num; }; #endif -- cgit v1.2.3 From 0ef25ed104ac17fa0586fbb076f24a5e8940b966 Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Wed, 24 Mar 2021 00:46:40 +0800 Subject: net: phy: add genphy_c45_loopback Add generic code to enable C45 PHY loopback into the common phy-c45.c file. This will allow C45 PHY drivers aceess this by setting .set_loopback. Suggested-by: Heiner Kallweit Signed-off-by: Wong Vee Khee Reviewed-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 1a12e4436b5b..8e2cf84b2318 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1532,6 +1532,7 @@ int genphy_c45_read_mdix(struct phy_device *phydev); int genphy_c45_pma_read_abilities(struct phy_device *phydev); int genphy_c45_read_status(struct phy_device *phydev); int genphy_c45_config_aneg(struct phy_device *phydev); +int genphy_c45_loopback(struct phy_device *phydev, bool enable); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; -- cgit v1.2.3 From 0909fc2b2c41aae50a18a36ac2858d156f521871 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 25 Feb 2021 17:36:06 -0800 Subject: rcu: Provide polling interfaces for Tiny RCU grace periods There is a need for a non-blocking polling interface for RCU grace periods, so this commit supplies start_poll_synchronize_rcu() and poll_state_synchronize_rcu() for this purpose. Note that the existing get_state_synchronize_rcu() may be used if future grace periods are inevitable (perhaps due to a later call_rcu() invocation). The new start_poll_synchronize_rcu() is to be used if future grace periods might not otherwise happen. Finally, poll_state_synchronize_rcu() provides a lockless check for a grace period having elapsed since the corresponding call to either of the get_state_synchronize_rcu() or start_poll_synchronize_rcu(). As with get_state_synchronize_rcu(), the return value from either get_state_synchronize_rcu() or start_poll_synchronize_rcu() is passed in to a later call to either poll_state_synchronize_rcu() or the existing (might_sleep) cond_synchronize_rcu(). [ paulmck: Revert cond_synchronize_rcu() to might_sleep() per Frederic Weisbecker feedback. ] Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 2a97334eb786..35e0be326ffc 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -17,10 +17,9 @@ /* Never flag non-existent other CPUs! */ static inline bool rcu_eqs_special_set(int cpu) { return false; } -static inline unsigned long get_state_synchronize_rcu(void) -{ - return 0; -} +unsigned long get_state_synchronize_rcu(void); +unsigned long start_poll_synchronize_rcu(void); +bool poll_state_synchronize_rcu(unsigned long oldstate); static inline void cond_synchronize_rcu(unsigned long oldstate) { -- cgit v1.2.3 From 5a5586112b929546e16029261a987c9197bfdfa2 Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Wed, 24 Mar 2021 17:07:42 +0800 Subject: net: stmmac: support FPE link partner hand-shaking procedure In order to discover whether remote station supports frame preemption, local station sends verify mPacket and expects response mPacket in return from the remote station. So, we add the functions to send and handle event when verify mPacket and response mPacket are exchanged between the networked stations. The mechanism to handle different FPE states between local and remote station (link partner) is implemented using workqueue which starts a task each time there is some sign of verify & response mPacket exchange as check in FPE IRQ event. The task retries couple of times to try to spot the states that both stations are ready to enter FPE ON. This allows different end points to enable FPE at different time and verify-response mPacket can happen asynchronously. Ultimately, the task will only turn FPE ON when local station have both exchange response in both directions. Thanks to Voon Weifeng for implementing the core functions for detecting FPE events and send mPacket and phylink related change. Signed-off-by: Ong Boon Leong Co-developed-by: Voon Weifeng Signed-off-by: Voon Weifeng Co-developed-by: Tan Tee Min Signed-off-by: Tan Tee Min Co-developed-by: Mohammad Athari Bin Ismail Signed-off-by: Mohammad Athari Bin Ismail Signed-off-by: David S. Miller --- include/linux/stmmac.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 5134e802f39a..febdb43d27e5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -144,6 +144,32 @@ struct stmmac_txq_cfg { int tbs_en; }; +/* FPE link state */ +enum stmmac_fpe_state { + FPE_STATE_OFF = 0, + FPE_STATE_CAPABLE = 1, + FPE_STATE_ENTERING_ON = 2, + FPE_STATE_ON = 3, +}; + +/* FPE link-partner hand-shaking mPacket type */ +enum stmmac_mpacket_type { + MPACKET_VERIFY = 0, + MPACKET_RESPONSE = 1, +}; + +enum stmmac_fpe_task_state_t { + __FPE_REMOVING, + __FPE_TASK_SCHED, +}; + +struct stmmac_fpe_cfg { + bool enable; /* FPE enable */ + bool hs_enable; /* FPE handshake enable */ + enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */ + enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */ +}; + struct plat_stmmacenet_data { int bus_id; int phy_addr; @@ -155,6 +181,7 @@ struct plat_stmmacenet_data { struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; struct stmmac_est *est; + struct stmmac_fpe_cfg *fpe_cfg; int clk_csr; int has_gmac; int enh_desc; -- cgit v1.2.3 From 8af856d18bfbe89676ade38caa2a5d06f75f211d Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Wed, 24 Mar 2021 13:40:40 +0800 Subject: locking/mutex: Remove repeated declaration Commit 0cd39f4600ed ("locking/seqlock, headers: Untangle the spaghetti monster") introduces 'struct ww_acquire_ctx' again, remove the repeated declaration and move the pre-declarations to the top. Signed-off-by: Shaokun Zhang Signed-off-by: Ingo Molnar Acked-by: Waiman Long Link: https://lore.kernel.org/r/1616564440-61318-1-git-send-email-zhangshaokun@hisilicon.com --- include/linux/mutex.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 0cd631a19727..e7a126796937 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -20,6 +20,7 @@ #include #include +struct ww_class; struct ww_acquire_ctx; /* @@ -65,9 +66,6 @@ struct mutex { #endif }; -struct ww_class; -struct ww_acquire_ctx; - struct ww_mutex { struct mutex base; struct ww_acquire_ctx *ctx; -- cgit v1.2.3 From 3bf30882c3c7b6e376d9d6d04082c9aa2d2ac30a Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Mon, 8 Mar 2021 14:50:35 +0800 Subject: drivers/perf: hisi: Add support for HiSilicon SLLC PMU driver HiSilicon's Hip09 is comprised by multi-dies that can be connected by SLLC module (Skyros Link Layer Controller), its has separate PMU registers which the driver can program it freely and interrupt is supported to handle counter overflow. Let's support its driver under the framework of HiSilicon uncore PMU driver. SLLC PMU supports the following filter functions: * tracetag_en: allows user to count data according to tt_req or tt_core set in L3C PMU. * srcid_cmd & srcid_msk: allows user to filter statistics that come from specific CCL/ICL by configuration source ID. * tgtid_hi & tgtid_lo: it also supports event statistics that these operations will go to the CCL/ICL by configuration target ID or target ID range. It's the same as source ID with 11-bit width in the SoC. More introduction is added in documentation: Documentation/admin-guide/perf/hisi-pmu.rst Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-8-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index f14adb882338..5f5ce676532f 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -175,6 +175,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, + CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, -- cgit v1.2.3 From a0ab25cd82eeb68bfa19a4d93a097521af5011b8 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Mon, 8 Mar 2021 14:50:36 +0800 Subject: drivers/perf: hisi: Add support for HiSilicon PA PMU driver On HiSilicon Hip09 platform, there is a PA (Protocol Adapter) module on each chip SICL (Super I/O Cluster) which incorporates three Hydra interface and facilitates the cache coherency between the dies on the chip. While PA uncore PMU model is the same as other Hip09 PMU modules and many PMU events are supported. Let's support the PMU driver using the HiSilicon uncore PMU framework. PA PMU supports the following filter functions: * tracetag_en: allows user to count events according to tt_req or tt_core set in L3C PMU. It's the same as other PMUs. * srcid_cmd & srcid_msk: allows user to filter statistics that come from specific CCL/ICL by configuration source ID. * tgtid_cmd & tgtid_msk: it is the similar function to srcid_cmd & srcid_msk. Both are used to check where the data comes from or go to. Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-9-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 5f5ce676532f..8ef744007e38 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -175,6 +175,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, + CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, -- cgit v1.2.3 From d85aecf2844ff02a0e5f077252b2461d4f10c9f0 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 24 Mar 2021 21:37:17 -0700 Subject: hugetlb_cgroup: fix imbalanced css_get and css_put pair for shared mappings The current implementation of hugetlb_cgroup for shared mappings could have different behavior. Consider the following two scenarios: 1.Assume initial css reference count of hugetlb_cgroup is 1: 1.1 Call hugetlb_reserve_pages with from = 1, to = 2. So css reference count is 2 associated with 1 file_region. 1.2 Call hugetlb_reserve_pages with from = 2, to = 3. So css reference count is 3 associated with 2 file_region. 1.3 coalesce_file_region will coalesce these two file_regions into one. So css reference count is 3 associated with 1 file_region now. 2.Assume initial css reference count of hugetlb_cgroup is 1 again: 2.1 Call hugetlb_reserve_pages with from = 1, to = 3. So css reference count is 2 associated with 1 file_region. Therefore, we might have one file_region while holding one or more css reference counts. This inconsistency could lead to imbalanced css_get() and css_put() pair. If we do css_put one by one (i.g. hole punch case), scenario 2 would put one more css reference. If we do css_put all together (i.g. truncate case), scenario 1 will leak one css reference. The imbalanced css_get() and css_put() pair would result in a non-zero reference when we try to destroy the hugetlb cgroup. The hugetlb cgroup directory is removed __but__ associated resource is not freed. This might result in OOM or can not create a new hugetlb cgroup in a busy workload ultimately. In order to fix this, we have to make sure that one file_region must hold exactly one css reference. So in coalesce_file_region case, we should release one css reference before coalescence. Also only put css reference when the entire file_region is removed. The last thing to note is that the caller of region_add() will only hold one reference to h_cg->css for the whole contiguous reservation region. But this area might be scattered when there are already some file_regions reside in it. As a result, many file_regions may share only one h_cg->css reference. In order to ensure that one file_region must hold exactly one css reference, we should do css_get() for each file_region and release the reference held by caller when they are done. [linmiaohe@huawei.com: fix imbalanced css_get and css_put pair for shared mappings] Link: https://lkml.kernel.org/r/20210316023002.53921-1-linmiaohe@huawei.com Link: https://lkml.kernel.org/r/20210301120540.37076-1-linmiaohe@huawei.com Fixes: 075a61d07a8e ("hugetlb_cgroup: add accounting for shared mappings") Reported-by: kernel test robot (auto build test ERROR) Signed-off-by: Miaohe Lin Reviewed-by: Mike Kravetz Cc: Aneesh Kumar K.V Cc: Wanpeng Li Cc: Mina Almasry Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb_cgroup.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 2ad6e92f124a..0bff345c4bc6 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -113,6 +113,11 @@ static inline bool hugetlb_cgroup_disabled(void) return !cgroup_subsys_enabled(hugetlb_cgrp_subsys); } +static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg) +{ + css_put(&h_cg->css); +} + extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr); extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, @@ -138,7 +143,8 @@ extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, struct file_region *rg, - unsigned long nr_pages); + unsigned long nr_pages, + bool region_del); extern void hugetlb_cgroup_file_init(void) __init; extern void hugetlb_cgroup_migrate(struct page *oldhpage, @@ -147,7 +153,8 @@ extern void hugetlb_cgroup_migrate(struct page *oldhpage, #else static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, struct file_region *rg, - unsigned long nr_pages) + unsigned long nr_pages, + bool region_del) { } @@ -185,6 +192,10 @@ static inline bool hugetlb_cgroup_disabled(void) return true; } +static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg) +{ +} + static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr) { -- cgit v1.2.3 From cf10bd4c4aff8dd64d1aa7f2a529d0c672bc16af Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 24 Mar 2021 21:37:20 -0700 Subject: kasan: fix per-page tags for non-page_alloc pages To allow performing tag checks on page_alloc addresses obtained via page_address(), tag-based KASAN modes store tags for page_alloc allocations in page->flags. Currently, the default tag value stored in page->flags is 0x00. Therefore, page_address() returns a 0x00ffff... address for pages that were not allocated via page_alloc. This might cause problems. A particular case we encountered is a conflict with KFENCE. If a KFENCE-allocated slab object is being freed via kfree(page_address(page) + offset), the address passed to kfree() will get tagged with 0x00 (as slab pages keep the default per-page tags). This leads to is_kfence_address() check failing, and a KFENCE object ending up in normal slab freelist, which causes memory corruptions. This patch changes the way KASAN stores tag in page-flags: they are now stored xor'ed with 0xff. This way, KASAN doesn't need to initialize per-page flags for every created page, which might be slow. With this change, page_address() returns natively-tagged (with 0xff) pointers for pages that didn't have tags set explicitly. This patch fixes the encountered conflict with KFENCE and prevents more similar issues that can occur in the future. Link: https://lkml.kernel.org/r/1a41abb11c51b264511d9e71c303bb16d5cb367b.1615475452.git.andreyknvl@google.com Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc") Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Cc: Catalin Marinas Cc: Will Deacon Cc: Vincenzo Frascino Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Peter Collingbourne Cc: Evgenii Stepanov Cc: Branislav Rankov Cc: Kevin Brodsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 64a71bf20536..8ba434287387 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1461,16 +1461,28 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) +/* + * KASAN per-page tags are stored xor'ed with 0xff. This allows to avoid + * setting tags for all pages to native kernel tag value 0xff, as the default + * value 0x00 maps to 0xff. + */ + static inline u8 page_kasan_tag(const struct page *page) { - if (kasan_enabled()) - return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; - return 0xff; + u8 tag = 0xff; + + if (kasan_enabled()) { + tag = (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; + tag ^= 0xff; + } + + return tag; } static inline void page_kasan_tag_set(struct page *page, u8 tag) { if (kasan_enabled()) { + tag ^= 0xff; page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; } -- cgit v1.2.3 From c2655835fd8cabdfe7dab737253de3ffb88da126 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 24 Mar 2021 21:37:23 -0700 Subject: mm/mmu_notifiers: ensure range_end() is paired with range_start() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If one or more notifiers fails .invalidate_range_start(), invoke .invalidate_range_end() for "all" notifiers. If there are multiple notifiers, those that did not fail are expecting _start() and _end() to be paired, e.g. KVM's mmu_notifier_count would become imbalanced. Disallow notifiers that can fail _start() from implementing _end() so that it's unnecessary to either track which notifiers rejected _start(), or had already succeeded prior to a failed _start(). Note, the existing behavior of calling _start() on all notifiers even after a previous notifier failed _start() was an unintented "feature". Make it canon now that the behavior is depended on for correctness. As of today, the bug is likely benign: 1. The only caller of the non-blocking notifier is OOM kill. 2. The only notifiers that can fail _start() are the i915 and Nouveau drivers. 3. The only notifiers that utilize _end() are the SGI UV GRU driver and KVM. 4. The GRU driver will never coincide with the i195/Nouveau drivers. 5. An imbalanced kvm->mmu_notifier_count only causes soft lockup in the _guest_, and the guest is already doomed due to being an OOM victim. Fix the bug now to play nice with future usage, e.g. KVM has a potential use case for blocking memslot updates in KVM while an invalidation is in-progress, and failure to unblock would result in said updates being blocked indefinitely and hanging. Found by inspection. Verified by adding a second notifier in KVM that periodically returns -EAGAIN on non-blockable ranges, triggering OOM, and observing that KVM exits with an elevated notifier count. Link: https://lkml.kernel.org/r/20210311180057.1582638-1-seanjc@google.com Fixes: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers") Signed-off-by: Sean Christopherson Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Cc: David Rientjes Cc: Ben Gardon Cc: Michal Hocko Cc: "Jérôme Glisse" Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Dimitri Sivanich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index b8200782dede..1a6a9eb6d3fa 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -169,11 +169,11 @@ struct mmu_notifier_ops { * the last refcount is dropped. * * If blockable argument is set to false then the callback cannot - * sleep and has to return with -EAGAIN. 0 should be returned - * otherwise. Please note that if invalidate_range_start approves - * a non-blocking behavior then the same applies to - * invalidate_range_end. - * + * sleep and has to return with -EAGAIN if sleeping would be required. + * 0 should be returned otherwise. Please note that notifiers that can + * fail invalidate_range_start are not allowed to implement + * invalidate_range_end, as there is no mechanism for informing the + * notifier that its start failed. */ int (*invalidate_range_start)(struct mmu_notifier *subscription, const struct mmu_notifier_range *range); -- cgit v1.2.3 From a024b7c2850dddd01e65b8270f0971deaf272f27 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 24 Mar 2021 21:37:50 -0700 Subject: mm: memblock: fix section mismatch warning again Commit 34dc2efb39a2 ("memblock: fix section mismatch warning") marked memblock_bottom_up() and memblock_set_bottom_up() as __init, but they could be referenced from non-init functions like memblock_find_in_range_node() on architectures that enable CONFIG_ARCH_KEEP_MEMBLOCK. For such builds kernel test robot reports: WARNING: modpost: vmlinux.o(.text+0x74fea4): Section mismatch in reference from the function memblock_find_in_range_node() to the function .init.text:memblock_bottom_up() The function memblock_find_in_range_node() references the function __init memblock_bottom_up(). This is often because memblock_find_in_range_node lacks a __init annotation or the annotation of memblock_bottom_up is wrong. Replace __init annotations with __init_memblock annotations so that the appropriate section will be selected depending on CONFIG_ARCH_KEEP_MEMBLOCK. Link: https://lore.kernel.org/lkml/202103160133.UzhgY0wt-lkp@intel.com Link: https://lkml.kernel.org/r/20210316171347.14084-1-rppt@kernel.org Fixes: 34dc2efb39a2 ("memblock: fix section mismatch warning") Signed-off-by: Mike Rapoport Reviewed-by: Arnd Bergmann Reported-by: kernel test robot Reviewed-by: David Hildenbrand Acked-by: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index d13e3cd938b4..5984fff3f175 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -460,7 +460,7 @@ static inline void memblock_free_late(phys_addr_t base, phys_addr_t size) /* * Set the allocation direction to bottom-up or top-down. */ -static inline __init void memblock_set_bottom_up(bool enable) +static inline __init_memblock void memblock_set_bottom_up(bool enable) { memblock.bottom_up = enable; } @@ -470,7 +470,7 @@ static inline __init void memblock_set_bottom_up(bool enable) * if this is true, that said, memblock will allocate memory * in bottom-up direction. */ -static inline __init bool memblock_bottom_up(void) +static inline __init_memblock bool memblock_bottom_up(void) { return memblock.bottom_up; } -- cgit v1.2.3 From 22f8b5df881e9f1302514bbbbbb8649c2051de55 Mon Sep 17 00:00:00 2001 From: Norbert Ciosek Date: Thu, 28 Jan 2021 10:17:02 -0800 Subject: virtchnl: Fix layout of RSS structures Remove padding from RSS structures. Previous layout could lead to unwanted compiler optimizations in loops when iterating over key and lut arrays. Fixes: 65ece6de0114 ("virtchnl: Add missing explicit padding to structures") Signed-off-by: Norbert Ciosek Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 40bad71865ea..532bcbfc4716 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -476,7 +476,6 @@ struct virtchnl_rss_key { u16 vsi_id; u16 key_len; u8 key[1]; /* RSS hash key, packed bytes */ - u8 pad[1]; }; VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key); @@ -485,7 +484,6 @@ struct virtchnl_rss_lut { u16 vsi_id; u16 lut_entries; u8 lut[1]; /* RSS lookup table */ - u8 pad[1]; }; VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut); -- cgit v1.2.3 From 374be283ad429bf703d9036b799331dda793aeb7 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 10 Feb 2021 18:45:59 -0800 Subject: platform/chrome: cros_ec: Add SW_FRONT_PROXIMITY MKBP define Some cros ECs support a front proximity MKBP event via 'EC_MKBP_FRONT_PROXIMITY'. Add this define so it can be used in a future patch. Cc: Dmitry Torokhov Cc: Benson Leung Cc: Guenter Roeck Cc: Douglas Anderson Cc: Gwendal Grignou Acked-by: Enric Balletbo i Serra Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20210211024601.1963379-2-swboyd@chromium.org Signed-off-by: Jonathan Cameron --- include/linux/platform_data/cros_ec_commands.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 5ff8597ceabd..6035d9a98fb8 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -3467,6 +3467,7 @@ struct ec_response_get_next_event_v1 { #define EC_MKBP_LID_OPEN 0 #define EC_MKBP_TABLET_MODE 1 #define EC_MKBP_BASE_ATTACHED 2 +#define EC_MKBP_FRONT_PROXIMITY 3 /* Run keyboard factory test scanning */ #define EC_CMD_KEYBOARD_FACTORY_TEST 0x0068 -- cgit v1.2.3 From 995071d36bb9804b644265450142fcb91c427ee8 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 9 Mar 2021 11:36:13 -0800 Subject: iio: set default trig->dev.parent When allocated with [devm_]iio_trigger_alloc(), set trig device parent to the device the trigger is allocated for by default. It can always be reassigned in the probe routine. Change iio_trigger_alloc() API to add the device pointer to be coherent with devm_iio_trigger_alloc, using similar interface to iio_device_alloc(). Signed-off-by: Gwendal Grignou Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210309193620.2176163-2-gwendal@chromium.org Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- include/linux/iio/trigger.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index f8585d01fc76..f2d65e2e88b6 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -693,7 +693,7 @@ static inline void *iio_priv(const struct iio_dev *indio_dev) void iio_device_free(struct iio_dev *indio_dev); struct iio_dev *devm_iio_device_alloc(struct device *parent, int sizeof_priv); __printf(2, 3) -struct iio_trigger *devm_iio_trigger_alloc(struct device *dev, +struct iio_trigger *devm_iio_trigger_alloc(struct device *parent, const char *fmt, ...); /** * iio_buffer_enabled() - helper function to test if the buffer is enabled diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 055890b6ffcf..096f68dd2e0c 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -161,7 +161,8 @@ void iio_trigger_poll_chained(struct iio_trigger *trig); irqreturn_t iio_trigger_generic_data_rdy_poll(int irq, void *private); -__printf(1, 2) struct iio_trigger *iio_trigger_alloc(const char *fmt, ...); +__printf(2, 3) +struct iio_trigger *iio_trigger_alloc(struct device *parent, const char *fmt, ...); void iio_trigger_free(struct iio_trigger *trig); /** -- cgit v1.2.3 From 635ef601b2387c3215252c9931786524d122c0e7 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 8 Mar 2021 11:02:18 +0100 Subject: iio: Provide iio_read_channel_processed_scale() API Since the old iio_read_channel_processed() would lose precision if we fall back to reading raw and scaling, we introduce a new API that will pass in a scale factor when reading a processed channel: iio_read_channel_processed_scale(). Refactor iio_read_channel_processed() as a special case with scale factor 1. Cc: Peter Rosin Cc: Chris Lesiak Cc: Jonathan Cameron Cc: linux-iio@vger.kernel.org Link: https://lore.kernel.org/linux-iio/20201224011607.1059534-1-linus.walleij@linaro.org/ Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20210308100219.2732156-1-linus.walleij@linaro.org Signed-off-by: Jonathan Cameron --- include/linux/iio/consumer.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 0a90ba8fa1bb..5fa5957586cf 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -241,6 +241,21 @@ int iio_read_channel_average_raw(struct iio_channel *chan, int *val); */ int iio_read_channel_processed(struct iio_channel *chan, int *val); +/** + * iio_read_channel_processed_scale() - read and scale a processed value + * @chan: The channel being queried. + * @val: Value read back. + * @scale: Scale factor to apply during the conversion + * + * Returns an error code or 0. + * + * This function will read a processed value from a channel. This will work + * like @iio_read_channel_processed() but also scale with an additional + * scale factor while attempting to minimize any precision loss. + */ +int iio_read_channel_processed_scale(struct iio_channel *chan, int *val, + unsigned int scale); + /** * iio_write_channel_attribute() - Write values to the device attribute. * @chan: The channel being queried. -- cgit v1.2.3 From 0a21526bc1d41456f1b320cce35c9c66238fb1c9 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 11 Mar 2021 11:10:42 +0200 Subject: iio: kfifo: add devm_iio_triggered_buffer_setup_ext variant This is similar to the {devm_}iio_triggered_buffer_setup_ext variants added via commit 5164c7889857 ("iio: triggered-buffer: add {devm_}iio_triggered_buffer_setup_ext variants"). These can be used to pass extra buffer attributes to the buffer object. This is a bit of temporary mechanism (hopefully) so that drivers that want to allocate a kfifo buffer with extra buffer attributes, don't need to include 'buffer_impl.h' directly. This can also become an API function (in it's own right, unfortunately), but it may be a little less bad vs drivers having to include 'buffer_impl.h'. So, far the drivers that want to pass buffer attributes, all have to do with some HW FIFO attributes, so there may be a chance of unifying them into IIO core somehow (as some standard API). But, until that happens, we just need to let them register their HW FIFO attributes directly (without having to let them include 'buffer_impl.h' directly). Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210311091042.22417-1-aardelean@deviqon.com Signed-off-by: Jonathan Cameron --- include/linux/iio/kfifo_buf.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/kfifo_buf.h b/include/linux/iio/kfifo_buf.h index 1522896e1daf..ccd2ceae7b25 100644 --- a/include/linux/iio/kfifo_buf.h +++ b/include/linux/iio/kfifo_buf.h @@ -10,9 +10,13 @@ struct device; struct iio_buffer *iio_kfifo_allocate(void); void iio_kfifo_free(struct iio_buffer *r); -int devm_iio_kfifo_buffer_setup(struct device *dev, - struct iio_dev *indio_dev, - int mode_flags, - const struct iio_buffer_setup_ops *setup_ops); +int devm_iio_kfifo_buffer_setup_ext(struct device *dev, + struct iio_dev *indio_dev, + int mode_flags, + const struct iio_buffer_setup_ops *setup_ops, + const struct attribute **buffer_attrs); + +#define devm_iio_kfifo_buffer_setup(dev, indio_dev, mode_flags, setup_ops) \ + devm_iio_kfifo_buffer_setup_ext((dev), (indio_dev), (mode_flags), (setup_ops), NULL) #endif -- cgit v1.2.3 From 80346b2b55fcbb042acd0b90120004da8738101f Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Thu, 18 Mar 2021 11:48:57 -0700 Subject: iio: cros: unify hw fifo attributes without API changes Commit 2e2366c2d141 ("iio: cros_ec: unify hw fifo attributes into the core file") should be reverted as it set buffer extended attributes at the wrong place. However, to revert it will requires to revert more commits: commit 165aea80e2e2 ("iio: cros_ec: use devm_iio_triggered_buffer_setup_ext()") commit 21232b4456ba ("iio: buffer: remove iio_buffer_set_attrs() helper")). and we would still have conflict with more recent development. commit ee708e6baacd ("iio: buffer: introduce support for attaching more IIO buffers") Instead, this commit reverts the first 2 commits without re-adding iio_buffer_set_attrs() and set the buffer extended attributes at the right place: 1. Instead of adding has_fw_fifo, deduct it from the configuration: - EC must support FIFO (EC_FEATURE_MOTION_SENSE_FIFO) set. - sensors send data a regular interval (accelerometer, gyro, magnetomer, barometer, light sensor). - "Legacy accelerometer" is only present on EC without FIFO, so we don't need to set buffer attributes. 2. devm_iio_triggered_buffer_setup_ext() does not need to be called when EC does not support FIFO, as there is no FIFO to manage. 3. Use devm_iio_triggered_buffer_setup_ext() when EC has a FIFO to specify the buffer extended attributes. Fixes: 2e2366c2d141 ("iio: cros_ec: unify hw fifo attributes into the core file") Fixes: 165aea80e2e2 ("iio: cros_ec: use devm_iio_triggered_buffer_setup_ext()") Signed-off-by: Gwendal Grignou Reviewed-by: Alexandru Ardelean Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210318184857.2679181-1-gwendal@chromium.org Signed-off-by: Jonathan Cameron --- include/linux/iio/common/cros_ec_sensors_core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h index c9b80be82440..7ce8a8adad58 100644 --- a/include/linux/iio/common/cros_ec_sensors_core.h +++ b/include/linux/iio/common/cros_ec_sensors_core.h @@ -96,8 +96,7 @@ struct platform_device; int cros_ec_sensors_core_init(struct platform_device *pdev, struct iio_dev *indio_dev, bool physical_device, cros_ec_sensors_capture_t trigger_capture, - cros_ec_sensorhub_push_data_cb_t push_data, - bool has_hw_fifo); + cros_ec_sensorhub_push_data_cb_t push_data); irqreturn_t cros_ec_sensors_capture(int irq, void *p); int cros_ec_sensors_push_data(struct iio_dev *indio_dev, -- cgit v1.2.3 From 5d9034938720a15fa0f62db3e195c0c473c72c1b Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Thu, 18 Mar 2021 21:22:22 +0100 Subject: bpf: Fix typo 'accesible' into 'accessible' Trivial fix. Signed-off-by: Ricardo Ribalda Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210318202223.164873-8-ribalda@chromium.org --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 39dce9d3c3a5..24678d6ecbcf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -56,7 +56,7 @@ struct bpf_iter_seq_info { u32 seq_priv_size; }; -/* map is generic key/value storage optionally accesible by eBPF programs */ +/* map is generic key/value storage optionally accessible by eBPF programs */ struct bpf_map_ops { /* funcs callable from userspace (via syscall) */ int (*map_alloc_check)(union bpf_attr *attr); -- cgit v1.2.3 From 8532f613bc78b6e0e32b486e720848d3f5569287 Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Fri, 26 Mar 2021 01:39:14 +0800 Subject: net: stmmac: introduce MSI Interrupt routines for mac, safety, RX & TX Now we introduce MSI interrupt service routines and hook these routines up if stmmac_open() sees valid irq line being requested:- stmmac_mac_interrupt() :- MAC (dev->irq), WOL (wol_irq), LPI (lpi_irq) stmmac_safety_interrupt() :- Safety Feat Correctible Error (sfty_ce_irq) & Uncorrectible Error (sfty_ue_irq) stmmac_msi_intr_rx() :- For all RX MSI irq (rx_irq) stmmac_msi_intr_tx() :- For all TX MSI irq (tx_irq) Each of IRQs will have its unique name so that we can differentiate them easily under /proc/interrupts. Signed-off-by: Ong Boon Leong Signed-off-by: Voon Weifeng Signed-off-by: David S. Miller --- include/linux/stmmac.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index febdb43d27e5..afc12b9385db 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -237,5 +237,13 @@ struct plat_stmmacenet_data { struct pci_dev *pdev; bool has_crossts; int int_snapshot_num; + bool multi_msi_en; + int msi_mac_vec; + int msi_wol_vec; + int msi_lpi_vec; + int msi_sfty_ce_vec; + int msi_sfty_ue_vec; + int msi_rx_base_vec; + int msi_tx_base_vec; }; #endif -- cgit v1.2.3 From 6ccf12ae111e49324b439410066e8cc359aeee6d Mon Sep 17 00:00:00 2001 From: "Wong, Vee Khee" Date: Fri, 26 Mar 2021 01:39:16 +0800 Subject: net: stmmac: use interrupt mode INTM=1 for multi-MSI For interrupt mode INTM=0, TX/RX transfer complete will trigger signal not only on sbd_perch_[tx|rx]_intr_o (Transmit/Receive Per Channel) but also on the sbd_intr_o (Common). As for multi-MSI implementation, setting interrupt mode INTM=1 is more efficient as each TX intr and RX intr (TI/RI) will be handled by TX/RX ISR without the need of calling the common MAC ISR. Updated the TX/RX NORMAL interrupts status checking process as the NIS status bit is not asserted for any RI/TI events for INTM=1. Signed-off-by: Wong, Vee Khee Co-developed-by: Voon Weifeng Signed-off-by: Voon Weifeng Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index afc12b9385db..e338ef7abc00 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -96,6 +96,7 @@ struct stmmac_dma_cfg { int mixed_burst; bool aal; bool eame; + bool multi_msi_en; }; #define AXI_BLEN 7 -- cgit v1.2.3 From cb9444130662c6c13022579c861098f212db2562 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Mar 2021 11:08:13 -0700 Subject: sysctl: add proc_dou8vec_minmax() Networking has many sysctls that could fit in one u8. This patch adds proc_dou8vec_minmax() for this purpose. Note that the .extra1 and .extra2 fields are pointing to integers, because it makes conversions easier. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/sysctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 51298a4f4623..d99ca99837de 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -53,6 +53,8 @@ int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); +int proc_dou8vec_minmax(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); -- cgit v1.2.3 From b910eaaaa4b89976ef02e5d6448f3f73dc671d91 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 22 Mar 2021 22:51:46 -0700 Subject: bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper Jiri Olsa reported a bug ([1]) in kernel where cgroup local storage pointer may be NULL in bpf_get_local_storage() helper. There are two issues uncovered by this bug: (1). kprobe or tracepoint prog incorrectly sets cgroup local storage before prog run, (2). due to change from preempt_disable to migrate_disable, preemption is possible and percpu storage might be overwritten by other tasks. This issue (1) is fixed in [2]. This patch tried to address issue (2). The following shows how things can go wrong: task 1: bpf_cgroup_storage_set() for percpu local storage preemption happens task 2: bpf_cgroup_storage_set() for percpu local storage preemption happens task 1: run bpf program task 1 will effectively use the percpu local storage setting by task 2 which will be either NULL or incorrect ones. Instead of just one common local storage per cpu, this patch fixed the issue by permitting 8 local storages per cpu and each local storage is identified by a task_struct pointer. This way, we allow at most 8 nested preemption between bpf_cgroup_storage_set() and bpf_cgroup_storage_unset(). The percpu local storage slot is released (calling bpf_cgroup_storage_unset()) by the same task after bpf program finished running. bpf_test_run() is also fixed to use the new bpf_cgroup_storage_set() interface. The patch is tested on top of [2] with reproducer in [1]. Without this patch, kernel will emit error in 2-3 minutes. With this patch, after one hour, still no error. [1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@mail.gmail.com/T [2] https://lore.kernel.org/bpf/20210309185028.3763817-1-yhs@fb.com Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Roman Gushchin Link: https://lore.kernel.org/bpf/20210323055146.3334476-1-yhs@fb.com --- include/linux/bpf-cgroup.h | 57 +++++++++++++++++++++++++++++++++++++++------- include/linux/bpf.h | 22 ++++++++++++++---- 2 files changed, 67 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index c42e02b4d84b..6a29fe11485d 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -20,14 +20,25 @@ struct bpf_sock_ops_kern; struct bpf_cgroup_storage; struct ctl_table; struct ctl_table_header; +struct task_struct; #ifdef CONFIG_CGROUP_BPF extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE]; #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type]) -DECLARE_PER_CPU(struct bpf_cgroup_storage*, - bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); +#define BPF_CGROUP_STORAGE_NEST_MAX 8 + +struct bpf_cgroup_storage_info { + struct task_struct *task; + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; +}; + +/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks + * to use bpf cgroup storage simultaneously. + */ +DECLARE_PER_CPU(struct bpf_cgroup_storage_info, + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); #define for_each_cgroup_storage_type(stype) \ for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) @@ -161,13 +172,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type( return BPF_CGROUP_STORAGE_SHARED; } -static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage - *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) +static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage + *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { enum bpf_cgroup_storage_type stype; + int i, err = 0; + + preempt_disable(); + for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { + if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL)) + continue; + + this_cpu_write(bpf_cgroup_storage_info[i].task, current); + for_each_cgroup_storage_type(stype) + this_cpu_write(bpf_cgroup_storage_info[i].storage[stype], + storage[stype]); + goto out; + } + err = -EBUSY; + WARN_ON_ONCE(1); + +out: + preempt_enable(); + return err; +} + +static inline void bpf_cgroup_storage_unset(void) +{ + int i; + + for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { + if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) + continue; - for_each_cgroup_storage_type(stype) - this_cpu_write(bpf_cgroup_storage[stype], storage[stype]); + this_cpu_write(bpf_cgroup_storage_info[i].task, NULL); + return; + } } struct bpf_cgroup_storage * @@ -448,8 +488,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, return -EINVAL; } -static inline void bpf_cgroup_storage_set( - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {} +static inline int bpf_cgroup_storage_set( + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; } +static inline void bpf_cgroup_storage_unset(void) {} static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map) { return 0; } static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 24678d6ecbcf..5a0801b420ca 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1106,6 +1106,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, /* BPF program asks to set CN on the packet. */ #define BPF_RET_SET_CN (1 << 0) +/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY, + * if bpf_cgroup_storage_set() failed, the rest of programs + * will not execute. This should be a really rare scenario + * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of + * preemptions all between bpf_cgroup_storage_set() and + * bpf_cgroup_storage_unset() on the same cpu. + */ #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \ ({ \ struct bpf_prog_array_item *_item; \ @@ -1118,10 +1125,12 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, _array = rcu_dereference(array); \ _item = &_array->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ + if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ + break; \ func_ret = func(_prog, ctx); \ _ret &= (func_ret & 1); \ *(ret_flags) |= (func_ret >> 1); \ + bpf_cgroup_storage_unset(); \ _item++; \ } \ rcu_read_unlock(); \ @@ -1142,9 +1151,14 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, goto _out; \ _item = &_array->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ - if (set_cg_storage) \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ - _ret &= func(_prog, ctx); \ + if (!set_cg_storage) { \ + _ret &= func(_prog, ctx); \ + } else { \ + if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ + break; \ + _ret &= func(_prog, ctx); \ + bpf_cgroup_storage_unset(); \ + } \ _item++; \ } \ _out: \ -- cgit v1.2.3 From 7547738d28dd572d40e0e1c1f854c80e3cb41bec Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Tue, 16 Mar 2021 17:07:31 -0400 Subject: oid_registry: Add OIDs for ECDSA with SHA224/256/384/512 Add OIDs for ECDSA with SHA224/256/384/512. Signed-off-by: Stefan Berger Acked-by: Jarkko Sakkinen Signed-off-by: Herbert Xu --- include/linux/oid_registry.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 4462ed2c18cd..b504e2f36b25 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -19,8 +19,12 @@ enum OID { OID_id_dsa_with_sha1, /* 1.2.840.10030.4.3 */ OID_id_dsa, /* 1.2.840.10040.4.1 */ - OID_id_ecdsa_with_sha1, /* 1.2.840.10045.4.1 */ OID_id_ecPublicKey, /* 1.2.840.10045.2.1 */ + OID_id_ecdsa_with_sha1, /* 1.2.840.10045.4.1 */ + OID_id_ecdsa_with_sha224, /* 1.2.840.10045.4.3.1 */ + OID_id_ecdsa_with_sha256, /* 1.2.840.10045.4.3.2 */ + OID_id_ecdsa_with_sha384, /* 1.2.840.10045.4.3.3 */ + OID_id_ecdsa_with_sha512, /* 1.2.840.10045.4.3.4 */ /* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */ OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */ -- cgit v1.2.3 From d1a303e8616c5ba1260722bb9068bbc0d1704847 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Tue, 16 Mar 2021 17:07:36 -0400 Subject: x509: Detect sm2 keys by their parameters OID Detect whether a key is an sm2 type of key by its OID in the parameters array rather than assuming that everything under OID_id_ecPublicKey is sm2, which is not the case. Cc: David Howells Cc: keyrings@vger.kernel.org Signed-off-by: Stefan Berger Reviewed-by: Tianjia Zhang Tested-by: Tianjia Zhang Signed-off-by: Herbert Xu --- include/linux/oid_registry.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index b504e2f36b25..f32d91895e4d 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -121,6 +121,7 @@ enum OID { }; extern enum OID look_up_OID(const void *data, size_t datasize); +extern int parse_OID(const void *data, size_t datasize, enum OID *oid); extern int sprint_oid(const void *, size_t, char *, size_t); extern int sprint_OID(enum OID, char *, size_t); -- cgit v1.2.3 From 299f561a66939debba70e6d7c67aa01ed32613d9 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Tue, 16 Mar 2021 17:07:37 -0400 Subject: x509: Add support for parsing x509 certs with ECDSA keys Add support for parsing of x509 certificates that contain ECDSA keys, such as NIST P256, that have been signed by a CA using any of the current SHA hash algorithms. Cc: David Howells Cc: keyrings@vger.kernel.org Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- include/linux/oid_registry.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index f32d91895e4d..3583908cf1ca 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -20,6 +20,8 @@ enum OID { OID_id_dsa_with_sha1, /* 1.2.840.10030.4.3 */ OID_id_dsa, /* 1.2.840.10040.4.1 */ OID_id_ecPublicKey, /* 1.2.840.10045.2.1 */ + OID_id_prime192v1, /* 1.2.840.10045.3.1.1 */ + OID_id_prime256v1, /* 1.2.840.10045.3.1.7 */ OID_id_ecdsa_with_sha1, /* 1.2.840.10045.4.1 */ OID_id_ecdsa_with_sha224, /* 1.2.840.10045.4.3.1 */ OID_id_ecdsa_with_sha256, /* 1.2.840.10045.4.3.2 */ -- cgit v1.2.3 From 2a8e615436de4cd59a7b0af43590ede899906bdf Mon Sep 17 00:00:00 2001 From: Saulo Alessandre Date: Tue, 16 Mar 2021 17:07:39 -0400 Subject: x509: Add OID for NIST P384 and extend parser for it Prepare the x509 parser to accept NIST P384 certificates and add the OID for ansip384r1, which is the identifier for NIST P384. Summary of changes: * crypto/asymmetric_keys/x509_cert_parser.c - prepare x509 parser to load NIST P384 * include/linux/oid_registry.h - add OID_ansip384r1 Signed-off-by: Saulo Alessandre Tested-by: Stefan Berger Signed-off-by: Herbert Xu --- include/linux/oid_registry.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 3583908cf1ca..cc64d9419746 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -64,6 +64,7 @@ enum OID { OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ OID_sha1, /* 1.3.14.3.2.26 */ + OID_id_ansip384r1, /* 1.3.132.0.34 */ OID_sha256, /* 2.16.840.1.101.3.4.2.1 */ OID_sha384, /* 2.16.840.1.101.3.4.2.2 */ OID_sha512, /* 2.16.840.1.101.3.4.2.3 */ -- cgit v1.2.3 From 67196fea0fcef92b25608882f62f3985bc59f1fe Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 9 Mar 2021 11:37:31 +0200 Subject: irqdomain: Introduce irq_domain_create_simple() API Linus Walleij pointed out that ird_domain_add_simple() gained additional functionality and can't be anymore replaced with a simple conditional. In preparation to upgrade GPIO library to use fwnode, introduce irq_domain_create_simple() API which is functional equivalent to the existing irq_domain_add_simple(), but takes a pointer to the struct fwnode_handle as a parameter. While at it, amend documentation to mention irq_domain_create_*() functions where it makes sense. Signed-off-by: Andy Shevchenko Acked-by: Marc Zyngier Signed-off-by: Bartosz Golaszewski --- include/linux/irqdomain.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 33cacc8af26d..1ad8d5328715 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -256,11 +256,11 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data); -struct irq_domain *irq_domain_add_simple(struct device_node *of_node, - unsigned int size, - unsigned int first_irq, - const struct irq_domain_ops *ops, - void *host_data); +struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, + unsigned int size, + unsigned int first_irq, + const struct irq_domain_ops *ops, + void *host_data); struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, unsigned int size, unsigned int first_irq, @@ -325,6 +325,15 @@ static inline struct irq_domain *irq_find_host(struct device_node *node) return d; } +static inline struct irq_domain *irq_domain_add_simple(struct device_node *of_node, + unsigned int size, + unsigned int first_irq, + const struct irq_domain_ops *ops, + void *host_data) +{ + return irq_domain_create_simple(of_node_to_fwnode(of_node), size, first_irq, ops, host_data); +} + /** * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. * @of_node: pointer to interrupt controller's device tree node. -- cgit v1.2.3 From 2d93018fe67d42c44d65a898da2a6a5a0209b9ee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 23 Mar 2021 15:19:05 -0700 Subject: gpiolib: some edits of kernel docs for clarity Fix a few typos and some punctuation. Also, change CONFIG_OF to CONFIG_OF_GPIO in one comment. Signed-off-by: Randy Dunlap Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: linux-gpio@vger.kernel.org Reviewed-by: Linus Walleij [Bartosz: tweaked the commit message] Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 286de0520574..63283de9daeb 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -227,7 +227,7 @@ struct gpio_irq_chip { /** * @valid_mask: * - * If not %NULL holds bitmask of GPIOs which are valid to be included + * If not %NULL, holds bitmask of GPIOs which are valid to be included * in IRQ domain of the chip. */ unsigned long *valid_mask; @@ -346,7 +346,7 @@ struct gpio_irq_chip { * output. * * A gpio_chip can help platforms abstract various sources of GPIOs so - * they can all be accessed through a common programing interface. + * they can all be accessed through a common programming interface. * Example sources would be SOC controllers, FPGAs, multifunction * chips, dedicated GPIO expanders, and so on. * @@ -435,15 +435,15 @@ struct gpio_chip { /** * @valid_mask: * - * If not %NULL holds bitmask of GPIOs which are valid to be used + * If not %NULL, holds bitmask of GPIOs which are valid to be used * from the chip. */ unsigned long *valid_mask; #if defined(CONFIG_OF_GPIO) /* - * If CONFIG_OF is enabled, then all GPIO controllers described in the - * device tree automatically may have an OF translation + * If CONFIG_OF_GPIO is enabled, then all GPIO controllers described in + * the device tree automatically may have an OF translation */ /** @@ -508,7 +508,7 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, * for GPIOs will fail rudely. * * gpiochip_add_data() must only be called after gpiolib initialization, - * ie after core_initcall(). + * i.e. after core_initcall(). * * If gc->base is negative, this requests dynamic assignment of * a range of valid GPIOs. -- cgit v1.2.3 From 29d96eb261345c8d888e248ae79484e681be2faa Mon Sep 17 00:00:00 2001 From: Roja Rani Yarubandi Date: Wed, 24 Mar 2021 15:48:35 +0530 Subject: soc: qcom-geni-se: Cleanup the code to remove proxy votes This reverts commit 048eb908a1f2 ("soc: qcom-geni-se: Add interconnect support to fix earlycon crash") ICC core and platforms drivers supports sync_state feature, which ensures that the default ICC BW votes from the bootloader is not removed until all it's consumers are probes. The proxy votes were needed in case other QUP child drivers I2C, SPI probes before UART, they can turn off the QUP-CORE clock which is shared resources for all QUP driver, this causes unclocked access to HW from earlycon. Given above support from ICC there is no longer need to maintain proxy votes on QUP-CORE ICC node from QUP wrapper driver for early console usecase, the default votes won't be removed until real console is probed. Cc: stable@vger.kernel.org Fixes: 266cd33b5913 ("interconnect: qcom: Ensure that the floor bandwidth value is enforced") Fixes: 7d3b0b0d8184 ("interconnect: qcom: Use icc_sync_state") Signed-off-by: Roja Rani Yarubandi Signed-off-by: Akash Asthana Reviewed-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20210324101836.25272-2-rojay@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- include/linux/qcom-geni-se.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h index ec2ad4b0fe14..c4fdb4463f7d 100644 --- a/include/linux/qcom-geni-se.h +++ b/include/linux/qcom-geni-se.h @@ -460,7 +460,5 @@ void geni_icc_set_tag(struct geni_se *se, u32 tag); int geni_icc_enable(struct geni_se *se); int geni_icc_disable(struct geni_se *se); - -void geni_remove_earlycon_icc_vote(void); #endif #endif -- cgit v1.2.3 From 861de02e5f3f2a104eecc5af1d248cb7bf8c5f75 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Mar 2021 11:59:00 +0100 Subject: bpf: Take module reference for trampoline in module Currently module can be unloaded even if there's a trampoline register in it. It's easily reproduced by running in parallel: # while :; do ./test_progs -t module_attach; done # while :; do rmmod bpf_testmod; sleep 0.5; done Taking the module reference in case the trampoline's ip is within the module code. Releasing it when the trampoline's ip is unregistered. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210326105900.151466-1-jolsa@kernel.org --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3625f019767d..fdac0534ce79 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -40,6 +40,7 @@ struct bpf_local_storage; struct bpf_local_storage_map; struct kobject; struct mem_cgroup; +struct module; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -623,6 +624,7 @@ struct bpf_trampoline { /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; u64 selector; + struct module *mod; }; struct bpf_attach_target_info { -- cgit v1.2.3 From e16301fbe1837c9594f9c1957c28fd1bb18fbd15 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 24 Mar 2021 18:51:30 -0700 Subject: bpf: Simplify freeing logic in linfo and jited_linfo This patch simplifies the linfo freeing logic by combining "bpf_prog_free_jited_linfo()" and "bpf_prog_free_unused_jited_linfo()" into the new "bpf_prog_jit_attempt_done()". It is a prep work for the kernel function call support. In a later patch, freeing the kernel function call descriptors will also be done in the "bpf_prog_jit_attempt_done()". "bpf_prog_free_linfo()" is removed since it is only called by "__bpf_prog_put_noref()". The kvfree() are directly called instead. It also takes this chance to s/kcalloc/kvcalloc/ for the jited_linfo allocation. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210325015130.1544323-1-kafai@fb.com --- include/linux/filter.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index b2b85b2cad8e..0d9c710eb050 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -877,8 +877,7 @@ void bpf_prog_free_linfo(struct bpf_prog *prog); void bpf_prog_fill_jited_linfo(struct bpf_prog *prog, const u32 *insn_to_jit_off); int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog); -void bpf_prog_free_jited_linfo(struct bpf_prog *prog); -void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog); +void bpf_prog_jit_attempt_done(struct bpf_prog *prog); struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags); -- cgit v1.2.3 From 34747c4120418143097d4343312a0ca96c986d86 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 24 Mar 2021 18:51:36 -0700 Subject: bpf: Refactor btf_check_func_arg_match This patch moved the subprog specific logic from btf_check_func_arg_match() to the new btf_check_subprog_arg_match(). The core logic is left in btf_check_func_arg_match() which will be reused later to check the kernel function call. The "if (!btf_type_is_ptr(t))" is checked first to improve the indentation which will be useful for a later patch. Some of the "btf_kind_str[]" usages is replaced with the shortcut "btf_type_str(t)". Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210325015136.1544504-1-kafai@fb.com --- include/linux/bpf.h | 4 ++-- include/linux/btf.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5a0801b420ca..eaae618a90b5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1545,8 +1545,8 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf_func_model *m); struct bpf_reg_state; -int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs); +int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, + struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, diff --git a/include/linux/btf.h b/include/linux/btf.h index 9c1b52738bbe..8a05687a4ee2 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -141,6 +141,11 @@ static inline bool btf_type_is_enum(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; } +static inline bool btf_type_is_scalar(const struct btf_type *t) +{ + return btf_type_is_int(t) || btf_type_is_enum(t); +} + static inline bool btf_type_is_typedef(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF; -- cgit v1.2.3 From e6ac2450d6dee3121cd8bbf2907b78a68a8a353d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 24 Mar 2021 18:51:42 -0700 Subject: bpf: Support bpf program calling kernel function This patch adds support to BPF verifier to allow bpf program calling kernel function directly. The use case included in this set is to allow bpf-tcp-cc to directly call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those functions have already been used by some kernel tcp-cc implementations. This set will also allow the bpf-tcp-cc program to directly call the kernel tcp-cc implementation, For example, a bpf_dctcp may only want to implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly from the kernel tcp_dctcp.c instead of reimplementing (or copy-and-pasting) them. The tcp-cc kernel functions mentioned above will be white listed for the struct_ops bpf-tcp-cc programs to use in a later patch. The white listed functions are not bounded to a fixed ABI contract. Those functions have already been used by the existing kernel tcp-cc. If any of them has changed, both in-tree and out-of-tree kernel tcp-cc implementations have to be changed. The same goes for the struct_ops bpf-tcp-cc programs which have to be adjusted accordingly. This patch is to make the required changes in the bpf verifier. First change is in btf.c, it adds a case in "btf_check_func_arg_match()". When the passed in "btf->kernel_btf == true", it means matching the verifier regs' states with a kernel function. This will handle the PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET, and PTR_TO_TCP_SOCK to its kernel's btf_id. In the later libbpf patch, the insn calling a kernel function will look like: insn->code == (BPF_JMP | BPF_CALL) insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */ insn->imm == func_btf_id /* btf_id of the running kernel */ [ For the future calling function-in-kernel-module support, an array of module btf_fds can be passed at the load time and insn->off can be used to index into this array. ] At the early stage of verifier, the verifier will collect all kernel function calls into "struct bpf_kfunc_desc". Those descriptors are stored in "prog->aux->kfunc_tab" and will be available to the JIT. Since this "add" operation is similar to the current "add_subprog()" and looking for the same insn->code, they are done together in the new "add_subprog_and_kfunc()". In the "do_check()" stage, the new "check_kfunc_call()" is added to verify the kernel function call instruction: 1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE. A new bpf_verifier_ops "check_kfunc_call" is added to do that. The bpf-tcp-cc struct_ops program will implement this function in a later patch. 2. Call "btf_check_kfunc_args_match()" to ensure the regs can be used as the args of a kernel function. 3. Mark the regs' type, subreg_def, and zext_dst. At the later do_misc_fixups() stage, the new fixup_kfunc_call() will replace the insn->imm with the function address (relative to __bpf_call_base). If needed, the jit can find the btf_func_model by calling the new bpf_jit_find_kfunc_model(prog, insn). With the imm set to the function address, "bpftool prog dump xlated" will be able to display the kernel function calls the same way as it displays other bpf helper calls. gpl_compatible program is required to call kernel function. This feature currently requires JIT. The verifier selftests are adjusted because of the changes in the verbose log in add_subprog_and_kfunc(). Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com --- include/linux/bpf.h | 24 ++++++++++++++++++++++++ include/linux/btf.h | 1 + include/linux/filter.h | 1 + 3 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index eaae618a90b5..b5b7967e3ff3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -427,6 +427,7 @@ enum bpf_reg_type { PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ PTR_TO_FUNC, /* reg points to a bpf program function */ PTR_TO_MAP_KEY, /* reg points to a map element key */ + __BPF_REG_TYPE_MAX, }; /* The information passed from prog-specific *_is_valid_access @@ -480,6 +481,7 @@ struct bpf_verifier_ops { const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); + bool (*check_kfunc_call)(u32 kfunc_btf_id); }; struct bpf_prog_offload_ops { @@ -796,6 +798,8 @@ struct btf_mod_pair { struct module *module; }; +struct bpf_kfunc_desc_tab; + struct bpf_prog_aux { atomic64_t refcnt; u32 used_map_cnt; @@ -832,6 +836,7 @@ struct bpf_prog_aux { struct bpf_prog **func; void *jit_data; /* JIT specific data. arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; + struct bpf_kfunc_desc_tab *kfunc_tab; u32 size_poke_tab; struct bpf_ksym ksym; const struct bpf_prog_ops *ops; @@ -1547,6 +1552,9 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, struct bpf_reg_state; int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); +int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, + const struct btf *btf, u32 func_id, + struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, @@ -1557,6 +1565,10 @@ struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); void bpf_task_storage_free(struct task_struct *task); +bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); +const struct btf_func_model * +bpf_jit_find_kfunc_model(const struct bpf_prog *prog, + const struct bpf_insn *insn); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1737,6 +1749,18 @@ bpf_base_func_proto(enum bpf_func_id func_id) static inline void bpf_task_storage_free(struct task_struct *task) { } + +static inline bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) +{ + return false; +} + +static inline const struct btf_func_model * +bpf_jit_find_kfunc_model(const struct bpf_prog *prog, + const struct bpf_insn *insn) +{ + return NULL; +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, diff --git a/include/linux/btf.h b/include/linux/btf.h index 8a05687a4ee2..3bac66e0183a 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -110,6 +110,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf, const struct btf_type * btf_resolve_size(const struct btf *btf, const struct btf_type *type, u32 *type_size); +const char *btf_type_str(const struct btf_type *t); #define for_each_member(i, struct_type, member) \ for (i = 0, member = btf_type_member(struct_type); \ diff --git a/include/linux/filter.h b/include/linux/filter.h index 0d9c710eb050..eecfd82db648 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -918,6 +918,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); +bool bpf_jit_supports_kfunc_call(void); bool bpf_helper_changes_pkt_data(void *func); static inline bool bpf_dump_raw_ok(const struct cred *cred) -- cgit v1.2.3 From 7bd1590d4eba1583f6ee85e8cfe556505f761e19 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 24 Mar 2021 18:52:52 -0700 Subject: bpf: selftests: Add kfunc_call test This patch adds a few kernel function bpf_kfunc_call_test*() for the selftest's test_run purpose. They will be allowed for tc_cls prog. The selftest calling the kernel function bpf_kfunc_call_test*() is also added in this patch. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210325015252.1551395-1-kafai@fb.com --- include/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b5b7967e3ff3..9fdd839b418c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1532,6 +1532,7 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +bool bpf_prog_test_check_kfunc_call(u32 kfunc_id); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@ -1731,6 +1732,11 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, return -ENOTSUPP; } +static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id) +{ + return false; +} + static inline void bpf_map_put(struct bpf_map *map) { } -- cgit v1.2.3 From 8c8239c2c1fb82f171cb22a707f3bb88a2f22109 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 25 Mar 2021 10:47:12 -0600 Subject: of: Add missing 'Return' section in kerneldoc comments Many of the DT kerneldoc comments are lacking a 'Return' section. Let's add the section in cases we have a description of return values. There's still some cases where the return values are not documented. Cc: Frank Rowand Cc: Mauro Carvalho Chehab Signed-off-by: Rob Herring Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210325164713.1296407-8-robh@kernel.org --- include/linux/of.h | 63 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index e9209ef44cc0..ef6b161d1f91 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -424,12 +424,14 @@ extern int of_detach_node(struct device_node *); * @sz: number of array elements to read * * Search for a property in a device node and read 8-bit value(s) from - * it. Returns 0 on success, -EINVAL if the property does not exist, - * -ENODATA if property does not have a value, and -EOVERFLOW if the - * property data isn't large enough. + * it. * * dts entry of array should be like: - * property = /bits/ 8 <0x50 0x60 0x70>; + * ``property = /bits/ 8 <0x50 0x60 0x70>;`` + * + * Return: 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. * * The out_values is modified only if a valid u8 value can be decoded. */ @@ -454,12 +456,14 @@ static inline int of_property_read_u8_array(const struct device_node *np, * @sz: number of array elements to read * * Search for a property in a device node and read 16-bit value(s) from - * it. Returns 0 on success, -EINVAL if the property does not exist, - * -ENODATA if property does not have a value, and -EOVERFLOW if the - * property data isn't large enough. + * it. * * dts entry of array should be like: - * property = /bits/ 16 <0x5000 0x6000 0x7000>; + * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` + * + * Return: 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. * * The out_values is modified only if a valid u16 value can be decoded. */ @@ -485,7 +489,9 @@ static inline int of_property_read_u16_array(const struct device_node *np, * @sz: number of array elements to read * * Search for a property in a device node and read 32-bit value(s) from - * it. Returns 0 on success, -EINVAL if the property does not exist, + * it. + * + * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * @@ -513,7 +519,9 @@ static inline int of_property_read_u32_array(const struct device_node *np, * @sz: number of array elements to read * * Search for a property in a device node and read 64-bit value(s) from - * it. Returns 0 on success, -EINVAL if the property does not exist, + * it. + * + * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * @@ -1070,7 +1078,9 @@ static inline bool of_node_is_type(const struct device_node *np, const char *typ * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u8 elements - * in it. Returns number of elements on sucess, -EINVAL if the property does + * in it. + * + * Return: The number of elements on sucess, -EINVAL if the property does * not exist or its length does not match a multiple of u8 and -ENODATA if the * property does not have a value. */ @@ -1087,7 +1097,9 @@ static inline int of_property_count_u8_elems(const struct device_node *np, * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u16 elements - * in it. Returns number of elements on sucess, -EINVAL if the property does + * in it. + * + * Return: The number of elements on sucess, -EINVAL if the property does * not exist or its length does not match a multiple of u16 and -ENODATA if the * property does not have a value. */ @@ -1104,7 +1116,9 @@ static inline int of_property_count_u16_elems(const struct device_node *np, * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u32 elements - * in it. Returns number of elements on sucess, -EINVAL if the property does + * in it. + * + * Return: The number of elements on sucess, -EINVAL if the property does * not exist or its length does not match a multiple of u32 and -ENODATA if the * property does not have a value. */ @@ -1121,7 +1135,9 @@ static inline int of_property_count_u32_elems(const struct device_node *np, * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u64 elements - * in it. Returns number of elements on sucess, -EINVAL if the property does + * in it. + * + * Return: The number of elements on sucess, -EINVAL if the property does * not exist or its length does not match a multiple of u64 and -ENODATA if the * property does not have a value. */ @@ -1142,7 +1158,7 @@ static inline int of_property_count_u64_elems(const struct device_node *np, * Search for a property in a device tree node and retrieve a list of * terminated string values (pointer to data, not a copy) in that property. * - * If @out_strs is NULL, the number of strings in the property is returned. + * Return: If @out_strs is NULL, the number of strings in the property is returned. */ static inline int of_property_read_string_array(const struct device_node *np, const char *propname, const char **out_strs, @@ -1158,10 +1174,11 @@ static inline int of_property_read_string_array(const struct device_node *np, * @propname: name of the property to be searched. * * Search for a property in a device tree node and retrieve the number of null - * terminated string contain in it. Returns the number of strings on - * success, -EINVAL if the property does not exist, -ENODATA if property - * does not have a value, and -EILSEQ if the string is not null-terminated - * within the length of the property data. + * terminated string contain in it. + * + * Return: The number of strings on success, -EINVAL if the property does not + * exist, -ENODATA if property does not have a value, and -EILSEQ if the string + * is not null-terminated within the length of the property data. */ static inline int of_property_count_strings(const struct device_node *np, const char *propname) @@ -1181,7 +1198,8 @@ static inline int of_property_count_strings(const struct device_node *np, * Search for a property in a device tree node and retrieve a null * terminated string value (pointer to data, not a copy) in the list of strings * contained in that property. - * Returns 0 on success, -EINVAL if the property does not exist, -ENODATA if + * + * Return: 0 on success, -EINVAL if the property does not exist, -ENODATA if * property does not have a value, and -EILSEQ if the string is not * null-terminated within the length of the property data. * @@ -1201,7 +1219,8 @@ static inline int of_property_read_string_index(const struct device_node *np, * @propname: name of the property to be searched. * * Search for a property in a device node. - * Returns true if the property exists false otherwise. + * + * Return: true if the property exists false otherwise. */ static inline bool of_property_read_bool(const struct device_node *np, const char *propname) @@ -1447,7 +1466,7 @@ static inline int of_reconfig_get_state_change(unsigned long action, * of_device_is_system_power_controller - Tells if system-power-controller is found for device_node * @np: Pointer to the given device_node * - * return true if present false otherwise + * Return: true if present false otherwise */ static inline bool of_device_is_system_power_controller(const struct device_node *np) { -- cgit v1.2.3 From c21161e40ee94486f7db701f9b4d3f9c25763e8c Mon Sep 17 00:00:00 2001 From: Ray Chi Date: Sun, 28 Mar 2021 02:28:09 +0800 Subject: power: supply: Fix build error when CONFIG_POWER_SUPPLY is not enabled. The build error happens when CONFIG_POWER_SUPPLY is not enabled. h8300-linux-ld: drivers/usb/dwc3/gadget.o: in function `.L59': >> gadget.c:(.text+0x655): undefined reference to `power_supply_set_property' Fixes: 99288de36020 ("usb: dwc3: add an alternate path in vbus_draw callback") Reported-by: kernel test robot Signed-off-by: Ray Chi Link: https://lore.kernel.org/r/20210327182809.1814480-3-raychi@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/power_supply.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 81a55e974feb..b495b4374cd0 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -426,9 +426,16 @@ static inline int power_supply_is_system_supplied(void) { return -ENOSYS; } extern int power_supply_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val); +#if IS_ENABLED(CONFIG_POWER_SUPPLY) extern int power_supply_set_property(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val); +#else +static inline int power_supply_set_property(struct power_supply *psy, + enum power_supply_property psp, + const union power_supply_propval *val) +{ return 0; } +#endif extern int power_supply_property_is_writeable(struct power_supply *psy, enum power_supply_property psp); extern void power_supply_external_power_changed(struct power_supply *psy); -- cgit v1.2.3 From 7a534c5e4159f9bbac9f3c146dc78e163d8858c2 Mon Sep 17 00:00:00 2001 From: Zhang Yunkai Date: Sat, 13 Mar 2021 02:57:02 -0800 Subject: mtd: rawnand: remove duplicate include in rawnand.h 'linux/mtd/nand.h' included in 'rawnand.h' is duplicated. It is also included in the 17th line. Signed-off-by: Zhang Yunkai Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210313105702.365878-1-zhang.yunkai@zte.com.cn --- include/linux/mtd/rawnand.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 6b3240e44310..93e8f72beba6 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From e3c1f1c92d6ede3cfa09d6a103d3d1c1ef645e35 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 3 Mar 2021 21:18:19 +0100 Subject: mtd: add OTP (one-time-programmable) erase ioctl This may sound like a contradiction but some SPI-NOR flashes really support erasing their OTP region until it is finally locked. Having the possibility to erase an OTP region might come in handy during development. The ioctl argument follows the OTPLOCK style. Signed-off-by: Michael Walle Acked-by: Vignesh Raghavendra Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210303201819.2752-1-michael@walle.cc --- include/linux/mtd/mtd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index ceabc2cae8a4..4aac200ca8b5 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -337,6 +337,8 @@ struct mtd_info { size_t len, size_t *retlen, u_char *buf); int (*_lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len); + int (*_erase_user_prot_reg) (struct mtd_info *mtd, loff_t from, + size_t len); int (*_writev) (struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen); void (*_sync) (struct mtd_info *mtd); @@ -518,6 +520,7 @@ int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, u_char *buf); int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len); +int mtd_erase_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len); int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen); -- cgit v1.2.3 From 28f0be44b263ca4b59ea63c801db3830e65fbe99 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 23 Mar 2021 11:17:37 +0800 Subject: include: linux: mtd: Remove duplicate include of nand.h linux/mtd/nand.h has been included at line 17. So we remove the duplicate one at line 21. Signed-off-by: Wan Jiabing Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210323031737.259365-1-wanjiabing@vivo.com --- include/linux/mtd/rawnand.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 6b3240e44310..93e8f72beba6 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 48dc16e2e52ca386d97435f257ef8a754becb0b8 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:26 +0000 Subject: firmware: arm_scmi: Extend protocol registration interfaces Extend common protocol registration routines and provide some new generic protocols get/put helpers that can track protocols usage and automatically perform the proper initialization and de-initialization on demand when required. Convert all standard protocols to use this new registration scheme while keeping them all still using the usual initialization logic bound to SCMI devices probing. Link: https://lore.kernel.org/r/20210316124903.35011-2-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index ecb3aad1a964..ea301c579c6f 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -2,7 +2,7 @@ /* * SCMI Message Protocol driver header * - * Copyright (C) 2018 ARM Ltd. + * Copyright (C) 2018-2021 ARM Ltd. */ #ifndef _LINUX_SCMI_PROTOCOL_H @@ -712,9 +712,9 @@ static inline void scmi_driver_unregister(struct scmi_driver *driver) {} #define module_scmi_driver(__scmi_driver) \ module_driver(__scmi_driver, scmi_register, scmi_unregister) -typedef int (*scmi_prot_init_fn_t)(struct scmi_handle *); -int scmi_protocol_register(int protocol_id, scmi_prot_init_fn_t fn); -void scmi_protocol_unregister(int protocol_id); +struct scmi_protocol; +int scmi_protocol_register(const struct scmi_protocol *proto); +void scmi_protocol_unregister(const struct scmi_protocol *proto); /* SCMI Notification API - Custom Event Reports */ enum scmi_notification_events { -- cgit v1.2.3 From 23934efe3748f6d9d8ac0760178a5ef1ed8320f4 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:28 +0000 Subject: firmware: arm_scmi: Introduce devres get/put protocols operations Expose to the SCMI drivers a new devres managed common protocols API based on generic get/put methods and protocol handles. All drivers still keep using the old API, no functional change. Link: https://lore.kernel.org/r/20210316124903.35011-4-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index ea301c579c6f..a9becd356fe8 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -57,6 +57,8 @@ struct scmi_clock_info { }; struct scmi_handle; +struct scmi_device; +struct scmi_protocol_handle; /** * struct scmi_clk_ops - represents the various operations provided @@ -593,6 +595,9 @@ struct scmi_notify_ops { * @sensor_ops: pointer to set of sensor protocol operations * @reset_ops: pointer to set of reset protocol operations * @voltage_ops: pointer to set of voltage protocol operations + * @devm_protocol_get: devres managed method to acquire a protocol and get specific + * operations and a dedicated protocol handler + * @devm_protocol_put: devres managed method to release a protocol * @notify_ops: pointer to set of notifications related operations * @perf_priv: pointer to private data structure specific to performance * protocol(for internal use only) @@ -618,6 +623,12 @@ struct scmi_handle { const struct scmi_sensor_ops *sensor_ops; const struct scmi_reset_ops *reset_ops; const struct scmi_voltage_ops *voltage_ops; + + const void __must_check * + (*devm_protocol_get)(struct scmi_device *sdev, u8 proto, + struct scmi_protocol_handle **ph); + void (*devm_protocol_put)(struct scmi_device *sdev, u8 proto); + const struct scmi_notify_ops *notify_ops; /* for protocol internal use */ void *perf_priv; -- cgit v1.2.3 From 5ad3d1cf7d34c00aa5b425d93373b5c0044aa0a9 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:30 +0000 Subject: firmware: arm_scmi: Introduce new devres notification ops Expose to the SCMI drivers a new alternative devres managed notifications API based on protocol handles. All drivers still keep using the old API, no functional change. Link: https://lore.kernel.org/r/20210316124903.35011-6-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index a9becd356fe8..2b66b243e841 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -544,6 +544,10 @@ struct scmi_voltage_ops { /** * struct scmi_notify_ops - represents notifications' operations provided by * SCMI core + * @devm_event_notifier_register: Managed registration of a notifier_block for + * the requested event + * @devm_event_notifier_unregister: Managed unregistration of a notifier_block + * for the requested event * @register_event_notifier: Register a notifier_block for the requested event * @unregister_event_notifier: Unregister a notifier_block for the requested * event @@ -553,7 +557,9 @@ struct scmi_voltage_ops { * tuple: (proto_id, evt_id, src_id) using the provided register/unregister * interface where: * - * @handle: The handle identifying the platform instance to use + * @sdev: The scmi_device to use when calling the devres managed ops devm_ + * @handle: The handle identifying the platform instance to use, when not + * calling the managed ops devm_ * @proto_id: The protocol ID as in SCMI Specification * @evt_id: The message ID of the desired event as in SCMI Specification * @src_id: A pointer to the desired source ID if different sources are @@ -576,11 +582,21 @@ struct scmi_voltage_ops { * @report: A custom struct describing the specific event delivered */ struct scmi_notify_ops { + int (*devm_event_notifier_register)(struct scmi_device *sdev, + u8 proto_id, u8 evt_id, + const u32 *src_id, + struct notifier_block *nb); + int (*devm_event_notifier_unregister)(struct scmi_device *sdev, + u8 proto_id, u8 evt_id, + const u32 *src_id, + struct notifier_block *nb); int (*register_event_notifier)(const struct scmi_handle *handle, - u8 proto_id, u8 evt_id, u32 *src_id, + u8 proto_id, u8 evt_id, + const u32 *src_id, struct notifier_block *nb); int (*unregister_event_notifier)(const struct scmi_handle *handle, - u8 proto_id, u8 evt_id, u32 *src_id, + u8 proto_id, u8 evt_id, + const u32 *src_id, struct notifier_block *nb); }; -- cgit v1.2.3 From 1fec5e6b5233a08ebf43011703b8baf9c9856862 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:36 +0000 Subject: firmware: arm_scmi: Port perf protocol to new protocols interface Convert internals of protocol implementation to use protocol handles and expose a new protocol operations interface for SCMI driver using the new get/put common operations, while keeping the old handle->perf_ops still around to ease transition. Remove handle->perf_priv now unused. Link: https://lore.kernel.org/r/20210316124903.35011-12-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 2b66b243e841..c658aec78e15 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -85,7 +85,7 @@ struct scmi_clk_ops { }; /** - * struct scmi_perf_ops - represents the various operations provided + * struct scmi_perf_proto_ops - represents the various operations provided * by SCMI Performance Protocol * * @limits_set: sets limits on the performance level of a domain @@ -102,6 +102,31 @@ struct scmi_clk_ops { * @est_power_get: gets the estimated power cost for a given performance domain * at a given frequency */ +struct scmi_perf_proto_ops { + int (*limits_set)(const struct scmi_protocol_handle *ph, u32 domain, + u32 max_perf, u32 min_perf); + int (*limits_get)(const struct scmi_protocol_handle *ph, u32 domain, + u32 *max_perf, u32 *min_perf); + int (*level_set)(const struct scmi_protocol_handle *ph, u32 domain, + u32 level, bool poll); + int (*level_get)(const struct scmi_protocol_handle *ph, u32 domain, + u32 *level, bool poll); + int (*device_domain_id)(struct device *dev); + int (*transition_latency_get)(const struct scmi_protocol_handle *ph, + struct device *dev); + int (*device_opps_add)(const struct scmi_protocol_handle *ph, + struct device *dev); + int (*freq_set)(const struct scmi_protocol_handle *ph, u32 domain, + unsigned long rate, bool poll); + int (*freq_get)(const struct scmi_protocol_handle *ph, u32 domain, + unsigned long *rate, bool poll); + int (*est_power_get)(const struct scmi_protocol_handle *ph, u32 domain, + unsigned long *rate, unsigned long *power); + bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph, + struct device *dev); + bool (*power_scale_mw_get)(const struct scmi_protocol_handle *ph); +}; + struct scmi_perf_ops { int (*limits_set)(const struct scmi_handle *handle, u32 domain, u32 max_perf, u32 min_perf); @@ -615,8 +640,6 @@ struct scmi_notify_ops { * operations and a dedicated protocol handler * @devm_protocol_put: devres managed method to release a protocol * @notify_ops: pointer to set of notifications related operations - * @perf_priv: pointer to private data structure specific to performance - * protocol(for internal use only) * @clk_priv: pointer to private data structure specific to clock * protocol(for internal use only) * @power_priv: pointer to private data structure specific to power @@ -647,7 +670,6 @@ struct scmi_handle { const struct scmi_notify_ops *notify_ops; /* for protocol internal use */ - void *perf_priv; void *clk_priv; void *power_priv; void *sensor_priv; -- cgit v1.2.3 From f58315a49ce98007870e1206ca5d79fee09cf36e Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:38 +0000 Subject: firmware: arm_scmi: Remove legacy scmi_perf_ops protocol interface Now that all the SCMI driver users have been migrated to the new interface remove the legacy interface and all the transient code. Link: https://lore.kernel.org/r/20210316124903.35011-14-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index c658aec78e15..c71d7c297125 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -127,31 +127,6 @@ struct scmi_perf_proto_ops { bool (*power_scale_mw_get)(const struct scmi_protocol_handle *ph); }; -struct scmi_perf_ops { - int (*limits_set)(const struct scmi_handle *handle, u32 domain, - u32 max_perf, u32 min_perf); - int (*limits_get)(const struct scmi_handle *handle, u32 domain, - u32 *max_perf, u32 *min_perf); - int (*level_set)(const struct scmi_handle *handle, u32 domain, - u32 level, bool poll); - int (*level_get)(const struct scmi_handle *handle, u32 domain, - u32 *level, bool poll); - int (*device_domain_id)(struct device *dev); - int (*transition_latency_get)(const struct scmi_handle *handle, - struct device *dev); - int (*device_opps_add)(const struct scmi_handle *handle, - struct device *dev); - int (*freq_set)(const struct scmi_handle *handle, u32 domain, - unsigned long rate, bool poll); - int (*freq_get)(const struct scmi_handle *handle, u32 domain, - unsigned long *rate, bool poll); - int (*est_power_get)(const struct scmi_handle *handle, u32 domain, - unsigned long *rate, unsigned long *power); - bool (*fast_switch_possible)(const struct scmi_handle *handle, - struct device *dev); - bool (*power_scale_mw_get)(const struct scmi_handle *handle); -}; - /** * struct scmi_power_ops - represents the various operations provided * by SCMI Power Protocol @@ -631,7 +606,6 @@ struct scmi_notify_ops { * @dev: pointer to the SCMI device * @version: pointer to the structure containing SCMI version information * @power_ops: pointer to set of power protocol operations - * @perf_ops: pointer to set of performance protocol operations * @clk_ops: pointer to set of clock protocol operations * @sensor_ops: pointer to set of sensor protocol operations * @reset_ops: pointer to set of reset protocol operations @@ -656,7 +630,6 @@ struct scmi_notify_ops { struct scmi_handle { struct device *dev; struct scmi_revision_info *version; - const struct scmi_perf_ops *perf_ops; const struct scmi_clk_ops *clk_ops; const struct scmi_power_ops *power_ops; const struct scmi_sensor_ops *sensor_ops; -- cgit v1.2.3 From 9bc8069c85678e9a61584b58951b7c5fcdd5d357 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:39 +0000 Subject: firmware: arm_scmi: Port power protocol to new protocols interface Convert internals of protocol implementation to use protocol handles and expose a new protocol operations interface for SCMI driver using the new get/put common operations, while keeping the old handle->power_ops still around to ease transition. Remove handle->power_priv now unused. Link: https://lore.kernel.org/r/20210316124903.35011-15-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index c71d7c297125..dfb3d2f5e0d3 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -128,7 +128,7 @@ struct scmi_perf_proto_ops { }; /** - * struct scmi_power_ops - represents the various operations provided + * struct scmi_power_proto_ops - represents the various operations provided * by SCMI Power Protocol * * @num_domains_get: get the count of power domains provided by SCMI @@ -136,9 +136,9 @@ struct scmi_perf_proto_ops { * @state_set: sets the power state of a power domain * @state_get: gets the power state of a power domain */ -struct scmi_power_ops { - int (*num_domains_get)(const struct scmi_handle *handle); - char *(*name_get)(const struct scmi_handle *handle, u32 domain); +struct scmi_power_proto_ops { + int (*num_domains_get)(const struct scmi_protocol_handle *ph); + char *(*name_get)(const struct scmi_protocol_handle *ph, u32 domain); #define SCMI_POWER_STATE_TYPE_SHIFT 30 #define SCMI_POWER_STATE_ID_MASK (BIT(28) - 1) #define SCMI_POWER_STATE_PARAM(type, id) \ @@ -146,6 +146,15 @@ struct scmi_power_ops { ((id) & SCMI_POWER_STATE_ID_MASK)) #define SCMI_POWER_STATE_GENERIC_ON SCMI_POWER_STATE_PARAM(0, 0) #define SCMI_POWER_STATE_GENERIC_OFF SCMI_POWER_STATE_PARAM(1, 0) + int (*state_set)(const struct scmi_protocol_handle *ph, u32 domain, + u32 state); + int (*state_get)(const struct scmi_protocol_handle *ph, u32 domain, + u32 *state); +}; + +struct scmi_power_ops { + int (*num_domains_get)(const struct scmi_handle *handle); + char *(*name_get)(const struct scmi_handle *handle, u32 domain); int (*state_set)(const struct scmi_handle *handle, u32 domain, u32 state); int (*state_get)(const struct scmi_handle *handle, u32 domain, @@ -616,8 +625,6 @@ struct scmi_notify_ops { * @notify_ops: pointer to set of notifications related operations * @clk_priv: pointer to private data structure specific to clock * protocol(for internal use only) - * @power_priv: pointer to private data structure specific to power - * protocol(for internal use only) * @sensor_priv: pointer to private data structure specific to sensors * protocol(for internal use only) * @reset_priv: pointer to private data structure specific to reset @@ -644,7 +651,6 @@ struct scmi_handle { const struct scmi_notify_ops *notify_ops; /* for protocol internal use */ void *clk_priv; - void *power_priv; void *sensor_priv; void *reset_priv; void *voltage_priv; -- cgit v1.2.3 From 0f84576a62c4b42ae7110f1c27bbb51183f232fb Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:41 +0000 Subject: firmware: arm_scmi: Remove legacy scmi_power_ops protocol interface Now that all the SCMI driver users have been migrated to the new interface remove the legacy interface and all the transient code. Link: https://lore.kernel.org/r/20210316124903.35011-17-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index dfb3d2f5e0d3..2328d36b7597 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -152,15 +152,6 @@ struct scmi_power_proto_ops { u32 *state); }; -struct scmi_power_ops { - int (*num_domains_get)(const struct scmi_handle *handle); - char *(*name_get)(const struct scmi_handle *handle, u32 domain); - int (*state_set)(const struct scmi_handle *handle, u32 domain, - u32 state); - int (*state_get)(const struct scmi_handle *handle, u32 domain, - u32 *state); -}; - /** * scmi_sensor_reading - represent a timestamped read * @@ -614,7 +605,6 @@ struct scmi_notify_ops { * * @dev: pointer to the SCMI device * @version: pointer to the structure containing SCMI version information - * @power_ops: pointer to set of power protocol operations * @clk_ops: pointer to set of clock protocol operations * @sensor_ops: pointer to set of sensor protocol operations * @reset_ops: pointer to set of reset protocol operations @@ -638,7 +628,6 @@ struct scmi_handle { struct device *dev; struct scmi_revision_info *version; const struct scmi_clk_ops *clk_ops; - const struct scmi_power_ops *power_ops; const struct scmi_sensor_ops *sensor_ops; const struct scmi_reset_ops *reset_ops; const struct scmi_voltage_ops *voltage_ops; -- cgit v1.2.3 From 887281c7519d6e291be5353449d1e1bc00aa63af Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:42 +0000 Subject: firmware: arm_scmi: Port clock protocol to new protocols interface Convert internals of protocol implementation to use protocol handles and expose a new protocol operations interface for SCMI driver using the new get/put common operations, while keeping the old handle->clk_ops still around to ease transition. Remove handle->clock_priv now unused. Link: https://lore.kernel.org/r/20210316124903.35011-18-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 2328d36b7597..b19797b23c5f 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -61,7 +61,7 @@ struct scmi_device; struct scmi_protocol_handle; /** - * struct scmi_clk_ops - represents the various operations provided + * struct scmi_clk_proto_ops - represents the various operations provided * by SCMI Clock Protocol * * @count_get: get the count of clocks provided by SCMI @@ -71,8 +71,21 @@ struct scmi_protocol_handle; * @enable: enables the specified clock * @disable: disables the specified clock */ +struct scmi_clk_proto_ops { + int (*count_get)(const struct scmi_protocol_handle *ph); + + const struct scmi_clock_info *(*info_get) + (const struct scmi_protocol_handle *ph, u32 clk_id); + int (*rate_get)(const struct scmi_protocol_handle *ph, u32 clk_id, + u64 *rate); + int (*rate_set)(const struct scmi_protocol_handle *ph, u32 clk_id, + u64 rate); + int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id); + int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id); +}; + struct scmi_clk_ops { - int (*count_get)(const struct scmi_handle *handle); + int (*count_get)(const struct scmi_handle *hamdle); const struct scmi_clock_info *(*info_get) (const struct scmi_handle *handle, u32 clk_id); @@ -613,8 +626,6 @@ struct scmi_notify_ops { * operations and a dedicated protocol handler * @devm_protocol_put: devres managed method to release a protocol * @notify_ops: pointer to set of notifications related operations - * @clk_priv: pointer to private data structure specific to clock - * protocol(for internal use only) * @sensor_priv: pointer to private data structure specific to sensors * protocol(for internal use only) * @reset_priv: pointer to private data structure specific to reset @@ -639,7 +650,6 @@ struct scmi_handle { const struct scmi_notify_ops *notify_ops; /* for protocol internal use */ - void *clk_priv; void *sensor_priv; void *reset_priv; void *voltage_priv; -- cgit v1.2.3 From 7b2d92a3c8e5e8a7ef2710a928a1011bcebc2b91 Mon Sep 17 00:00:00 2001 From: Guoqing Chi Date: Mon, 22 Mar 2021 09:30:24 +0800 Subject: iio:imu:mpu6050: Modify matricies to matrices The plural of "matrix" is "matrices". Signed-off-by: Guoqing Chi Acked-by: Randy Dunlap Acked-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20210322013024.1849-1-chi962464zy@163.com Signed-off-by: Jonathan Cameron --- include/linux/platform_data/invensense_mpu6050.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/invensense_mpu6050.h b/include/linux/platform_data/invensense_mpu6050.h index 93974f4cfba1..f05b37521f67 100644 --- a/include/linux/platform_data/invensense_mpu6050.h +++ b/include/linux/platform_data/invensense_mpu6050.h @@ -12,7 +12,7 @@ * mounting matrix retrieved from device-tree) * * Contains platform specific information on how to configure the MPU6050 to - * work on this platform. The orientation matricies are 3x3 rotation matricies + * work on this platform. The orientation matrices are 3x3 rotation matrices * that are applied to the data to rotate from the mounting orientation to the * platform orientation. The values must be one of 0, 1, or -1 and each row and * column should have exactly 1 non-zero value. -- cgit v1.2.3 From 9c7d24693d864f90b27aad5d15fbfe226c02898b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Wed, 24 Mar 2021 09:19:02 +0100 Subject: gpio: guard gpiochip_irqchip_add_domain() with GPIOLIB_IRQCHIP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current code doesn't check if GPIOLIB_IRQCHIP is enabled, which results in a compilation error when trying to build gpio-regmap if CONFIG_GPIOLIB_IRQCHIP isn't enabled. Fixes: 6a45b0e2589f ("gpiolib: Introduce gpiochip_irqchip_add_domain()") Suggested-by: Michael Walle Signed-off-by: Álvaro Fernández Rojas Reviewed-by: Linus Walleij Reviewed-by: Michael Walle Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20210324081923.20379-2-noltari@gmail.com Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 286de0520574..ecf0032a0995 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -624,8 +624,17 @@ void gpiochip_irq_domain_deactivate(struct irq_domain *domain, bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc, unsigned int offset); +#ifdef CONFIG_GPIOLIB_IRQCHIP int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain); +#else +static inline int gpiochip_irqchip_add_domain(struct gpio_chip *gc, + struct irq_domain *domain) +{ + WARN_ON(1); + return -EINVAL; +} +#endif int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset); void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset); -- cgit v1.2.3 From d46bf9ec4596654f36245e3b14765bcb422be6ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Wed, 24 Mar 2021 09:19:03 +0100 Subject: gpio: regmap: set gpio_chip of_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is needed for properly registering GPIO regmap as a child of a regmap pin controller. Signed-off-by: Álvaro Fernández Rojas Reviewed-by: Michael Walle Reviewed-by: Andy Shevchenko Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20210324081923.20379-3-noltari@gmail.com Signed-off-by: Linus Walleij --- include/linux/gpio/regmap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/regmap.h b/include/linux/gpio/regmap.h index ad76f3d0a6ba..334dd928042b 100644 --- a/include/linux/gpio/regmap.h +++ b/include/linux/gpio/regmap.h @@ -4,6 +4,7 @@ #define _LINUX_GPIO_REGMAP_H struct device; +struct fwnode_handle; struct gpio_regmap; struct irq_domain; struct regmap; @@ -16,6 +17,8 @@ struct regmap; * @parent: The parent device * @regmap: The regmap used to access the registers * given, the name of the device is used + * @fwnode: (Optional) The firmware node. + * If not given, the fwnode of the parent is used. * @label: (Optional) Descriptive name for GPIO controller. * If not given, the name of the device is used. * @ngpio: Number of GPIOs @@ -57,6 +60,7 @@ struct regmap; struct gpio_regmap_config { struct device *parent; struct regmap *regmap; + struct fwnode_handle *fwnode; const char *label; int ngpio; -- cgit v1.2.3 From 0f00b82e5413571ed225ddbccad6882d7ea60bc7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Mar 2021 08:45:50 +0100 Subject: block: remove the revalidate_disk method No implementations left. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210308074550.422714-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc6bc8383b43..b4241f73f7a8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1870,7 +1870,6 @@ struct block_device_operations { unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); void (*unlock_native_capacity) (struct gendisk *); - int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); int (*set_read_only)(struct block_device *bdev, bool ro); /* this callback is with swap_lock and sometimes page table lock held */ -- cgit v1.2.3 From c15380b72d7ae821ee090ba5a56fc6310828dbda Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 26 Mar 2021 16:29:30 +0100 Subject: locking/rtmutex: Remove rt_mutex_timed_lock() rt_mutex_timed_lock() has no callers since: c051b21f71d1f ("rtmutex: Confine deadlock logic to futex") Remove it. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210326153943.061103415@linutronix.de --- include/linux/rtmutex.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 6fd615a0eea9..32f4a3538c3c 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -115,9 +115,6 @@ extern void rt_mutex_lock(struct rt_mutex *lock); #endif extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); -extern int rt_mutex_timed_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *timeout); - extern int rt_mutex_trylock(struct rt_mutex *lock); extern void rt_mutex_unlock(struct rt_mutex *lock); -- cgit v1.2.3 From 2d445c3e4a8216cfa9703998124c13250cc13e5e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 26 Mar 2021 16:29:31 +0100 Subject: locking/rtmutex: Remove rtmutex deadlock tester leftovers The following debug members of 'struct rtmutex' are unused: - save_state: No users - file,line: Printed if ::name is NULL. This is only used for non-futex locks so ::name is never NULL - magic: Assigned to NULL by rt_mutex_destroy(), no further usage Remove them along with unused inline and macro leftovers related to the long gone deadlock tester. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210326153943.195064296@linutronix.de --- include/linux/rtmutex.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 32f4a3538c3c..48b334b9eb87 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -32,10 +32,7 @@ struct rt_mutex { struct rb_root_cached waiters; struct task_struct *owner; #ifdef CONFIG_DEBUG_RT_MUTEXES - int save_state; - const char *name, *file; - int line; - void *magic; + const char *name; #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -60,7 +57,7 @@ struct hrtimer_sleeper; #ifdef CONFIG_DEBUG_RT_MUTEXES # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ - , .name = #mutexname, .file = __FILE__, .line = __LINE__ + , .name = #mutexname # define rt_mutex_init(mutex) \ do { \ -- cgit v1.2.3 From 6d41c675a5394057f6fb1dc97cc0a0e360f2c2f8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 26 Mar 2021 16:29:32 +0100 Subject: locking/rtmutex: Remove output from deadlock detector The rtmutex specific deadlock detector predates lockdep coverage of rtmutex and since commit f5694788ad8da ("rt_mutex: Add lockdep annotations") it contains a lot of redundant functionality: - lockdep will detect an potential deadlock before rtmutex-debug has a chance to do so - the deadlock debugging is restricted to rtmutexes which are not associated to futexes and have an active waiter, which is covered by lockdep already Remove the redundant functionality and move actual deadlock WARN() into the deadlock code path. The latter needs a seperate cleanup. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210326153943.320398604@linutronix.de --- include/linux/rtmutex.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 48b334b9eb87..0725c4b45749 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -31,9 +31,6 @@ struct rt_mutex { raw_spinlock_t wait_lock; struct rb_root_cached waiters; struct task_struct *owner; -#ifdef CONFIG_DEBUG_RT_MUTEXES - const char *name; -#endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif @@ -56,8 +53,6 @@ struct hrtimer_sleeper; #endif #ifdef CONFIG_DEBUG_RT_MUTEXES -# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ - , .name = #mutexname # define rt_mutex_init(mutex) \ do { \ @@ -67,7 +62,6 @@ do { \ extern void rt_mutex_debug_task_free(struct task_struct *tsk); #else -# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) # define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL, NULL) # define rt_mutex_debug_task_free(t) do { } while (0) #endif @@ -83,7 +77,6 @@ do { \ { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ , .waiters = RB_ROOT_CACHED \ , .owner = NULL \ - __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} #define DEFINE_RT_MUTEX(mutexname) \ -- cgit v1.2.3 From 199cacd1a625cfc499d624b98b10dc763062f7dd Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 26 Mar 2021 16:29:33 +0100 Subject: locking/rtmutex: Consolidate rt_mutex_init() rt_mutex_init() only initializes lockdep if CONFIG_DEBUG_RT_MUTEXES is enabled, which is fine because all lockdep variants select it, but there is no reason to do so. Move the function outside of the CONFIG_DEBUG_RT_MUTEXES block which removes #ifdeffery. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210326153943.437405350@linutronix.de --- include/linux/rtmutex.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 0725c4b45749..243fabc2c85f 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -43,6 +43,7 @@ struct hrtimer_sleeper; extern int rt_mutex_debug_check_no_locks_freed(const void *from, unsigned long len); extern void rt_mutex_debug_check_no_locks_held(struct task_struct *task); + extern void rt_mutex_debug_task_free(struct task_struct *tsk); #else static inline int rt_mutex_debug_check_no_locks_freed(const void *from, unsigned long len) @@ -50,22 +51,15 @@ struct hrtimer_sleeper; return 0; } # define rt_mutex_debug_check_no_locks_held(task) do { } while (0) +# define rt_mutex_debug_task_free(t) do { } while (0) #endif -#ifdef CONFIG_DEBUG_RT_MUTEXES - -# define rt_mutex_init(mutex) \ +#define rt_mutex_init(mutex) \ do { \ static struct lock_class_key __key; \ __rt_mutex_init(mutex, __func__, &__key); \ } while (0) - extern void rt_mutex_debug_task_free(struct task_struct *tsk); -#else -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL, NULL) -# define rt_mutex_debug_task_free(t) do { } while (0) -#endif - #ifdef CONFIG_DEBUG_LOCK_ALLOC #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \ , .dep_map = { .name = #mutexname } -- cgit v1.2.3 From 8188d74e68174b11ff7c4a635ffc8fd31eacc6b9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Mar 2021 16:29:34 +0100 Subject: locking/rtmutex: Remove empty and unused debug stubs No users or useless and therefore just ballast. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210326153943.549192485@linutronix.de --- include/linux/rtmutex.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 243fabc2c85f..d1672de9ca89 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -40,18 +40,9 @@ struct rt_mutex_waiter; struct hrtimer_sleeper; #ifdef CONFIG_DEBUG_RT_MUTEXES - extern int rt_mutex_debug_check_no_locks_freed(const void *from, - unsigned long len); - extern void rt_mutex_debug_check_no_locks_held(struct task_struct *task); - extern void rt_mutex_debug_task_free(struct task_struct *tsk); +extern void rt_mutex_debug_task_free(struct task_struct *tsk); #else - static inline int rt_mutex_debug_check_no_locks_freed(const void *from, - unsigned long len) - { - return 0; - } -# define rt_mutex_debug_check_no_locks_held(task) do { } while (0) -# define rt_mutex_debug_task_free(t) do { } while (0) +static inline void rt_mutex_debug_task_free(struct task_struct *tsk) { } #endif #define rt_mutex_init(mutex) \ @@ -88,7 +79,6 @@ static inline int rt_mutex_is_locked(struct rt_mutex *lock) } extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key); -extern void rt_mutex_destroy(struct rt_mutex *lock); #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass); -- cgit v1.2.3 From 1a1c130ab7575498eed5bcf7220037ae09cd1f8a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 23 Mar 2021 20:26:52 +0100 Subject: ACPI: tables: x86: Reserve memory occupied by ACPI tables The following problem has been reported by George Kennedy: Since commit 7fef431be9c9 ("mm/page_alloc: place pages to tail in __free_pages_core()") the following use after free occurs intermittently when ACPI tables are accessed. BUG: KASAN: use-after-free in ibft_init+0x134/0xc49 Read of size 4 at addr ffff8880be453004 by task swapper/0/1 CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1-7a7fd0d #1 Call Trace: dump_stack+0xf6/0x158 print_address_description.constprop.9+0x41/0x60 kasan_report.cold.14+0x7b/0xd4 __asan_report_load_n_noabort+0xf/0x20 ibft_init+0x134/0xc49 do_one_initcall+0xc4/0x3e0 kernel_init_freeable+0x5af/0x66b kernel_init+0x16/0x1d0 ret_from_fork+0x22/0x30 ACPI tables mapped via kmap() do not have their mapped pages reserved and the pages can be "stolen" by the buddy allocator. Apparently, on the affected system, the ACPI table in question is not located in "reserved" memory, like ACPI NVS or ACPI Data, that will not be used by the buddy allocator, so the memory occupied by that table has to be explicitly reserved to prevent the buddy allocator from using it. In order to address this problem, rearrange the initialization of the ACPI tables on x86 to locate the initial tables earlier and reserve the memory occupied by them. The other architectures using ACPI should not be affected by this change. Link: https://lore.kernel.org/linux-acpi/1614802160-29362-1-git-send-email-george.kennedy@oracle.com/ Reported-by: George Kennedy Tested-by: George Kennedy Signed-off-by: Rafael J. Wysocki Reviewed-by: Mike Rapoport Cc: 5.10+ # 5.10+ --- include/linux/acpi.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fcdaab723916..3bdcfc4401b7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -222,10 +222,14 @@ void __iomem *__acpi_map_table(unsigned long phys, unsigned long size); void __acpi_unmap_table(void __iomem *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); +void acpi_boot_table_prepare (void); void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); +int acpi_locate_initial_tables (void); +void acpi_reserve_initial_tables (void); +void acpi_table_init_complete (void); int acpi_table_init (void); int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init acpi_table_parse_entries(char *id, unsigned long table_size, @@ -814,9 +818,12 @@ static inline int acpi_boot_init(void) return 0; } +static inline void acpi_boot_table_prepare(void) +{ +} + static inline void acpi_boot_table_init(void) { - return; } static inline int acpi_mps_check(void) -- cgit v1.2.3 From 8058dfa05ab765153f20020fc4ea3b296e391a00 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sat, 27 Mar 2021 07:56:42 +0100 Subject: soc: qcom: address kernel-doc warnings The command: find ./include/linux/soc/qcom/ | xargs ./scripts/kernel-doc -none reports: ./include/linux/soc/qcom/qmi.h:26: warning: cannot understand function prototype: 'struct qmi_header ' ./include/linux/soc/qcom/qmi.h:101: warning: cannot understand function prototype: 'struct qmi_response_type_v01 ' ./include/linux/soc/qcom/irq.h:19: warning: expecting prototype for QCOM specific IRQ domain flags that distinguishes the handling of wakeup(). Prototype was for IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP() instead ./include/linux/soc/qcom/apr.h:126: warning: Function parameter or member '__apr_driver' not described in 'module_apr_driver' ./include/linux/soc/qcom/apr.h:126: warning: Excess function parameter '__aprbus_driver' description in 'module_apr_driver' ./include/linux/soc/qcom/llcc-qcom.h:43: warning: cannot understand function prototype: 'struct llcc_slice_desc ' ./include/linux/soc/qcom/llcc-qcom.h:60: warning: cannot understand function prototype: 'struct llcc_edac_reg_data ' ./include/linux/soc/qcom/llcc-qcom.h:86: warning: cannot understand function prototype: 'struct llcc_drv_data ' Address all those warnings by: - prefixing kernel-doc descriptions for structs with the keyword 'struct', - turning a kernel-doc comment that does not follow the kernel-doc syntax into a normal comment, and - correcting a parameter name in a kernel-doc comment. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20210327065642.11969-3-lukas.bulwahn@gmail.com Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/apr.h | 2 +- include/linux/soc/qcom/irq.h | 2 +- include/linux/soc/qcom/llcc-qcom.h | 6 +++--- include/linux/soc/qcom/qmi.h | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h index 7f0bc3cf4d61..137f9f2ac4c3 100644 --- a/include/linux/soc/qcom/apr.h +++ b/include/linux/soc/qcom/apr.h @@ -113,7 +113,7 @@ void apr_driver_unregister(struct apr_driver *drv); /** * module_apr_driver() - Helper macro for registering a aprbus driver - * @__aprbus_driver: aprbus_driver struct + * @__apr_driver: apr_driver struct * * Helper macro for aprbus drivers which do not do anything special in * module init/exit. This eliminates a lot of boilerplate. Each module diff --git a/include/linux/soc/qcom/irq.h b/include/linux/soc/qcom/irq.h index 9e1ece58e55b..72b9231e9fdd 100644 --- a/include/linux/soc/qcom/irq.h +++ b/include/linux/soc/qcom/irq.h @@ -7,7 +7,7 @@ #define GPIO_NO_WAKE_IRQ ~0U -/** +/* * QCOM specific IRQ domain flags that distinguishes the handling of wakeup * capable interrupts by different interrupt controllers. * diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 64fc582ae415..437c9df13229 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -35,7 +35,7 @@ #define LLCC_WRCACHE 31 /** - * llcc_slice_desc - Cache slice descriptor + * struct llcc_slice_desc - Cache slice descriptor * @slice_id: llcc slice id * @slice_size: Size allocated for the llcc slice */ @@ -45,7 +45,7 @@ struct llcc_slice_desc { }; /** - * llcc_edac_reg_data - llcc edac registers data for each error type + * struct llcc_edac_reg_data - llcc edac registers data for each error type * @name: Name of the error * @synd_reg: Syndrome register address * @count_status_reg: Status register address to read the error count @@ -69,7 +69,7 @@ struct llcc_edac_reg_data { }; /** - * llcc_drv_data - Data associated with the llcc driver + * struct llcc_drv_data - Data associated with the llcc driver * @regmap: regmap associated with the llcc device * @bcast_regmap: regmap associated with llcc broadcast offset * @cfg: pointer to the data structure for slice configuration diff --git a/include/linux/soc/qcom/qmi.h b/include/linux/soc/qcom/qmi.h index e712f94b89fc..b1f80e756d2a 100644 --- a/include/linux/soc/qcom/qmi.h +++ b/include/linux/soc/qcom/qmi.h @@ -16,7 +16,7 @@ struct socket; /** - * qmi_header - wireformat header of QMI messages + * struct qmi_header - wireformat header of QMI messages * @type: type of message * @txn_id: transaction id * @msg_id: message id @@ -93,7 +93,7 @@ struct qmi_elem_info { #define QMI_ERR_NOT_SUPPORTED_V01 94 /** - * qmi_response_type_v01 - common response header (decoded) + * struct qmi_response_type_v01 - common response header (decoded) * @result: result of the transaction * @error: error value, when @result is QMI_RESULT_FAILURE_V01 */ -- cgit v1.2.3 From 886ce97a36a05e7a9c9d5d894e72d31f50146f5d Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 23 Mar 2021 13:07:00 +0800 Subject: soundwire: add definition for DPn BlockPackingMode For some reason we don't have an enum for this concept. Add definitions following Table 102 of the SoundWire 1.2 specification. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20210323050701.23760-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index eaa1486bdca9..350436db6ddb 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -125,6 +125,12 @@ enum sdw_dpn_grouping { SDW_BLK_GRP_CNT_4 = 3, }; +/* block packing mode enum */ +enum sdw_dpn_pkg_mode { + SDW_BLK_PKG_PER_PORT = 0, + SDW_BLK_PKG_PER_CHANNEL = 1 +}; + /** * enum sdw_stream_type: data stream type * -- cgit v1.2.3 From f318482a1c57315d0efccd2861f153f55c2117c6 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 19 Mar 2021 15:21:32 +0100 Subject: can: dev: can_free_echo_skb(): extend to return can frame length In order to implement byte queue limits (bql) in CAN drivers, the length of the CAN frame needs to be passed into the networking stack even if the transmission failed for some reason. To avoid to calculate this length twice, extend can_free_echo_skb() to return that value. Convert all users of this function, too. This patch is the natural extension of commit: | 9420e1d495e2 ("can: dev: can_get_echo_skb(): extend to return can | frame length") Link: https://lore.kernel.org/r/20210319142700.305648-3-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index d438eb058069..d311bc369a39 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -23,7 +23,8 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr, unsigned int *frame_len_ptr); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx, unsigned int *frame_len_ptr); -void can_free_echo_skb(struct net_device *dev, unsigned int idx); +void can_free_echo_skb(struct net_device *dev, unsigned int idx, + unsigned int *frame_len_ptr); struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); struct sk_buff *alloc_canfd_skb(struct net_device *dev, struct canfd_frame **cfd); -- cgit v1.2.3 From 289ea9e4ae595545e736a63ccaadba65f880e9a4 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 24 Feb 2021 09:20:04 +0900 Subject: can: add new CAN FD bittiming parameters: Transmitter Delay Compensation (TDC) At high bit rates, the propagation delay from the TX pin to the RX pin of the transceiver causes measurement errors: the sample point on the RX pin might occur on the previous bit. This issue is addressed in ISO 11898-1 section 11.3.3 "Transmitter delay compensation" (TDC). This patch adds two new structures: can_tdc and can_tdc_const in order to implement this TDC. The structures are then added to can_priv. A controller supports TDC if an only if can_priv::tdc_const is not NULL. TDC is active if and only if: - fd flag is on - can_priv::tdc.tdco is not zero. It is the driver responsibility to check those two conditions are met. No new controller modes are introduced (i.e. no CAN_CTRL_MODE_TDC) in order not to be redundant with above logic. The names of the parameters are chosen to match existing CAN controllers specification. References: - Bosch C_CAN FD8: https://www.bosch-semiconductors.com/media/ip_modules/pdf_2/c_can_fd8/users_manual_c_can_fd8_r210_1.pdf - Microchip CAN FD Controller Module: http://ww1.microchip.com/downloads/en/DeviceDoc/MCP251XXFD-CAN-FD-Controller-Module-Family-Reference-Manual-20005678B.pdf - SAM E701/S70/V70/V71 Family: https://www.mouser.com/datasheet/2/268/60001527A-1284321.pdf Link: https://lore.kernel.org/r/20210224002008.4158-2-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 65 +++++++++++++++++++++++++++++++++++++++++++ include/linux/can/dev.h | 3 ++ 2 files changed, 68 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 707575c668f4..b31a49f19b47 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (c) 2020 Pengutronix, Marc Kleine-Budde + * Copyright (c) 2021 Vincent Mailhol */ #ifndef _CAN_BITTIMING_H @@ -10,6 +11,70 @@ #define CAN_SYNC_SEG 1 +/* + * struct can_tdc - CAN FD Transmission Delay Compensation parameters + * + * At high bit rates, the propagation delay from the TX pin to the RX + * pin of the transceiver causes measurement errors: the sample point + * on the RX pin might occur on the previous bit. + * + * To solve this issue, ISO 11898-1 introduces in section 11.3.3 + * "Transmitter delay compensation" a SSP (Secondary Sample Point) + * equal to the distance, in time quanta, from the start of the bit + * time on the TX pin to the actual measurement on the RX pin. + * + * This structure contains the parameters to calculate that SSP. + * + * @tdcv: Transmitter Delay Compensation Value. Distance, in time + * quanta, from when the bit is sent on the TX pin to when it is + * received on the RX pin of the transmitter. Possible options: + * + * O: automatic mode. The controller dynamically measure @tdcv + * for each transmitted CAN FD frame. + * + * Other values: manual mode. Use the fixed provided value. + * + * @tdco: Transmitter Delay Compensation Offset. Offset value, in time + * quanta, defining the distance between the start of the bit + * reception on the RX pin of the transceiver and the SSP + * position such as SSP = @tdcv + @tdco. + * + * If @tdco is zero, then TDC is disabled and both @tdcv and + * @tdcf should be ignored. + * + * @tdcf: Transmitter Delay Compensation Filter window. Defines the + * minimum value for the SSP position in time quanta. If SSP is + * less than @tdcf, then no delay compensations occur and the + * normal sampling point is used instead. The feature is enabled + * if and only if @tdcv is set to zero (automatic mode) and @tdcf + * is configured to a value greater than @tdco. + */ +struct can_tdc { + u32 tdcv; + u32 tdco; + u32 tdcf; +}; + +/* + * struct can_tdc_const - CAN hardware-dependent constant for + * Transmission Delay Compensation + * + * @tdcv_max: Transmitter Delay Compensation Value maximum value. + * Should be set to zero if the controller does not support + * manual mode for tdcv. + * @tdco_max: Transmitter Delay Compensation Offset maximum value. + * Should not be zero. If the controller does not support TDC, + * then the pointer to this structure should be NULL. + * @tdcf_max: Transmitter Delay Compensation Filter window maximum + * value. Should be set to zero if the controller does not + * support this feature. + */ +struct can_tdc_const { + u32 tdcv_max; + u32 tdco_max; + u32 tdcf_max; +}; + #ifdef CONFIG_CAN_CALC_BITTIMING int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index ac4d83a1ab81..4795da0eb949 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -42,6 +42,9 @@ struct can_priv { struct can_bittiming bittiming, data_bittiming; const struct can_bittiming_const *bittiming_const, *data_bittiming_const; + struct can_tdc tdc; + const struct can_tdc_const *tdc_const; + const u16 *termination_const; unsigned int termination_const_cnt; u16 termination; -- cgit v1.2.3 From 4c9258dd26fdb3bacb35e767fa55c9a03a78a08e Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 24 Feb 2021 09:20:05 +0900 Subject: can: dev: reorder struct can_priv members for better packing Save eight bytes of holes on x86-64 architectures by reordering struct can_priv members. Before: $ pahole -C can_priv drivers/net/can/dev/dev.o struct can_priv { struct net_device * dev; /* 0 8 */ struct can_device_stats can_stats; /* 8 24 */ struct can_bittiming bittiming; /* 32 32 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct can_bittiming data_bittiming; /* 64 32 */ const struct can_bittiming_const * bittiming_const; /* 96 8 */ const struct can_bittiming_const * data_bittiming_const; /* 104 8 */ struct can_tdc tdc; /* 112 12 */ /* XXX 4 bytes hole, try to pack */ /* --- cacheline 2 boundary (128 bytes) --- */ const struct can_tdc_const * tdc_const; /* 128 8 */ const u16 * termination_const; /* 136 8 */ unsigned int termination_const_cnt; /* 144 4 */ u16 termination; /* 148 2 */ /* XXX 2 bytes hole, try to pack */ const u32 * bitrate_const; /* 152 8 */ unsigned int bitrate_const_cnt; /* 160 4 */ /* XXX 4 bytes hole, try to pack */ const u32 * data_bitrate_const; /* 168 8 */ unsigned int data_bitrate_const_cnt; /* 176 4 */ u32 bitrate_max; /* 180 4 */ struct can_clock clock; /* 184 4 */ enum can_state state; /* 188 4 */ /* --- cacheline 3 boundary (192 bytes) --- */ u32 ctrlmode; /* 192 4 */ u32 ctrlmode_supported; /* 196 4 */ u32 ctrlmode_static; /* 200 4 */ int restart_ms; /* 204 4 */ struct delayed_work restart_work; /* 208 168 */ /* XXX last struct has 4 bytes of padding */ /* --- cacheline 5 boundary (320 bytes) was 56 bytes ago --- */ int (*do_set_bittiming)(struct net_device *); /* 376 8 */ /* --- cacheline 6 boundary (384 bytes) --- */ int (*do_set_data_bittiming)(struct net_device *); /* 384 8 */ int (*do_set_mode)(struct net_device *, enum can_mode); /* 392 8 */ int (*do_set_termination)(struct net_device *, u16); /* 400 8 */ int (*do_get_state)(const struct net_device *, enum can_state *); /* 408 8 */ int (*do_get_berr_counter)(const struct net_device *, struct can_berr_counter *); /* 416 8 */ unsigned int echo_skb_max; /* 424 4 */ /* XXX 4 bytes hole, try to pack */ struct sk_buff * * echo_skb; /* 432 8 */ /* size: 440, cachelines: 7, members: 31 */ /* sum members: 426, holes: 4, sum holes: 14 */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 56 bytes */ }; After: $ pahole -C can_priv drivers/net/can/dev/dev.o struct can_priv { struct net_device * dev; /* 0 8 */ struct can_device_stats can_stats; /* 8 24 */ const struct can_bittiming_const * bittiming_const; /* 32 8 */ const struct can_bittiming_const * data_bittiming_const; /* 40 8 */ struct can_bittiming bittiming; /* 48 32 */ /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */ struct can_bittiming data_bittiming; /* 80 32 */ const struct can_tdc_const * tdc_const; /* 112 8 */ struct can_tdc tdc; /* 120 12 */ /* --- cacheline 2 boundary (128 bytes) was 4 bytes ago --- */ unsigned int bitrate_const_cnt; /* 132 4 */ const u32 * bitrate_const; /* 136 8 */ const u32 * data_bitrate_const; /* 144 8 */ unsigned int data_bitrate_const_cnt; /* 152 4 */ u32 bitrate_max; /* 156 4 */ struct can_clock clock; /* 160 4 */ unsigned int termination_const_cnt; /* 164 4 */ const u16 * termination_const; /* 168 8 */ u16 termination; /* 176 2 */ /* XXX 2 bytes hole, try to pack */ enum can_state state; /* 180 4 */ u32 ctrlmode; /* 184 4 */ u32 ctrlmode_supported; /* 188 4 */ /* --- cacheline 3 boundary (192 bytes) --- */ u32 ctrlmode_static; /* 192 4 */ int restart_ms; /* 196 4 */ struct delayed_work restart_work; /* 200 168 */ /* XXX last struct has 4 bytes of padding */ /* --- cacheline 5 boundary (320 bytes) was 48 bytes ago --- */ int (*do_set_bittiming)(struct net_device *); /* 368 8 */ int (*do_set_data_bittiming)(struct net_device *); /* 376 8 */ /* --- cacheline 6 boundary (384 bytes) --- */ int (*do_set_mode)(struct net_device *, enum can_mode); /* 384 8 */ int (*do_set_termination)(struct net_device *, u16); /* 392 8 */ int (*do_get_state)(const struct net_device *, enum can_state *); /* 400 8 */ int (*do_get_berr_counter)(const struct net_device *, struct can_berr_counter *); /* 408 8 */ unsigned int echo_skb_max; /* 416 4 */ /* XXX 4 bytes hole, try to pack */ struct sk_buff * * echo_skb; /* 424 8 */ /* size: 432, cachelines: 7, members: 31 */ /* sum members: 426, holes: 2, sum holes: 6 */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 48 bytes */ }; Link: https://lore.kernel.org/r/20210224002008.4158-3-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 4795da0eb949..27b275e463da 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -39,22 +39,23 @@ struct can_priv { struct net_device *dev; struct can_device_stats can_stats; - struct can_bittiming bittiming, data_bittiming; const struct can_bittiming_const *bittiming_const, *data_bittiming_const; - struct can_tdc tdc; + struct can_bittiming bittiming, data_bittiming; const struct can_tdc_const *tdc_const; + struct can_tdc tdc; - const u16 *termination_const; - unsigned int termination_const_cnt; - u16 termination; - const u32 *bitrate_const; unsigned int bitrate_const_cnt; + const u32 *bitrate_const; const u32 *data_bitrate_const; unsigned int data_bitrate_const_cnt; u32 bitrate_max; struct can_clock clock; + unsigned int termination_const_cnt; + const u16 *termination_const; + u16 termination; + enum can_state state; /* CAN controller features - see include/uapi/linux/can/netlink.h */ -- cgit v1.2.3 From c25cc7993243fdc00ab7e608e3764819538015ab Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 24 Feb 2021 09:20:08 +0900 Subject: can: bittiming: add calculation for CAN FD Transmitter Delay Compensation (TDC) The logic for the tdco calculation is to just reuse the normal sample point: tdco = sp. Because the sample point is expressed in tenth of percent and the tdco is expressed in time quanta, a conversion is needed. At the end, ssp = tdcv + tdco = tdcv + sp. Another popular method is to set tdco to the middle of the bit: tdc->tdco = can_bit_time(dbt) / 2 During benchmark tests, we could not find a clear advantages for one of the two methods. The tdco calculation is triggered each time the data_bittiming is changed so that users relying on automated calculation can use the netlink interface the exact same way without need of new parameters. For example, a command such as: ip link set canX type can bitrate 500000 dbitrate 4000000 fd on would trigger the calculation. The user using CONFIG_CAN_CALC_BITTIMING who does not want automated calculation needs to manually set tdco to zero. For example with: ip link set canX type can tdco 0 bitrate 500000 dbitrate 4000000 fd on (if the tdco parameter is provided in a previous command, it will be overwritten). If tdcv is set to zero (default), it is automatically calculated by the transiver for each frame. As such, there is no code in the kernel to calculate it. tdcf has no automated calculation functions because we could not figure out a formula for this parameter. Link: https://lore.kernel.org/r/20210224002008.4158-6-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index b31a49f19b47..3c4cad7b52c0 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -78,6 +78,8 @@ struct can_tdc_const { #ifdef CONFIG_CAN_CALC_BITTIMING int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc); + +void can_calc_tdco(struct net_device *dev); #else /* !CONFIG_CAN_CALC_BITTIMING */ static inline int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, @@ -86,6 +88,10 @@ can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, netdev_err(dev, "bit-timing calculation not available\n"); return -EINVAL; } + +static inline void can_calc_tdco(struct net_device *dev) +{ +} #endif /* CONFIG_CAN_CALC_BITTIMING */ int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt, -- cgit v1.2.3 From 1d7750760b70ba8b0e641146eee1b3a343d1b292 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Sat, 6 Mar 2021 14:40:40 +0900 Subject: can: bittiming: add CAN_KBPS, CAN_MBPS and CAN_MHZ macros Add three macro to simplify the readability of big bit timing numbers: - CAN_KBPS: kilobits per second (one thousand) - CAN_MBPS: megabits per second (one million) - CAN_MHZ: megahertz per second (one million) Example: u32 bitrate_max = 8 * CAN_MBPS; struct can_clock clock = {.freq = 80 * CAN_MHZ}; instead of: u32 bitrate_max = 8000000; struct can_clock clock = {.freq = 80000000}; Apply the new macro to driver/net/can/dev/bittiming.c. Link: https://lore.kernel.org/r/20210306054040.76483-1-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 3c4cad7b52c0..ae7a3411167c 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -11,6 +11,14 @@ #define CAN_SYNC_SEG 1 + +/* Kilobits and Megabits per second */ +#define CAN_KBPS 1000UL +#define CAN_MBPS 1000000UL + +/* Megahertz */ +#define CAN_MHZ 1000000UL + /* * struct can_tdc - CAN FD Transmission Delay Compensation parameters * -- cgit v1.2.3 From e9ce2ce17da626d930812199568bd426b2832f57 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 12 Feb 2021 14:16:10 +0100 Subject: mmc: core: Drop reference counting of the bus_ops When the mmc_rescan work is enabled for execution (host->rescan_disable), it's the only instance per mmc host that is allowed to set/clear the host->bus_ops pointer. Besides the mmc_rescan work, there are a couple of scenarios when the host->bus_ops pointer may be accessed. Typically, those can be described as as below: *) Upper mmc driver layers (like the mmc block device driver or an SDIO functional driver) needs to execute a host->bus_ops callback. This can be considered as safe without having to use some special locking mechanism, because they operate on top of the struct mmc_card. As long as there is a card to operate upon, the mmc core guarantees that there is a host->bus_ops assigned as well. Note that, upper layer mmc drivers are of course responsible to clean up from themselves from their ->remove() callbacks, otherwise things would fall apart anyways. **) Via the mmc host instance, we may need to force a removal of an inserted mmc card. This happens when a mmc host driver gets unbind, for example. In this case, we protect the host->bus_ops pointer from concurrent accesses, by disabling the mmc_rescan work upfront (host->rescan_disable). See mmc_stop_host() for example. This said, it seems like the reference counting of the host->bus_ops pointer at some point have become superfluous. As this is an old mechanism of the mmc core, it a bit difficult to digest the history of when that could have happened. However, let's drop the reference counting to avoid unnecessary code-paths and lockings. Cc: Pierre Ossman Cc: Russell King Signed-off-by: Ulf Hansson Reviewed-by: Adrian Hunter Link: https://lore.kernel.org/r/20210212131610.236843-1-ulf.hansson@linaro.org --- include/linux/mmc/host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 26a3c7bc29ae..a001ad2f5f23 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -423,7 +423,6 @@ struct mmc_host { /* group bitfields together to minimize padding */ unsigned int use_spi_crc:1; unsigned int claimed:1; /* host exclusively claimed */ - unsigned int bus_dead:1; /* bus has been released */ unsigned int doing_init_tune:1; /* initial tuning in progress */ unsigned int can_retune:1; /* re-tuning can be used */ unsigned int doing_retune:1; /* re-tuning in progress */ @@ -454,7 +453,6 @@ struct mmc_host { struct mmc_slot slot; const struct mmc_bus_ops *bus_ops; /* current bus driver */ - unsigned int bus_refs; /* reference counter */ unsigned int sdio_irqs; struct task_struct *sdio_irq_thread; -- cgit v1.2.3 From 18bbda900ffa7770b93daa1bc1ce3be39e643101 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 11 Mar 2021 17:41:57 +0800 Subject: mmc: sdio: fix a typo in the comment of SDIO_SD_REV_3_00 I believe "Spev" is typo, should be "Spec". Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20210311174157.561dada9@xhacker.debian Signed-off-by: Ulf Hansson --- include/linux/mmc/sdio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index e28769991e82..2a05d1ac4f0e 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -82,7 +82,7 @@ #define SDIO_SD_REV_1_01 0 /* SD Physical Spec Version 1.01 */ #define SDIO_SD_REV_1_10 1 /* SD Physical Spec Version 1.10 */ #define SDIO_SD_REV_2_00 2 /* SD Physical Spec Version 2.00 */ -#define SDIO_SD_REV_3_00 3 /* SD Physical Spev Version 3.00 */ +#define SDIO_SD_REV_3_00 3 /* SD Physical Spec Version 3.00 */ #define SDIO_CCCR_IOEx 0x02 #define SDIO_CCCR_IORx 0x03 -- cgit v1.2.3 From 17a17bf50612e6048a9975450cf1bd30f93815b5 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Mar 2021 16:29:00 +0100 Subject: mmc: core: Fix hanging on I/O during system suspend for removable cards The mmc core uses a PM notifier to temporarily during system suspend, turn off the card detection mechanism for removal/insertion of (e)MMC/SD/SDIO cards. Additionally, the notifier may be used to remove an SDIO card entirely, if a corresponding SDIO functional driver don't have the system suspend/resume callbacks assigned. This behaviour has been around for a very long time. However, a recent bug report tells us there are problems with this approach. More precisely, when receiving the PM_SUSPEND_PREPARE notification, we may end up hanging on I/O to be completed, thus also preventing the system from getting suspended. In the end what happens, is that the cancel_delayed_work_sync() in mmc_pm_notify() ends up waiting for mmc_rescan() to complete - and since mmc_rescan() wants to claim the host, it needs to wait for the I/O to be completed first. Typically, this problem is triggered in Android, if there is ongoing I/O while the user decides to suspend, resume and then suspend the system again. This due to that after the resume, an mmc_rescan() work gets punted to the workqueue, which job is to verify that the card remains inserted after the system has resumed. To fix this problem, userspace needs to become frozen to suspend the I/O, prior to turning off the card detection mechanism. Therefore, let's drop the PM notifiers for mmc subsystem altogether and rely on the card detection to be turned off/on as a part of the system_freezable_wq, that we are already using. Moreover, to allow and SDIO card to be removed during system suspend, let's manage this from a ->prepare() callback, assigned at the mmc_host_class level. In this way, we can use the parent device (the mmc_host_class device), to remove the card device that is the child, in the device_prepare() phase. Reported-by: Kiwoong Kim Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20210310152900.149380-1-ulf.hansson@linaro.org Reviewed-by: Kiwoong Kim --- include/linux/mmc/host.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index a001ad2f5f23..17d7b326af29 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -302,9 +302,6 @@ struct mmc_host { u32 ocr_avail_sdio; /* SDIO-specific OCR */ u32 ocr_avail_sd; /* SD-specific OCR */ u32 ocr_avail_mmc; /* MMC-specific OCR */ -#ifdef CONFIG_PM_SLEEP - struct notifier_block pm_notify; -#endif struct wakeup_source *ws; /* Enable consume of uevents */ u32 max_current_330; u32 max_current_300; -- cgit v1.2.3 From 137e68659e90b242049bfd1fab2700bbcf476140 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:44 +0000 Subject: firmware: arm_scmi: Remove legacy scmi_clk_ops protocol interface Now that all the SCMI driver users have been migrated to the new interface remove the legacy interface and all the transient code. Link: https://lore.kernel.org/r/20210316124903.35011-20-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index b19797b23c5f..e2291b7787f8 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -84,19 +84,6 @@ struct scmi_clk_proto_ops { int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id); }; -struct scmi_clk_ops { - int (*count_get)(const struct scmi_handle *hamdle); - - const struct scmi_clock_info *(*info_get) - (const struct scmi_handle *handle, u32 clk_id); - int (*rate_get)(const struct scmi_handle *handle, u32 clk_id, - u64 *rate); - int (*rate_set)(const struct scmi_handle *handle, u32 clk_id, - u64 rate); - int (*enable)(const struct scmi_handle *handle, u32 clk_id); - int (*disable)(const struct scmi_handle *handle, u32 clk_id); -}; - /** * struct scmi_perf_proto_ops - represents the various operations provided * by SCMI Performance Protocol @@ -618,7 +605,6 @@ struct scmi_notify_ops { * * @dev: pointer to the SCMI device * @version: pointer to the structure containing SCMI version information - * @clk_ops: pointer to set of clock protocol operations * @sensor_ops: pointer to set of sensor protocol operations * @reset_ops: pointer to set of reset protocol operations * @voltage_ops: pointer to set of voltage protocol operations @@ -638,7 +624,6 @@ struct scmi_notify_ops { struct scmi_handle { struct device *dev; struct scmi_revision_info *version; - const struct scmi_clk_ops *clk_ops; const struct scmi_sensor_ops *sensor_ops; const struct scmi_reset_ops *reset_ops; const struct scmi_voltage_ops *voltage_ops; -- cgit v1.2.3 From 7e029344223824dbf21e759bf9c0d08f633edb16 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:45 +0000 Subject: firmware: arm_scmi: Port reset protocol to new protocols interface Convert internals of protocol implementation to use protocol handles and expose a new protocol operations interface for SCMI driver using the new get/put common operations, while keeping the old handle->reset_ops still around to ease transition. Remove handle->reset_priv now unused. Link: https://lore.kernel.org/r/20210316124903.35011-21-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index e2291b7787f8..234b03a3f038 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -464,7 +464,7 @@ struct scmi_sensor_ops { }; /** - * struct scmi_reset_ops - represents the various operations provided + * struct scmi_reset_proto_ops - represents the various operations provided * by SCMI Reset Protocol * * @num_domains_get: get the count of reset domains provided by SCMI @@ -474,6 +474,15 @@ struct scmi_sensor_ops { * @assert: explicitly assert reset signal of the specified reset domain * @deassert: explicitly deassert reset signal of the specified reset domain */ +struct scmi_reset_proto_ops { + int (*num_domains_get)(const struct scmi_protocol_handle *ph); + char *(*name_get)(const struct scmi_protocol_handle *ph, u32 domain); + int (*latency_get)(const struct scmi_protocol_handle *ph, u32 domain); + int (*reset)(const struct scmi_protocol_handle *ph, u32 domain); + int (*assert)(const struct scmi_protocol_handle *ph, u32 domain); + int (*deassert)(const struct scmi_protocol_handle *ph, u32 domain); +}; + struct scmi_reset_ops { int (*num_domains_get)(const struct scmi_handle *handle); char *(*name_get)(const struct scmi_handle *handle, u32 domain); @@ -614,8 +623,6 @@ struct scmi_notify_ops { * @notify_ops: pointer to set of notifications related operations * @sensor_priv: pointer to private data structure specific to sensors * protocol(for internal use only) - * @reset_priv: pointer to private data structure specific to reset - * protocol(for internal use only) * @voltage_priv: pointer to private data structure specific to voltage * protocol(for internal use only) * @notify_priv: pointer to private data structure specific to notifications @@ -636,7 +643,6 @@ struct scmi_handle { const struct scmi_notify_ops *notify_ops; /* for protocol internal use */ void *sensor_priv; - void *reset_priv; void *voltage_priv; void *notify_priv; void *system_priv; -- cgit v1.2.3 From 497ef0cbc6d166cef129b180dd292eec744a04fb Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:47 +0000 Subject: firmware: arm_scmi: Remove legacy scmi_reset_ops protocol interface Now that all the SCMI driver users have been migrated to the new interface remove the legacy interface and all the transient code. Link: https://lore.kernel.org/r/20210316124903.35011-23-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 234b03a3f038..c5bf9dc707aa 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -483,15 +483,6 @@ struct scmi_reset_proto_ops { int (*deassert)(const struct scmi_protocol_handle *ph, u32 domain); }; -struct scmi_reset_ops { - int (*num_domains_get)(const struct scmi_handle *handle); - char *(*name_get)(const struct scmi_handle *handle, u32 domain); - int (*latency_get)(const struct scmi_handle *handle, u32 domain); - int (*reset)(const struct scmi_handle *handle, u32 domain); - int (*assert)(const struct scmi_handle *handle, u32 domain); - int (*deassert)(const struct scmi_handle *handle, u32 domain); -}; - /** * struct scmi_voltage_info - describe one available SCMI Voltage Domain * @@ -615,7 +606,6 @@ struct scmi_notify_ops { * @dev: pointer to the SCMI device * @version: pointer to the structure containing SCMI version information * @sensor_ops: pointer to set of sensor protocol operations - * @reset_ops: pointer to set of reset protocol operations * @voltage_ops: pointer to set of voltage protocol operations * @devm_protocol_get: devres managed method to acquire a protocol and get specific * operations and a dedicated protocol handler @@ -632,7 +622,6 @@ struct scmi_handle { struct device *dev; struct scmi_revision_info *version; const struct scmi_sensor_ops *sensor_ops; - const struct scmi_reset_ops *reset_ops; const struct scmi_voltage_ops *voltage_ops; const void __must_check * -- cgit v1.2.3 From 9694a7f623593425a1559bb7a82bee91dfdd843b Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:48 +0000 Subject: firmware: arm_scmi: Port sensor protocol to new protocols interface Convert internals of protocol implementation to use protocol handles and expose a new protocol operations interface for SCMI driver using the new get/put common operations, while keeping the old handle->sensor_ops still around to ease transition. Remove handle->sensor_priv now unused. Link: https://lore.kernel.org/r/20210316124903.35011-24-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index c5bf9dc707aa..ead06db7be18 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -431,7 +431,7 @@ enum scmi_sensor_class { }; /** - * struct scmi_sensor_ops - represents the various operations provided + * struct scmi_sensor_proto_ops - represents the various operations provided * by SCMI Sensor Protocol * * @count_get: get the count of sensors provided by SCMI @@ -446,6 +446,23 @@ enum scmi_sensor_class { * @config_get: Get sensor current configuration * @config_set: Set sensor current configuration */ +struct scmi_sensor_proto_ops { + int (*count_get)(const struct scmi_protocol_handle *ph); + const struct scmi_sensor_info *(*info_get) + (const struct scmi_protocol_handle *ph, u32 sensor_id); + int (*trip_point_config)(const struct scmi_protocol_handle *ph, + u32 sensor_id, u8 trip_id, u64 trip_value); + int (*reading_get)(const struct scmi_protocol_handle *ph, u32 sensor_id, + u64 *value); + int (*reading_get_timestamped)(const struct scmi_protocol_handle *ph, + u32 sensor_id, u8 count, + struct scmi_sensor_reading *readings); + int (*config_get)(const struct scmi_protocol_handle *ph, + u32 sensor_id, u32 *sensor_config); + int (*config_set)(const struct scmi_protocol_handle *ph, + u32 sensor_id, u32 sensor_config); +}; + struct scmi_sensor_ops { int (*count_get)(const struct scmi_handle *handle); const struct scmi_sensor_info *(*info_get) @@ -611,8 +628,6 @@ struct scmi_notify_ops { * operations and a dedicated protocol handler * @devm_protocol_put: devres managed method to release a protocol * @notify_ops: pointer to set of notifications related operations - * @sensor_priv: pointer to private data structure specific to sensors - * protocol(for internal use only) * @voltage_priv: pointer to private data structure specific to voltage * protocol(for internal use only) * @notify_priv: pointer to private data structure specific to notifications @@ -631,7 +646,6 @@ struct scmi_handle { const struct scmi_notify_ops *notify_ops; /* for protocol internal use */ - void *sensor_priv; void *voltage_priv; void *notify_priv; void *system_priv; -- cgit v1.2.3 From f3690d9729105963661d06c8cfd752ff1fa2499c Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:51 +0000 Subject: firmware: arm_scmi: Remove legacy scmi_sensor_ops protocol interface Now that all the SCMI driver users have been migrated to the new interface remove the legacy interface and all the transient code. Link: https://lore.kernel.org/r/20210316124903.35011-27-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index ead06db7be18..3ec0ac30fe60 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -463,23 +463,6 @@ struct scmi_sensor_proto_ops { u32 sensor_id, u32 sensor_config); }; -struct scmi_sensor_ops { - int (*count_get)(const struct scmi_handle *handle); - const struct scmi_sensor_info *(*info_get) - (const struct scmi_handle *handle, u32 sensor_id); - int (*trip_point_config)(const struct scmi_handle *handle, - u32 sensor_id, u8 trip_id, u64 trip_value); - int (*reading_get)(const struct scmi_handle *handle, u32 sensor_id, - u64 *value); - int (*reading_get_timestamped)(const struct scmi_handle *handle, - u32 sensor_id, u8 count, - struct scmi_sensor_reading *readings); - int (*config_get)(const struct scmi_handle *handle, - u32 sensor_id, u32 *sensor_config); - int (*config_set)(const struct scmi_handle *handle, - u32 sensor_id, u32 sensor_config); -}; - /** * struct scmi_reset_proto_ops - represents the various operations provided * by SCMI Reset Protocol @@ -622,7 +605,6 @@ struct scmi_notify_ops { * * @dev: pointer to the SCMI device * @version: pointer to the structure containing SCMI version information - * @sensor_ops: pointer to set of sensor protocol operations * @voltage_ops: pointer to set of voltage protocol operations * @devm_protocol_get: devres managed method to acquire a protocol and get specific * operations and a dedicated protocol handler @@ -636,7 +618,6 @@ struct scmi_notify_ops { struct scmi_handle { struct device *dev; struct scmi_revision_info *version; - const struct scmi_sensor_ops *sensor_ops; const struct scmi_voltage_ops *voltage_ops; const void __must_check * -- cgit v1.2.3 From b46d852718c1ba725e0a8b06bd0a039f85465838 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:52 +0000 Subject: firmware: arm_scmi: Port systempower protocol to new protocols interface Convert internals of protocol implementation to use protocol handles and expose a new protocol operations interface for SCMI driver using the new get/put common operations. Remove handle->system_priv now unused. Link: https://lore.kernel.org/r/20210316124903.35011-28-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 3ec0ac30fe60..17b82c76cf7a 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -629,7 +629,6 @@ struct scmi_handle { /* for protocol internal use */ void *voltage_priv; void *notify_priv; - void *system_priv; }; enum scmi_std_protocol { -- cgit v1.2.3 From fe4894d968f4333f3d425221f03add8666881d72 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:53 +0000 Subject: firmware: arm_scmi: Port voltage protocol to new protocols interface Convert internals of protocol implementation to use protocol handles and expose a new protocol operations interface for SCMI driver using the new get/put common operations, while keeping the old handle->voltage_ops still around to ease transition. Remove handle->voltage_priv now unused. Link: https://lore.kernel.org/r/20210316124903.35011-29-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 17b82c76cf7a..00fdf0c5786b 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -515,7 +515,7 @@ struct scmi_voltage_info { }; /** - * struct scmi_voltage_ops - represents the various operations provided + * struct scmi_voltage_proto_ops - represents the various operations provided * by SCMI Voltage Protocol * * @num_domains_get: get the count of voltage domains provided by SCMI @@ -525,14 +525,28 @@ struct scmi_voltage_info { * @level_set: set the voltage level for the specified domain * @level_get: get the voltage level of the specified domain */ +struct scmi_voltage_proto_ops { + int (*num_domains_get)(const struct scmi_protocol_handle *ph); + const struct scmi_voltage_info __must_check *(*info_get) + (const struct scmi_protocol_handle *ph, u32 domain_id); + int (*config_set)(const struct scmi_protocol_handle *ph, u32 domain_id, + u32 config); +#define SCMI_VOLTAGE_ARCH_STATE_OFF 0x0 +#define SCMI_VOLTAGE_ARCH_STATE_ON 0x7 + int (*config_get)(const struct scmi_protocol_handle *ph, u32 domain_id, + u32 *config); + int (*level_set)(const struct scmi_protocol_handle *ph, u32 domain_id, + u32 flags, s32 volt_uV); + int (*level_get)(const struct scmi_protocol_handle *ph, u32 domain_id, + s32 *volt_uV); +}; + struct scmi_voltage_ops { int (*num_domains_get)(const struct scmi_handle *handle); const struct scmi_voltage_info __must_check *(*info_get) (const struct scmi_handle *handle, u32 domain_id); int (*config_set)(const struct scmi_handle *handle, u32 domain_id, u32 config); -#define SCMI_VOLTAGE_ARCH_STATE_OFF 0x0 -#define SCMI_VOLTAGE_ARCH_STATE_ON 0x7 int (*config_get)(const struct scmi_handle *handle, u32 domain_id, u32 *config); int (*level_set)(const struct scmi_handle *handle, u32 domain_id, @@ -610,8 +624,6 @@ struct scmi_notify_ops { * operations and a dedicated protocol handler * @devm_protocol_put: devres managed method to release a protocol * @notify_ops: pointer to set of notifications related operations - * @voltage_priv: pointer to private data structure specific to voltage - * protocol(for internal use only) * @notify_priv: pointer to private data structure specific to notifications * (for internal use only) */ @@ -626,8 +638,6 @@ struct scmi_handle { void (*devm_protocol_put)(struct scmi_device *sdev, u8 proto); const struct scmi_notify_ops *notify_ops; - /* for protocol internal use */ - void *voltage_priv; void *notify_priv; }; -- cgit v1.2.3 From c3ed5e953ef0cdd599e70558e2b26696f1fbe77d Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:48:55 +0000 Subject: firmware: arm_scmi: Remove legacy scmi_voltage_ops protocol interface Now that all the SCMI driver users have been migrated to the new interface remove the legacy interface and all the transient code. Link: https://lore.kernel.org/r/20210316124903.35011-31-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 00fdf0c5786b..284dda52006e 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -541,20 +541,6 @@ struct scmi_voltage_proto_ops { s32 *volt_uV); }; -struct scmi_voltage_ops { - int (*num_domains_get)(const struct scmi_handle *handle); - const struct scmi_voltage_info __must_check *(*info_get) - (const struct scmi_handle *handle, u32 domain_id); - int (*config_set)(const struct scmi_handle *handle, u32 domain_id, - u32 config); - int (*config_get)(const struct scmi_handle *handle, u32 domain_id, - u32 *config); - int (*level_set)(const struct scmi_handle *handle, u32 domain_id, - u32 flags, s32 volt_uV); - int (*level_get)(const struct scmi_handle *handle, u32 domain_id, - s32 *volt_uV); -}; - /** * struct scmi_notify_ops - represents notifications' operations provided by * SCMI core @@ -619,7 +605,6 @@ struct scmi_notify_ops { * * @dev: pointer to the SCMI device * @version: pointer to the structure containing SCMI version information - * @voltage_ops: pointer to set of voltage protocol operations * @devm_protocol_get: devres managed method to acquire a protocol and get specific * operations and a dedicated protocol handler * @devm_protocol_put: devres managed method to release a protocol @@ -630,7 +615,6 @@ struct scmi_notify_ops { struct scmi_handle { struct device *dev; struct scmi_revision_info *version; - const struct scmi_voltage_ops *voltage_ops; const void __must_check * (*devm_protocol_get)(struct scmi_device *sdev, u8 proto, -- cgit v1.2.3 From a02d7c93c1f3cc892b69b50069ae757c92c03b0d Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:49:00 +0000 Subject: firmware: arm_scmi: Make notify_priv really private Notification private data is currently accessible via handle->notify_priv, this data was indeed meant to be private to the notification core support and not to be accessible by SCMI drivers. Make it private hiding it inside instance descriptor struct scmi_info and accessible only via dedicated helpers. Link: https://lore.kernel.org/r/20210316124903.35011-36-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 284dda52006e..114890bd7af0 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -609,8 +609,6 @@ struct scmi_notify_ops { * operations and a dedicated protocol handler * @devm_protocol_put: devres managed method to release a protocol * @notify_ops: pointer to set of notifications related operations - * @notify_priv: pointer to private data structure specific to notifications - * (for internal use only) */ struct scmi_handle { struct device *dev; @@ -622,7 +620,6 @@ struct scmi_handle { void (*devm_protocol_put)(struct scmi_device *sdev, u8 proto); const struct scmi_notify_ops *notify_ops; - void *notify_priv; }; enum scmi_std_protocol { -- cgit v1.2.3 From aa1fd3e4cb976248651c319bb0ef929345958cf4 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:49:01 +0000 Subject: firmware: arm_scmi: Rename non devres notify_ops Rename non devres managed notify_ops to use a naming pattern which exposes the performed action verb as last token. No functional change. Link: https://lore.kernel.org/r/20210316124903.35011-37-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 114890bd7af0..b80496d519f3 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -548,8 +548,8 @@ struct scmi_voltage_proto_ops { * the requested event * @devm_event_notifier_unregister: Managed unregistration of a notifier_block * for the requested event - * @register_event_notifier: Register a notifier_block for the requested event - * @unregister_event_notifier: Unregister a notifier_block for the requested + * @event_notifier_register: Register a notifier_block for the requested event + * @event_notifier_unregister: Unregister a notifier_block for the requested * event * * A user can register/unregister its own notifier_block against the wanted @@ -590,11 +590,11 @@ struct scmi_notify_ops { u8 proto_id, u8 evt_id, const u32 *src_id, struct notifier_block *nb); - int (*register_event_notifier)(const struct scmi_handle *handle, + int (*event_notifier_register)(const struct scmi_handle *handle, u8 proto_id, u8 evt_id, const u32 *src_id, struct notifier_block *nb); - int (*unregister_event_notifier)(const struct scmi_handle *handle, + int (*event_notifier_unregister)(const struct scmi_handle *handle, u8 proto_id, u8 evt_id, const u32 *src_id, struct notifier_block *nb); -- cgit v1.2.3 From f5800e0bf6f9b7ff7dfa4db8b0681bd062eb16a4 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 16 Mar 2021 12:49:02 +0000 Subject: firmware: arm_scmi: Add protocol modularization support Extend SCMI protocols accounting mechanism to address possible module usage and add the support to possibly define new protocols as loadable modules. Keep the standard protocols built into the SCMI core. Link: https://lore.kernel.org/r/20210316124903.35011-38-cristian.marussi@arm.com Tested-by: Florian Fainelli Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index b80496d519f3..79d0a1237e6c 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -704,6 +704,18 @@ static inline void scmi_driver_unregister(struct scmi_driver *driver) {} #define module_scmi_driver(__scmi_driver) \ module_driver(__scmi_driver, scmi_register, scmi_unregister) +/** + * module_scmi_protocol() - Helper macro for registering a scmi protocol + * @__scmi_protocol: scmi_protocol structure + * + * Helper macro for scmi drivers to set up proper module init / exit + * functions. Replaces module_init() and module_exit() and keeps people from + * printing pointless things to the kernel log when their driver is loaded. + */ +#define module_scmi_protocol(__scmi_protocol) \ + module_driver(__scmi_protocol, \ + scmi_protocol_register, scmi_protocol_unregister) + struct scmi_protocol; int scmi_protocol_register(const struct scmi_protocol *proto); void scmi_protocol_unregister(const struct scmi_protocol *proto); -- cgit v1.2.3 From 01ad444e3be719f8ad13f136a9b0d301806183c8 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 30 Mar 2021 15:47:17 +0100 Subject: soundwire: export sdw_compare_devid, sdw_extract_slave_id and sdw_slave_add Exporting these three functions makes sense as it can be used by other controllers like Qualcomm during auto-enumeration! Reported-by: kernel test robot Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210330144719.13284-8-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 350436db6ddb..5ff9a8f37e91 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -1039,5 +1039,7 @@ int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value); int sdw_read_no_pm(struct sdw_slave *slave, u32 addr); int sdw_nread(struct sdw_slave *slave, u32 addr, size_t count, u8 *val); int sdw_nwrite(struct sdw_slave *slave, u32 addr, size_t count, u8 *val); +int sdw_compare_devid(struct sdw_slave *slave, struct sdw_slave_id id); +void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id); #endif /* __SOUNDWIRE_H */ -- cgit v1.2.3 From a24f98176d1efae2c37d3438c57a624d530d9c33 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 29 Mar 2021 16:38:27 +0300 Subject: gpu: host1x: Use different lock classes for each client To avoid false lockdep warnings, give each client lock a different lock class, passed from the initialization site by macro. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index ce59a6a6a008..9eb77c87a83b 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -320,7 +320,14 @@ static inline struct host1x_device *to_host1x_device(struct device *dev) int host1x_device_init(struct host1x_device *device); int host1x_device_exit(struct host1x_device *device); -int host1x_client_register(struct host1x_client *client); +int __host1x_client_register(struct host1x_client *client, + struct lock_class_key *key); +#define host1x_client_register(class) \ + ({ \ + static struct lock_class_key __key; \ + __host1x_client_register(class, &__key); \ + }) + int host1x_client_unregister(struct host1x_client *client); int host1x_client_suspend(struct host1x_client *client); -- cgit v1.2.3 From df59d0a461bc5935232bf56a279e4d7a71c566a5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 30 Mar 2021 13:40:27 -0400 Subject: XArray: Add xa_limit_16b A 16-bit limit is a more common limit than I had realised. Make it generally available. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 92c0160b3352..a91e3d90df8a 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -229,9 +229,10 @@ static inline int xa_err(void *entry) * * This structure is used either directly or via the XA_LIMIT() macro * to communicate the range of IDs that are valid for allocation. - * Two common ranges are predefined for you: + * Three common ranges are predefined for you: * * xa_limit_32b - [0 - UINT_MAX] * * xa_limit_31b - [0 - INT_MAX] + * * xa_limit_16b - [0 - USHRT_MAX] */ struct xa_limit { u32 max; @@ -242,6 +243,7 @@ struct xa_limit { #define xa_limit_32b XA_LIMIT(0, UINT_MAX) #define xa_limit_31b XA_LIMIT(0, INT_MAX) +#define xa_limit_16b XA_LIMIT(0, USHRT_MAX) typedef unsigned __bitwise xa_mark_t; #define XA_MARK_0 ((__force xa_mark_t)0U) -- cgit v1.2.3 From 86cec7ece3e62517e2bc0fd796a8a8da4193e7e5 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 29 Mar 2021 16:38:28 +0300 Subject: gpu: host1x: Allow syncpoints without associated client Syncpoints don't need to be associated with any client, so remove the property, and expose host1x_syncpt_alloc. This will allow allocating syncpoints without prior knowledge of the engine that it will be used with. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 9eb77c87a83b..7137ce0e35d4 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -154,6 +154,9 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client, unsigned long flags); void host1x_syncpt_free(struct host1x_syncpt *sp); +struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, + unsigned long flags, + const char *name); struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp); u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base); -- cgit v1.2.3 From 913d55037616659c04763e756f948fcbaef0bbee Mon Sep 17 00:00:00 2001 From: He Fengqing Date: Tue, 30 Mar 2021 02:48:43 +0000 Subject: bpf: Remove unused bpf_load_pointer Remove unused bpf_load_pointer function in filter.h. The last user of it has been removed with 24dea04767e6 ("bpf, x32: remove ld_abs/ld_ind"). Signed-off-by: He Fengqing Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210330024843.3479844-1-hefengqing@huawei.com --- include/linux/filter.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index eecfd82db648..9a09547bc7ba 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1246,15 +1246,6 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest) void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size); -static inline void *bpf_load_pointer(const struct sk_buff *skb, int k, - unsigned int size, void *buffer) -{ - if (k >= 0) - return skb_header_pointer(skb, k, size, buffer); - - return bpf_internal_load_pointer_neg_helper(skb, k, size); -} - static inline int bpf_tell_extensions(void) { return SKF_AD_MAX; -- cgit v1.2.3 From 78352f73dc5047f3f744764cc45912498c52f3c9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Mar 2021 12:28:52 +0200 Subject: udp: never accept GSO_FRAGLIST packets Currently the UDP protocol delivers GSO_FRAGLIST packets to the sockets without the expected segmentation. This change addresses the issue introducing and maintaining a couple of new fields to explicitly accept SKB_GSO_UDP_L4 or GSO_FRAGLIST packets. Additionally updates udp_unexpected_gso() accordingly. UDP sockets enabling UDP_GRO stil keep accept_udp_fraglist zeroed. v1 -> v2: - use 2 bits instead of a whole GSO bitmask (Willem) Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/udp.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index aa84597bdc33..ae58ff3b6b5b 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -51,7 +51,9 @@ struct udp_sock { * different encapsulation layer set * this */ - gro_enabled:1; /* Can accept GRO packets */ + gro_enabled:1, /* Request GRO aggregation */ + accept_udp_l4:1, + accept_udp_fraglist:1; /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -131,8 +133,16 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { - return !udp_sk(sk)->gro_enabled && skb_is_gso(skb) && - skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4; + if (!skb_is_gso(skb)) + return false; + + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4) + return true; + + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist) + return true; + + return false; } #define udp_portaddr_for_each_entry(__sk, list) \ -- cgit v1.2.3 From d18931a92a0b5feddd8a39d097b90ae2867db02f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Mar 2021 12:28:53 +0200 Subject: vxlan: allow L4 GRO passthrough When passing up an UDP GSO packet with L4 aggregation, there is no need to segment it at the vxlan level. We can propagate the packet untouched and let it be segmented later, if needed. Introduce an helper to allow let the UDP socket to accept any L4 aggregation and use it in the vxlan driver. v1 -> v2: - updated to use the newly introduced UDP socket 'accept*' fields Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/udp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index ae58ff3b6b5b..ae66dadd8543 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -145,6 +145,12 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) return false; } +static inline void udp_allow_gso(struct sock *sk) +{ + udp_sk(sk)->accept_udp_l4 = 1; + udp_sk(sk)->accept_udp_fraglist = 1; +} + #define udp_portaddr_for_each_entry(__sk, list) \ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) -- cgit v1.2.3 From 61431a5907fc36d0738e9a547c7e1556349a03e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Mar 2021 16:43:43 -0700 Subject: net: ensure mac header is set in virtio_net_hdr_to_skb() Commit 924a9bc362a5 ("net: check if protocol extracted by virtio_net_hdr_set_proto is correct") added a call to dev_parse_header_protocol() but mac_header is not yet set. This means that eth_hdr() reads complete garbage, and syzbot complained about it [1] This patch resets mac_header earlier, to get more coverage about this change. Audit of virtio_net_hdr_to_skb() callers shows that this change should be safe. [1] BUG: KASAN: use-after-free in eth_header_parse_protocol+0xdc/0xe0 net/ethernet/eth.c:282 Read of size 2 at addr ffff888017a6200b by task syz-executor313/8409 CPU: 1 PID: 8409 Comm: syz-executor313 Not tainted 5.12.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x141/0x1d7 lib/dump_stack.c:120 print_address_description.constprop.0.cold+0x5b/0x2f8 mm/kasan/report.c:232 __kasan_report mm/kasan/report.c:399 [inline] kasan_report.cold+0x7c/0xd8 mm/kasan/report.c:416 eth_header_parse_protocol+0xdc/0xe0 net/ethernet/eth.c:282 dev_parse_header_protocol include/linux/netdevice.h:3177 [inline] virtio_net_hdr_to_skb.constprop.0+0x99d/0xcd0 include/linux/virtio_net.h:83 packet_snd net/packet/af_packet.c:2994 [inline] packet_sendmsg+0x2325/0x52b0 net/packet/af_packet.c:3031 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:674 sock_no_sendpage+0xf3/0x130 net/core/sock.c:2860 kernel_sendpage.part.0+0x1ab/0x350 net/socket.c:3631 kernel_sendpage net/socket.c:3628 [inline] sock_sendpage+0xe5/0x140 net/socket.c:947 pipe_to_sendpage+0x2ad/0x380 fs/splice.c:364 splice_from_pipe_feed fs/splice.c:418 [inline] __splice_from_pipe+0x43e/0x8a0 fs/splice.c:562 splice_from_pipe fs/splice.c:597 [inline] generic_splice_sendpage+0xd4/0x140 fs/splice.c:746 do_splice_from fs/splice.c:767 [inline] do_splice+0xb7e/0x1940 fs/splice.c:1079 __do_splice+0x134/0x250 fs/splice.c:1144 __do_sys_splice fs/splice.c:1350 [inline] __se_sys_splice fs/splice.c:1332 [inline] __x64_sys_splice+0x198/0x250 fs/splice.c:1332 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 Fixes: 924a9bc362a5 ("net: check if protocol extracted by virtio_net_hdr_set_proto is correct") Signed-off-by: Eric Dumazet Cc: Balazs Nemeth Cc: Willem de Bruijn Reported-by: syzbot Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 6b5fcfa1e555..98775d7fa696 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -62,6 +62,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, return -EINVAL; } + skb_reset_mac_header(skb); + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { u16 start = __virtio16_to_cpu(little_endian, hdr->csum_start); u16 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); -- cgit v1.2.3 From 6e085e0ac9cf16298b5fefe0b1893f98ef765812 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Dec 2020 14:09:24 +0800 Subject: arm/arm64: Probe for the presence of KVM hypervisor Although the SMCCC specification provides some limited functionality for describing the presence of hypervisor and firmware services, this is generally applicable only to functions designated as "Arm Architecture Service Functions" and no portable discovery mechanism is provided for standard hypervisor services, despite having a designated range of function identifiers reserved by the specification. In an attempt to avoid the need for additional firmware changes every time a new function is added, introduce a UID to identify the service provider as being compatible with KVM. Once this has been established, additional services can be discovered via a feature bitmap. Reviewed-by: Steven Price Signed-off-by: Will Deacon Signed-off-by: Jianyong Wu [maz: move code to its own file, plug it into PSCI] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201209060932.212364-2-jianyong.wu@arm.com --- include/linux/arm-smccc.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 62c54234576c..1a27bd9493fe 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -55,6 +55,8 @@ #define ARM_SMCCC_OWNER_TRUSTED_OS 50 #define ARM_SMCCC_OWNER_TRUSTED_OS_END 63 +#define ARM_SMCCC_FUNC_QUERY_CALL_UID 0xff01 + #define ARM_SMCCC_QUIRK_NONE 0 #define ARM_SMCCC_QUIRK_QCOM_A6 1 /* Save/restore register a6 */ @@ -87,6 +89,29 @@ ARM_SMCCC_SMC_32, \ 0, 0x7fff) +#define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_FUNC_QUERY_CALL_UID) + +/* KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 */ +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU + +/* KVM "vendor specific" services */ +#define ARM_SMCCC_KVM_FUNC_FEATURES 0 +#define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 +#define ARM_SMCCC_KVM_NUM_FUNCS 128 + +#define ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_FEATURES) + #define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED 1 /* Paravirtualised time calls (defined by ARM DEN0057A) */ -- cgit v1.2.3 From 66ac7985b2af310aaca14869d6e43b0290e98c07 Mon Sep 17 00:00:00 2001 From: Carl Yin Date: Mon, 29 Mar 2021 18:28:21 -0700 Subject: bus: mhi: core: Add support for Flash Programmer execution environment MHI WWAN modems support downloading firmware to NAND or eMMC using Firehose protocol with process as follows: 1. Modem boots up, enters AMSS execution environment and the device later enters EDL (Emergency Download) mode through any mechanism host can use such as a diag command. 2. Modem enters SYS_ERROR, MHI host handles SYS_ERROR transition. 3. EDL image for device to enter 'Flash Programmer' execution environment is then flashed via BHI interface from host. 4. Modem enters MHI READY -> M0 and sends the Flash Programmer execution environment change to host. 5. Following that, EDL/FIREHOSE channels (34, 35) are made available from the host. 6. User space tool for downloading firmware image to modem over the EDL channels using Firehose protocol. Link to USB flashing tool: https://git.linaro.org/landing-teams/working/qualcomm/qdl.git/ Make the necessary changes to allow for this sequence to occur and allow using the Flash Programmer execution environment. Signed-off-by: Carl Yin Co-developed-by: Bhaumik Bhatt Signed-off-by: Bhaumik Bhatt Reviewed-by: Loic Poulain Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1617067704-28850-5-git-send-email-bbhatt@codeaurora.org Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 8f5bf409f663..b16afd36b444 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -117,6 +117,7 @@ struct mhi_link_info { * @MHI_EE_WFW: WLAN firmware mode * @MHI_EE_PTHRU: Passthrough * @MHI_EE_EDL: Embedded downloader + * @MHI_EE_FP: Flash Programmer Environment */ enum mhi_ee_type { MHI_EE_PBL, @@ -126,7 +127,8 @@ enum mhi_ee_type { MHI_EE_WFW, MHI_EE_PTHRU, MHI_EE_EDL, - MHI_EE_MAX_SUPPORTED = MHI_EE_EDL, + MHI_EE_FP, + MHI_EE_MAX_SUPPORTED = MHI_EE_FP, MHI_EE_DISABLE_TRANSITION, /* local EE, not related to mhi spec */ MHI_EE_NOT_SUPPORTED, MHI_EE_MAX, -- cgit v1.2.3 From 4c5014456305482412b35a081ca0fb4fefd69764 Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Wed, 24 Mar 2021 12:28:53 -0700 Subject: regmap-irq: Introduce virtual regs to handle more config regs Add "virtual" registers support to handle any irq configuration registers in addition to the ones the framework currently supports (status, mask, unmask, wake, type and ack). These are non-standard registers that further configure irq type on some devices, so enable the framework to add a variable number of them. Signed-off-by: Guru Das Srinagesh Link: https://lore.kernel.org/r/a1787067004b0e11cb960319082764397469215a.1616613838.git.gurus@codeaurora.org Signed-off-by: Mark Brown --- include/linux/regmap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 18910bd809f7..97ec73383e47 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1393,6 +1393,7 @@ struct regmap_irq_sub_irq_map { * Using zero value is possible with @use_ack bit. * @wake_base: Base address for wake enables. If zero unsupported. * @type_base: Base address for irq type. If zero unsupported. + * @virt_reg_base: Base addresses for extra config regs. * @irq_reg_stride: Stride to use for chips where registers are not contiguous. * @init_ack_masked: Ack all masked interrupts once during initalization. * @mask_invert: Inverted mask register: cleared bits are masked out. @@ -1417,6 +1418,8 @@ struct regmap_irq_sub_irq_map { * assigned based on the index in the array of the interrupt. * @num_irqs: Number of descriptors. * @num_type_reg: Number of type registers. + * @num_virt_regs: Number of non-standard irq configuration registers. + * If zero unsupported. * @type_reg_stride: Stride to use for chips where type registers are not * contiguous. * @handle_pre_irq: Driver specific callback to handle interrupt from device @@ -1444,6 +1447,7 @@ struct regmap_irq_chip { unsigned int ack_base; unsigned int wake_base; unsigned int type_base; + unsigned int *virt_reg_base; unsigned int irq_reg_stride; bool mask_writeonly:1; bool init_ack_masked:1; @@ -1464,6 +1468,7 @@ struct regmap_irq_chip { int num_irqs; int num_type_reg; + int num_virt_regs; unsigned int type_reg_stride; int (*handle_pre_irq)(void *irq_drv_data); -- cgit v1.2.3 From 394409aafd017adfcffd075595cb01cc456a9327 Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Wed, 24 Mar 2021 12:28:54 -0700 Subject: regmap-irq: Add driver callback to configure virtual regs Enable drivers to configure and modify "virtual" registers, which are non-standard registers that further configure irq type on some devices. Since they are non-standard, enable drivers to configure them according to their particular idiosyncrasies by specifying an optional callback function while registering with the framework. Signed-off-by: Guru Das Srinagesh Link: https://lore.kernel.org/r/07e058cdec2297d15c95c825aa0263064d962d5a.1616613838.git.gurus@codeaurora.org Signed-off-by: Mark Brown --- include/linux/regmap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 97ec73383e47..f87a11a5cc4a 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1426,6 +1426,8 @@ struct regmap_irq_sub_irq_map { * before regmap_irq_handler process the interrupts. * @handle_post_irq: Driver specific callback to handle interrupt from device * after handling the interrupts in regmap_irq_handler(). + * @set_type_virt: Driver specific callback to extend regmap_irq_set_type() + * and configure virt regs. * @irq_drv_data: Driver specific IRQ data which is passed as parameter when * driver specific pre/post interrupt handler is called. * @@ -1473,6 +1475,8 @@ struct regmap_irq_chip { int (*handle_pre_irq)(void *irq_drv_data); int (*handle_post_irq)(void *irq_drv_data); + int (*set_type_virt)(unsigned int **buf, unsigned int type, + unsigned long hwirq, int reg); void *irq_drv_data; }; -- cgit v1.2.3 From 4179e576b56d82e5ce007b9f548efb90605e2713 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 30 Mar 2021 18:49:05 +0200 Subject: spi: pl022: Drop custom per-chip cs_control Drop the custom cs_control() assigned through platform data, we have no in-tree users and the only out-of-tree use I have ever seen of this facility is to pull GPIO lines, which is something the driver can already do for us. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20210330164907.2346010-1-linus.walleij@linaro.org Signed-off-by: Mark Brown --- include/linux/amba/pl022.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h index 131b27c97209..29274cedefde 100644 --- a/include/linux/amba/pl022.h +++ b/include/linux/amba/pl022.h @@ -265,8 +265,6 @@ struct pl022_ssp_controller { * @duplex: Microwire interface: Full/Half duplex * @clkdelay: on the PL023 variant, the delay in feeback clock cycles * before sampling the incoming line - * @cs_control: function pointer to board-specific function to - * assert/deassert I/O port to control HW generation of devices chip-select. */ struct pl022_config_chip { enum ssp_interface iface; @@ -280,7 +278,6 @@ struct pl022_config_chip { enum ssp_microwire_wait_state wait_state; enum ssp_duplex duplex; enum ssp_clkdelay clkdelay; - void (*cs_control) (u32 control); }; #endif /* _SSP_PL022_H */ -- cgit v1.2.3 From 77f983a9df421fa00ca6a2f494dc79f8afca75a2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 30 Mar 2021 18:49:06 +0200 Subject: spi: pl022: Use GPIOs looked up by the core The SPI core looks up GPIO lines from the device tree, so let's stop trying to do that on our own and rely on the core to do this for us. In addition to the GPIO line we also need to keep track of the chip select index separately, as the native chip select needs this index. The driver was reusing the same GPIO array for native chip select indices, so keep this in a separate state variable instead. The facility to pass in custom GPIO lines from the platform data can go, because even if we do have out-of-tree code that want to use platform data, they can soon pass in GPIOs using machine GPIO descriptor tables which will be available after the next step when we convert the driver to using GPIO descriptors. The implicit inclusion of is made explicit as we no longer need to include . Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20210330164907.2346010-2-linus.walleij@linaro.org Signed-off-by: Mark Brown --- include/linux/amba/pl022.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h index 29274cedefde..9bf58aac0df2 100644 --- a/include/linux/amba/pl022.h +++ b/include/linux/amba/pl022.h @@ -223,10 +223,6 @@ struct dma_chan; /** * struct pl022_ssp_master - device.platform_data for SPI controller devices. * @bus_id: identifier for this bus - * @num_chipselect: chipselects are used to distinguish individual - * SPI slaves, and are numbered from zero to num_chipselects - 1. - * each slave has a chipselect signal, but it's common that not - * every chipselect is connected to a slave. * @enable_dma: if true enables DMA driven transfers. * @dma_rx_param: parameter to locate an RX DMA channel. * @dma_tx_param: parameter to locate a TX DMA channel. @@ -235,18 +231,15 @@ struct dma_chan; * indicates no delay and the device will be suspended immediately. * @rt: indicates the controller should run the message pump with realtime * priority to minimise the transfer latency on the bus. - * @chipselects: list of chip select gpios */ struct pl022_ssp_controller { u16 bus_id; - u8 num_chipselect; u8 enable_dma:1; bool (*dma_filter)(struct dma_chan *chan, void *filter_param); void *dma_rx_param; void *dma_tx_param; int autosuspend_delay; bool rt; - int *chipselects; }; /** -- cgit v1.2.3 From 2aed4f5ab04af922a7cf1b616701845c9ed2473f Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 29 Mar 2021 16:38:32 +0300 Subject: gpu: host1x: Cleanup and refcounting for syncpoints Add reference counting for allocated syncpoints to allow keeping them allocated while jobs are referencing them. Additionally, clean up various places using syncpoint IDs to use host1x_syncpt pointers instead. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 7137ce0e35d4..107aea29bccb 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -142,7 +142,9 @@ struct host1x_syncpt_base; struct host1x_syncpt; struct host1x; -struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id); +struct host1x_syncpt *host1x_syncpt_get_by_id(struct host1x *host, u32 id); +struct host1x_syncpt *host1x_syncpt_get_by_id_noref(struct host1x *host, u32 id); +struct host1x_syncpt *host1x_syncpt_get(struct host1x_syncpt *sp); u32 host1x_syncpt_id(struct host1x_syncpt *sp); u32 host1x_syncpt_read_min(struct host1x_syncpt *sp); u32 host1x_syncpt_read_max(struct host1x_syncpt *sp); @@ -153,7 +155,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, u32 *value); struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client, unsigned long flags); -void host1x_syncpt_free(struct host1x_syncpt *sp); +void host1x_syncpt_put(struct host1x_syncpt *sp); struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, unsigned long flags, const char *name); @@ -221,7 +223,7 @@ struct host1x_job { dma_addr_t *reloc_addr_phys; /* Sync point id, number of increments and end related to the submit */ - u32 syncpt_id; + struct host1x_syncpt *syncpt; u32 syncpt_incrs; u32 syncpt_end; -- cgit v1.2.3 From f5ba33fb9690566c382624637125827b5512e766 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 29 Mar 2021 16:38:34 +0300 Subject: gpu: host1x: Reserve VBLANK syncpoints at initialization On T20-T148 chips, the bootloader can set up a boot splash screen with DC configured to increment syncpoint 26/27 at VBLANK. Because of this we shouldn't allow these syncpoints to be allocated until DC has been reset and will no longer increment them in the background. As such, on these chips, reserve those two syncpoints at initialization, and only mark them free once the DC driver has indicated it's safe to do so. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 107aea29bccb..e0a41c2b4c7a 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -163,6 +163,9 @@ struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp); u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base); +void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, + u32 syncpt_id); + /* * host1x channel */ -- cgit v1.2.3 From 933deb8c7b8e3f83e3dbd0b08e3cad51350d44c4 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 26 Mar 2021 15:51:37 +0100 Subject: gpu: host1x: Add early init and late exit callbacks These callbacks can be used by client drivers to run code during early init and during late exit. Early init callbacks are run prior to the regular init callbacks while late exit callbacks run after the regular exit callbacks. Signed-off-by: Thierry Reding --- include/linux/host1x.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index e0a41c2b4c7a..232e1bd507a7 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -25,14 +25,18 @@ u64 host1x_get_dma_mask(struct host1x *host1x); /** * struct host1x_client_ops - host1x client operations + * @early_init: host1x client early initialization code * @init: host1x client initialization code * @exit: host1x client tear down code + * @late_exit: host1x client late tear down code * @suspend: host1x client suspend code * @resume: host1x client resume code */ struct host1x_client_ops { + int (*early_init)(struct host1x_client *client); int (*init)(struct host1x_client *client); int (*exit)(struct host1x_client *client); + int (*late_exit)(struct host1x_client *client); int (*suspend)(struct host1x_client *client); int (*resume)(struct host1x_client *client); }; -- cgit v1.2.3 From 1fd3dde5e270ad08f1406f921c9a2cda154fcea9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 30 Mar 2021 12:43:16 -0500 Subject: PCI: Add pci_disable_parity() Add pci_disable_parity() to disable reporting of parity errors for a device by clearing PCI_COMMAND_PARITY. The device will still set PCI_STATUS_DETECTED_PARITY when it detects a parity error or receives a Poisoned TLP, but it will not set PCI_STATUS_PARITY, which means it will not assert PERR# (conventional PCI) or report Poisoned TLPs (PCIe). Based-on: https://lore.kernel.org/linux-arm-kernel/d375987c-ea4f-dd98-4ef8-99b2fbfe7c33@gmail.com/ Based-on-patch-by: Heiner Kallweit Link: https://lore.kernel.org/r/20210330174318.1289680-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 86c799c97b77..4eaa773115da 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1201,6 +1201,7 @@ int __must_check pci_set_mwi(struct pci_dev *dev); int __must_check pcim_set_mwi(struct pci_dev *dev); int pci_try_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); +void pci_disable_parity(struct pci_dev *dev); void pci_intx(struct pci_dev *dev, int enable); bool pci_check_and_mask_intx(struct pci_dev *dev); bool pci_check_and_unmask_intx(struct pci_dev *dev); -- cgit v1.2.3 From 9af723be863904c746a6a6bf4f3686087b16b9ff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 30 Dec 2020 12:43:34 -0500 Subject: svcrdma: Remove sc_read_complete_q Now that svc_rdma_recvfrom() waits for Read completion, sc_read_complete_q is no longer used. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 6e621e1f56b8..b72f75091404 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -106,7 +106,6 @@ struct svcxprt_rdma { wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ unsigned long sc_flags; - struct list_head sc_read_complete_q; struct work_struct sc_work; struct llist_head sc_recv_ctxts; @@ -140,7 +139,6 @@ struct svc_rdma_recv_ctxt { bool rc_temp; u32 rc_byte_len; unsigned int rc_page_count; - unsigned int rc_hdr_count; u32 rc_inv_rkey; __be32 rc_msgtype; -- cgit v1.2.3 From 5533c4f4b996b7fc36d16b5e0807ebbc08c93af4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 13 Jan 2021 09:31:50 -0500 Subject: svcrdma: Remove svc_rdma_recv_ctxt::rc_pages and ::rc_arg These fields are no longer used. The size of struct svc_rdma_recv_ctxt is now less than 300 bytes on x86_64, down from 2440 bytes. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index b72f75091404..3184465de3a0 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -134,7 +134,6 @@ struct svc_rdma_recv_ctxt { struct rpc_rdma_cid rc_cid; struct ib_sge rc_recv_sge; void *rc_recv_buf; - struct xdr_buf rc_arg; struct xdr_stream rc_stream; bool rc_temp; u32 rc_byte_len; @@ -148,8 +147,6 @@ struct svc_rdma_recv_ctxt { struct svc_rdma_chunk *rc_cur_result_payload; struct svc_rdma_pcl rc_write_pcl; struct svc_rdma_pcl rc_reply_pcl; - - struct page *rc_pages[RPCSVC_MAXPAGES]; }; struct svc_rdma_send_ctxt { -- cgit v1.2.3 From 5c701e71961af0ec8227ea615f1646dbe98aea1a Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Sat, 27 Mar 2021 10:54:47 +0800 Subject: netfilter: ipset: Remove duplicate declaration struct ip_set is declared twice. One is declared at 79th line, so remove the duplicate. Signed-off-by: Wan Jiabing Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 46d9a0c26c67..10279c4830ac 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -124,8 +124,6 @@ struct ip_set_ext { bool target; }; -struct ip_set; - #define ext_timeout(e, s) \ ((unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT])) #define ext_counter(e, s) \ -- cgit v1.2.3 From 19c28b1374fb1073a9ec873a6c10bf5f16b10b9d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Mar 2021 16:58:37 +0200 Subject: netfilter: add helper function to set up the nfnetlink header and use it This patch adds a helper function to set up the netlink and nfnetlink headers. Update existing codebase to use it. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index f6267e2883f2..791d516e1e88 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -57,6 +57,33 @@ static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type) return subsys << 8 | msg_type; } +static inline void nfnl_fill_hdr(struct nlmsghdr *nlh, u8 family, u8 version, + __be16 res_id) +{ + struct nfgenmsg *nfmsg; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = version; + nfmsg->res_id = res_id; +} + +static inline struct nlmsghdr *nfnl_msg_put(struct sk_buff *skb, u32 portid, + u32 seq, int type, int flags, + u8 family, u8 version, + __be16 res_id) +{ + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); + if (!nlh) + return NULL; + + nfnl_fill_hdr(nlh, family, version, res_id); + + return nlh; +} + void nfnl_lock(__u8 subsys_id); void nfnl_unlock(__u8 subsys_id); #ifdef CONFIG_PROVE_LOCKING -- cgit v1.2.3 From 53366a9f917a8601dcad0fd9768d5956cd2f99a6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 30 Mar 2021 16:38:17 +0200 Subject: USB: serial: drop unused suspending flag The suspending flag was added back in 2009 but no users ever followed. Remove it. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- include/linux/usb/serial.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 952272002e48..7efba6caaadc 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -146,7 +146,6 @@ struct usb_serial { struct usb_serial_driver *type; struct usb_interface *interface; unsigned char disconnected:1; - unsigned char suspending:1; unsigned char attached:1; unsigned char minors_reserved:1; unsigned char num_ports; -- cgit v1.2.3 From 5de03c99691d5b0b6253fda1d1d3bbc8239aadb8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 30 Mar 2021 16:38:19 +0200 Subject: USB: serial: add support for multi-interface functions A single USB function can be implemented using a group of interfaces and this is for example commonly used for Communication Class devices. Add support for multi-interface functions to USB serial core and export an interface that allows drivers to claim a second sibling interface. The interface could easily be extended to allow claiming further interfaces if ever needed. When a driver claims a sibling interface in probe(), core allocates resources for any bulk in, bulk out, interrupt in and interrupt out endpoints found also on the sibling interface. Disconnect is implemented so that unbinding either interface will release the other interface while disconnect() is called precisely once. Similarly, suspend() is called when the first sibling interface is suspended and resume() is called when the last sibling interface is resumed by USB core. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- include/linux/usb/serial.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 7efba6caaadc..e9b90577f50b 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -130,6 +130,8 @@ static inline void usb_set_serial_port_data(struct usb_serial_port *port, * @dev: pointer to the struct usb_device for this device * @type: pointer to the struct usb_serial_driver for this device * @interface: pointer to the struct usb_interface for this device + * @sibling: pointer to the struct usb_interface of any sibling interface + * @suspend_count: number of suspended (sibling) interfaces * @num_ports: the number of ports this device has * @num_interrupt_in: number of interrupt in endpoints we have * @num_interrupt_out: number of interrupt out endpoints we have @@ -145,6 +147,8 @@ struct usb_serial { struct usb_device *dev; struct usb_serial_driver *type; struct usb_interface *interface; + struct usb_interface *sibling; + unsigned int suspend_count; unsigned char disconnected:1; unsigned char attached:1; unsigned char minors_reserved:1; @@ -334,6 +338,9 @@ static inline void usb_serial_console_disconnect(struct usb_serial *serial) {} /* Functions needed by other parts of the usbserial core */ struct usb_serial_port *usb_serial_port_get_by_minor(unsigned int minor); void usb_serial_put(struct usb_serial *serial); + +int usb_serial_claim_interface(struct usb_serial *serial, struct usb_interface *intf); + int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port); int usb_serial_generic_write_start(struct usb_serial_port *port, gfp_t mem_flags); int usb_serial_generic_write(struct tty_struct *tty, struct usb_serial_port *port, -- cgit v1.2.3 From 26dbc7e299c7ebbb6a95e2c620b21b5280b37c57 Mon Sep 17 00:00:00 2001 From: Andrew Scull Date: Thu, 18 Mar 2021 14:33:08 +0000 Subject: bug: Factor out a getter for a bug's file line There is some non-trivial config-based logic to get the file name and line number associated with a bug. Factor this out to a getter that can be resused. Signed-off-by: Andrew Scull Cc: Peter Zijlstra Cc: "Steven Rostedt (VMware)" Reviewed-by: Steven Rostedt (VMware) Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210318143311.839894-3-ascull@google.com --- include/linux/bug.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index f639bd0122f3..e3841bee4c8d 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -36,6 +36,9 @@ static inline int is_warning_bug(const struct bug_entry *bug) return bug->flags & BUGFLAG_WARNING; } +void bug_get_file_line(struct bug_entry *bug, const char **file, + unsigned int *line); + struct bug_entry *find_bug(unsigned long bugaddr); enum bug_trap_type report_bug(unsigned long bug_addr, struct pt_regs *regs); -- cgit v1.2.3 From d6644a1c2e17febf261fd692bb32271e5779bbd2 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Thu, 1 Apr 2021 14:59:04 +0800 Subject: spi: Remove repeated struct declaration struct spi_transfer is declared twice. One is declared at 24th line. The blew one is not needed though. Remove the duplicate. Signed-off-by: Wan Jiabing Link: https://lore.kernel.org/r/20210401065904.994121-1-wanjiabing@vivo.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 6d5b1866cc89..4b192e74f024 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -247,7 +247,6 @@ static inline void *spi_get_drvdata(struct spi_device *spi) } struct spi_message; -struct spi_transfer; /** * struct spi_driver - Host side "protocol" driver -- cgit v1.2.3 From b01fd6e802b6d0a635176f943315670b679d8d7b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 30 Mar 2021 19:32:23 -0700 Subject: skmsg: Introduce a spinlock to protect ingress_msg Currently we rely on lock_sock to protect ingress_msg, it is too big for this, we can actually just use a spinlock to protect this list like protecting other skb queues. __tcp_bpf_recvmsg() is still special because of peeking, it still has to use lock_sock. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Acked-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210331023237.41094-3-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 6c09d94be2e9..f2d45a73b2b2 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -89,6 +89,7 @@ struct sk_psock { #endif struct sk_buff_head ingress_skb; struct list_head ingress_msg; + spinlock_t ingress_lock; unsigned long state; struct list_head link; spinlock_t link_lock; @@ -284,7 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk) static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { + spin_lock_bh(&psock->ingress_lock); list_add_tail(&msg->list, &psock->ingress_msg); + spin_unlock_bh(&psock->ingress_lock); +} + +static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock) +{ + struct sk_msg *msg; + + spin_lock_bh(&psock->ingress_lock); + msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); + if (msg) + list_del(&msg->list); + spin_unlock_bh(&psock->ingress_lock); + return msg; +} + +static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock) +{ + struct sk_msg *msg; + + spin_lock_bh(&psock->ingress_lock); + msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); + spin_unlock_bh(&psock->ingress_lock); + return msg; +} + +static inline struct sk_msg *sk_psock_next_msg(struct sk_psock *psock, + struct sk_msg *msg) +{ + struct sk_msg *ret; + + spin_lock_bh(&psock->ingress_lock); + if (list_is_last(&msg->list, &psock->ingress_msg)) + ret = NULL; + else + ret = list_next_entry(msg, list); + spin_unlock_bh(&psock->ingress_lock); + return ret; } static inline bool sk_psock_queue_empty(const struct sk_psock *psock) @@ -292,6 +331,13 @@ static inline bool sk_psock_queue_empty(const struct sk_psock *psock) return psock ? list_empty(&psock->ingress_msg) : true; } +static inline void kfree_sk_msg(struct sk_msg *msg) +{ + if (msg->skb) + consume_skb(msg->skb); + kfree(msg); +} + static inline void sk_psock_report_error(struct sk_psock *psock, int err) { struct sock *sk = psock->sk; -- cgit v1.2.3 From 0739cd28f2645e814586c7536ba5da9825cb8029 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 30 Mar 2021 19:32:24 -0700 Subject: net: Introduce skb_send_sock() for sock_map We only have skb_send_sock_locked() which requires callers to use lock_sock(). Introduce a variant skb_send_sock() which locks on its own, callers do not need to lock it any more. This will save us from adding a ->sendmsg_locked for each protocol. To reuse the code, pass function pointers to __skb_send_sock() and build skb_send_sock() and skb_send_sock_locked() on top. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20210331023237.41094-4-xiyou.wangcong@gmail.com --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c8def85fcc22..dbf820a50a39 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3626,6 +3626,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, unsigned int flags); int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len); +int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, -- cgit v1.2.3 From 799aa7f98d53e0f541fa6b4dc9aa47b4ff2178e3 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 30 Mar 2021 19:32:25 -0700 Subject: skmsg: Avoid lock_sock() in sk_psock_backlog() We do not have to lock the sock to avoid losing sk_socket, instead we can purge all the ingress queues when we close the socket. Sending or receiving packets after orphaning socket makes no sense. We do purge these queues when psock refcnt reaches zero but here we want to purge them explicitly in sock_map_close(). There are also some nasty race conditions on testing bit SK_PSOCK_TX_ENABLED and queuing/canceling the psock work, we can expand psock->ingress_lock a bit to protect them too. As noticed by John, we still have to lock the psock->work, because the same work item could be running concurrently on different CPU's. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210331023237.41094-5-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index f2d45a73b2b2..7382c4b518d7 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -99,6 +99,7 @@ struct sk_psock { void (*saved_write_space)(struct sock *sk); void (*saved_data_ready)(struct sock *sk); struct proto *sk_proto; + struct mutex work_mutex; struct sk_psock_work_state work_state; struct work_struct work; union { @@ -347,6 +348,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err) } struct sk_psock *sk_psock_init(struct sock *sk, int node); +void sk_psock_stop(struct sk_psock *psock, bool wait); #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); -- cgit v1.2.3 From 7786dfc41a74e0567557b5c4a28fc8482f5f5691 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 30 Mar 2021 19:32:26 -0700 Subject: skmsg: Use rcu work for destroying psock The RCU callback sk_psock_destroy() only queues work psock->gc, so we can just switch to rcu work to simplify the code. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210331023237.41094-6-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 7382c4b518d7..e7aba150539d 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -102,10 +102,7 @@ struct sk_psock { struct mutex work_mutex; struct sk_psock_work_state work_state; struct work_struct work; - union { - struct rcu_head rcu; - struct work_struct gc; - }; + struct rcu_work rwork; }; int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, -- cgit v1.2.3 From a7ba4558e69a3c2ae4ca521f015832ef44799538 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 30 Mar 2021 19:32:30 -0700 Subject: sock_map: Introduce BPF_SK_SKB_VERDICT Reusing BPF_SK_SKB_STREAM_VERDICT is possible but its name is confusing and more importantly we still want to distinguish them from user-space. So we can just reuse the stream verdict code but introduce a new type of eBPF program, skb_verdict. Users are not allowed to attach stream_verdict and skb_verdict programs to the same map. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210331023237.41094-10-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e7aba150539d..c83dbc2d81d9 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -58,6 +58,7 @@ struct sk_psock_progs { struct bpf_prog *msg_parser; struct bpf_prog *stream_parser; struct bpf_prog *stream_verdict; + struct bpf_prog *skb_verdict; }; enum sk_psock_state_bits { @@ -487,6 +488,7 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs) psock_set_prog(&progs->msg_parser, NULL); psock_set_prog(&progs->stream_parser, NULL); psock_set_prog(&progs->stream_verdict, NULL); + psock_set_prog(&progs->skb_verdict, NULL); } int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb); -- cgit v1.2.3 From 8a59f9d1e3d4340659fdfee8879dc09a6f2546e1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 30 Mar 2021 19:32:31 -0700 Subject: sock: Introduce sk->sk_prot->psock_update_sk_prot() Currently sockmap calls into each protocol to update the struct proto and replace it. This certainly won't work when the protocol is implemented as a module, for example, AF_UNIX. Introduce a new ops sk->sk_prot->psock_update_sk_prot(), so each protocol can implement its own way to replace the struct proto. This also helps get rid of symbol dependencies on CONFIG_INET. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210331023237.41094-11-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c83dbc2d81d9..5e800ddc2dc6 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -99,6 +99,7 @@ struct sk_psock { void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); void (*saved_data_ready)(struct sock *sk); + int (*psock_update_sk_prot)(struct sock *sk, bool restore); struct proto *sk_proto; struct mutex work_mutex; struct sk_psock_work_state work_state; @@ -395,25 +396,12 @@ static inline void sk_psock_cork_free(struct sk_psock *psock) } } -static inline void sk_psock_update_proto(struct sock *sk, - struct sk_psock *psock, - struct proto *ops) -{ - /* Pairs with lockless read in sk_clone_lock() */ - WRITE_ONCE(sk->sk_prot, ops); -} - static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { sk->sk_prot->unhash = psock->saved_unhash; - if (inet_csk_has_ulp(sk)) { - tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); - } else { - sk->sk_write_space = psock->saved_write_space; - /* Pairs with lockless read in sk_clone_lock() */ - WRITE_ONCE(sk->sk_prot, psock->sk_proto); - } + if (psock->psock_update_sk_prot) + psock->psock_update_sk_prot(sk, true); } static inline void sk_psock_set_state(struct sk_psock *psock, -- cgit v1.2.3 From 2bc793e3272a13e337416c057cb81c5396ad91d1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 30 Mar 2021 19:32:33 -0700 Subject: skmsg: Extract __tcp_bpf_recvmsg() and tcp_bpf_wait_data() Although these two functions are only used by TCP, they are not specific to TCP at all, both operate on skmsg and ingress_msg, so fit in net/core/skmsg.c very well. And we will need them for non-TCP, so rename and move them to skmsg.c and export them to modules. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210331023237.41094-13-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 5e800ddc2dc6..f78e90a04a69 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -125,6 +125,10 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); +int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags, + long timeo, int *err); +int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, + int len, int flags); static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes) { -- cgit v1.2.3 From 1decdb335c366fc0a1bae0db55c138c613cc9a1f Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 30 Mar 2021 11:40:55 +0800 Subject: tracing: Remove duplicate struct declaration in trace_events.h struct trace_array is declared twice. One has been declared at forward declaration. Remove the duplicate. Link: https://lkml.kernel.org/r/20210330034056.2266969-1-wanjiabing@vivo.com Signed-off-by: Wan Jiabing Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 36e27c1f42e0..ad413b382a3c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -404,7 +404,6 @@ trace_get_fields(struct trace_event_call *event_call) return event_call->class->get_fields(event_call); } -struct trace_array; struct trace_subsystem_dir; enum { -- cgit v1.2.3 From f3ef7202ef7c705d640d1aeec3b286a641ac9186 Mon Sep 17 00:00:00 2001 From: "Yordan Karadzhov (VMware)" Date: Mon, 29 Mar 2021 16:03:31 +0300 Subject: tracing: Remove unused argument from "ring_buffer_time_stamp() The "cpu" parameter is not being used by the function. Link: https://lkml.kernel.org/r/20210329130331.199402-1-y.karadz@gmail.com Signed-off-by: Yordan Karadzhov (VMware) Signed-off-by: Steven Rostedt (VMware) --- include/linux/ring_buffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 057b7ed4fe24..dac53fd3afea 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -181,7 +181,7 @@ unsigned long ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cp unsigned long ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu); unsigned long ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu); -u64 ring_buffer_time_stamp(struct trace_buffer *buffer, int cpu); +u64 ring_buffer_time_stamp(struct trace_buffer *buffer); void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer, int cpu, u64 *ts); void ring_buffer_set_clock(struct trace_buffer *buffer, -- cgit v1.2.3 From 4578be130a6470d85ff05b13b75a00e6224eeeeb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 1 Apr 2021 09:06:29 -0700 Subject: overflow: Correct check_shl_overflow() comment A 'false' return means the value was safely set, so the comment should say 'true' for when it is not considered safe. Cc: Jason Gunthorpe Signed-off-by: Keith Busch Signed-off-by: Kees Cook Fixes: 0c66847793d1 ("overflow.h: Add arithmetic shift helper") Link: https://lore.kernel.org/r/20210401160629.1941787-1-kbusch@kernel.org --- include/linux/overflow.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/overflow.h b/include/linux/overflow.h index ef74051d5cfe..0f12345c21fb 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -235,7 +235,7 @@ static inline bool __must_check __must_check_overflow(bool overflow) * - 'a << s' sets the sign bit, if any, in '*d'. * * '*d' will hold the results of the attempted shift, but is not - * considered "safe for use" if false is returned. + * considered "safe for use" if true is returned. */ #define check_shl_overflow(a, s, d) __must_check_overflow(({ \ typeof(a) _a = a; \ -- cgit v1.2.3 From cad3193fe9d1f0af4d05ed86693f99984409b188 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 22 Mar 2021 00:51:39 +0100 Subject: mtd: spi-nor: implement OTP support for Winbond and similar flashes Use the new OTP ops to implement OTP access on Winbond flashes. Most Winbond flashes provides up to four different OTP regions ("Security Registers"). Winbond devices use a special opcode to read and write to the OTP regions, just like the RDSFDP opcode. In fact, it seems that the (undocumented) first OTP area of the newer flashes is the actual SFDP table. On a side note, Winbond devices also allow erasing the OTP regions as long as the area isn't locked down. Signed-off-by: Michael Walle Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20210321235140.8308-3-michael@walle.cc --- include/linux/mtd/spi-nor.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index a0d572855444..98ed91b529ea 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -107,6 +107,11 @@ #define SPINOR_OP_RD_EVCR 0x65 /* Read EVCR register */ #define SPINOR_OP_WD_EVCR 0x61 /* Write EVCR register */ +/* Used for GigaDevices and Winbond flashes. */ +#define SPINOR_OP_ESECR 0x44 /* Erase Security registers */ +#define SPINOR_OP_PSECR 0x42 /* Program Security registers */ +#define SPINOR_OP_RSECR 0x48 /* Read Security registers */ + /* Status Register bits. */ #define SR_WIP BIT(0) /* Write in progress */ #define SR_WEL BIT(1) /* Write enable latch */ @@ -138,6 +143,9 @@ /* Status Register 2 bits. */ #define SR2_QUAD_EN_BIT1 BIT(1) +#define SR2_LB1 BIT(3) /* Security Register Lock Bit 1 */ +#define SR2_LB2 BIT(4) /* Security Register Lock Bit 2 */ +#define SR2_LB3 BIT(5) /* Security Register Lock Bit 3 */ #define SR2_QUAD_EN_BIT7 BIT(7) /* Supported SPI protocols */ -- cgit v1.2.3 From a28e824fb8270eda43fd0f65c2a5fdf33f55c5eb Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 30 Mar 2021 12:12:37 +0100 Subject: nvmem: core: Add functions to make number reading easy Sometimes the clients of nvmem just want to get a number out of nvmem. They don't want to think about exactly how many bytes the nvmem cell took up. They just want the number. Let's make it easy. In general this concept is useful because nvmem space is precious and usually the fewest bits are allocated that will hold a given value on a given system. However, even though small numbers might be fine on one system that doesn't mean that logically the number couldn't be bigger. Imagine nvmem containing a max frequency for a component. On one system perhaps that fits in 16 bits. On another system it might fit in 32 bits. The code reading this number doesn't care--it just wants the number. We'll provide two functions: nvmem_cell_read_variable_le_u32() and nvmem_cell_read_variable_le_u64(). Comparing these to the existing functions like nvmem_cell_read_u32(): * These new functions have no problems if the value was stored in nvmem in fewer bytes. It's OK to use these function as long as the value stored will fit in 32-bits (or 64-bits). * These functions avoid problems that the earlier APIs had with bit offsets. For instance, you can't use nvmem_cell_read_u32() to read a value has nbits=32 and bit_offset=4 because the nvmem cell must be at least 5 bytes big to hold this value. The new API accounts for this and works fine. * These functions make it very explicit that they assume that the number was stored in little endian format. The old functions made this assumption whenever bit_offset was non-zero (see nvmem_shift_read_buffer_in_place()) but didn't whenever the bit_offset was zero. NOTE: it's assumed that we don't need an 8-bit or 16-bit version of this function. The 32-bit version of the function can be used to read 8-bit or 16-bit data. At the moment, I'm only adding the "unsigned" versions of these functions, but if it ends up being useful someone could add a "signed" version that did 2's complement sign extension. At the moment, I'm only adding the "little endian" versions of these functions. Adding the "big endian" version would require adding "big endian" support to nvmem_shift_read_buffer_in_place(). Signed-off-by: Douglas Anderson Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210330111241.19401-7-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-consumer.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 052293f4cbdb..923dada24eb4 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -65,6 +65,10 @@ int nvmem_cell_read_u8(struct device *dev, const char *cell_id, u8 *val); int nvmem_cell_read_u16(struct device *dev, const char *cell_id, u16 *val); int nvmem_cell_read_u32(struct device *dev, const char *cell_id, u32 *val); int nvmem_cell_read_u64(struct device *dev, const char *cell_id, u64 *val); +int nvmem_cell_read_variable_le_u32(struct device *dev, const char *cell_id, + u32 *val); +int nvmem_cell_read_variable_le_u64(struct device *dev, const char *cell_id, + u64 *val); /* direct nvmem device read/write interface */ struct nvmem_device *nvmem_device_get(struct device *dev, const char *name); -- cgit v1.2.3 From a7f3d3d3600c8ed119eb0d2483de0062ce2e3707 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 26 Mar 2021 22:03:05 +0100 Subject: dma-mapping: add unlikely hint to error path in dma_mapping_error Zillions of drivers use the unlikely() hint when checking the result of dma_mapping_error(). This is an inline function anyway, so we can move the hint into the function and remove it from drivers over time. Signed-off-by: Heiner Kallweit Reviewed-by: Robin Murphy Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index e9d19b974f26..183e7103a66d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -95,7 +95,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { debug_dma_mapping_error(dev, dma_addr); - if (dma_addr == DMA_MAPPING_ERROR) + if (unlikely(dma_addr == DMA_MAPPING_ERROR)) return -ENOMEM; return 0; } -- cgit v1.2.3 From 1768289b44bae847612751d418fc5c5e680b5e5c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 31 Mar 2021 18:05:25 +0300 Subject: driver core: platform: Declare early_platform_cleanup() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiler is not happy: CC drivers/base/platform.o drivers/base/platform.c:1557:20: warning: no previous prototype for ‘early_platform_cleanup’ [-Wmissing-prototypes] 1557 | void __weak __init early_platform_cleanup(void) { } | ^~~~~~~~~~~~~~~~~~~~~~ Declare early_platform_cleanup() prototype in the header to make everyone happy. Fixes: eecd37e105f0 ("drivers: Fix boot problem on SuperH") Cc: Guenter Roeck Reviewed-by: Guenter Roeck Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210331150525.59223-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 3f23f6e430bf..cd81e060863c 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -359,4 +359,7 @@ static inline int is_sh_early_platform_device(struct platform_device *pdev) } #endif /* CONFIG_SUPERH */ +/* For now only SuperH uses it */ +void early_platform_cleanup(void); + #endif /* _PLATFORM_DEVICE_H_ */ -- cgit v1.2.3 From d699ae4fc27496d01e8bc5ab2106bd79d1e7be92 Mon Sep 17 00:00:00 2001 From: Alexander Lochmann Date: Thu, 11 Feb 2021 10:51:55 +0100 Subject: ext4: updated locking documentation for journal_t Some members of transaction_t are allowed to be read without any lock being held if consistency doesn't matter. Based on LockDoc's findings, we extended the locking documentation of those members. Each one of them is marked with a short comment: "no lock for quick racy checks". Signed-off-by: Alexander Lochmann Signed-off-by: Horst Schirmeier Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/ad82c7a9-a624-4ed5-5ada-a6410c44c0b3@tu-dortmund.de Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 99d3cd051ac3..74710bb231a6 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -768,7 +768,8 @@ enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; struct journal_s { /** - * @j_flags: General journaling state flags [j_state_lock] + * @j_flags: General journaling state flags [j_state_lock, + * no lock for quick racy checks] */ unsigned long j_flags; @@ -808,7 +809,8 @@ struct journal_s /** * @j_barrier_count: * - * Number of processes waiting to create a barrier lock [j_state_lock] + * Number of processes waiting to create a barrier lock [j_state_lock, + * no lock for quick racy checks] */ int j_barrier_count; @@ -821,7 +823,8 @@ struct journal_s * @j_running_transaction: * * Transactions: The current running transaction... - * [j_state_lock] [caller holding open handle] + * [j_state_lock, no lock for quick racy checks] [caller holding + * open handle] */ transaction_t *j_running_transaction; @@ -1033,7 +1036,7 @@ struct journal_s * @j_commit_sequence: * * Sequence number of the most recently committed transaction - * [j_state_lock]. + * [j_state_lock, no lock for quick racy checks] */ tid_t j_commit_sequence; @@ -1041,7 +1044,7 @@ struct journal_s * @j_commit_request: * * Sequence number of the most recent transaction wanting commit - * [j_state_lock] + * [j_state_lock, no lock for quick racy checks] */ tid_t j_commit_request; -- cgit v1.2.3 From 3042b1b45c4106feff063932d4fd481c5009dbe1 Mon Sep 17 00:00:00 2001 From: Alexander Lochmann Date: Thu, 11 Feb 2021 18:14:10 +0100 Subject: Updated locking documentation for transaction_t Some members of transaction_t are allowed to be read without any lock being held if accessed from the correct context. We used LockDoc's findings to determine those members. Each member of them is marked with a short comment: "no lock needed for jbd2 thread". Signed-off-by: Alexander Lochmann Signed-off-by: Horst Schirmeier Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210211171410.17984-1-alexander.lochmann@tu-dortmund.de Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 74710bb231a6..b9aa85081a40 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -594,18 +594,22 @@ struct transaction_s */ unsigned long t_log_start; - /* Number of buffers on the t_buffers list [j_list_lock] */ + /* + * Number of buffers on the t_buffers list [j_list_lock, no locks + * needed for jbd2 thread] + */ int t_nr_buffers; /* * Doubly-linked circular list of all buffers reserved but not yet - * modified by this transaction [j_list_lock] + * modified by this transaction [j_list_lock, no locks needed fo + * jbd2 thread] */ struct journal_head *t_reserved_list; /* * Doubly-linked circular list of all metadata buffers owned by this - * transaction [j_list_lock] + * transaction [j_list_lock, no locks needed for jbd2 thread] */ struct journal_head *t_buffers; @@ -629,9 +633,11 @@ struct transaction_s struct journal_head *t_checkpoint_io_list; /* - * Doubly-linked circular list of metadata buffers being shadowed by log - * IO. The IO buffers on the iobuf list and the shadow buffers on this - * list match each other one for one at all times. [j_list_lock] + * Doubly-linked circular list of metadata buffers being + * shadowed by log IO. The IO buffers on the iobuf list and + * the shadow buffers on this list match each other one for + * one at all times. [j_list_lock, no locks needed for jbd2 + * thread] */ struct journal_head *t_shadow_list; -- cgit v1.2.3 From f06c609645ecd043c79380fac94145926603fb33 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Apr 2021 19:17:46 +0200 Subject: block: remove the unused RQF_ALLOCED flag Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc6bc8383b43..158aefae1030 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -85,8 +85,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_ELVPRIV ((__force req_flags_t)(1 << 12)) /* account into disk and partition IO statistics */ #define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) -/* request came from our alloc pool */ -#define RQF_ALLOCED ((__force req_flags_t)(1 << 14)) /* runtime pm request */ #define RQF_PM ((__force req_flags_t)(1 << 15)) /* on IO scheduler merge hash */ -- cgit v1.2.3 From e3baacf54275647a018ee35bff3bc775a8a2a01a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 29 Mar 2021 15:57:33 +0300 Subject: regulator: helpers: Export helper voltage listing Some drivers need to translate voltage values to selectors prior regulator registration. Currently a regulator_desc based list_voltages helper is only exported for regulators using the linear_ranges. Export similar helper also for regulators using simple linear mapping. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/1200ef7a50c84327ada019b85f6527b4fc9b5ce1.1617020713.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index d7c77ee370f3..39a540111645 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -543,4 +543,6 @@ void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data); int regulator_desc_list_voltage_linear_range(const struct regulator_desc *desc, unsigned int selector); +int regulator_desc_list_voltage_linear(const struct regulator_desc *desc, + unsigned int selector); #endif -- cgit v1.2.3 From fb8fee9efdcf084d9e31ba14cc4734d97e5dd972 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 29 Mar 2021 15:59:04 +0300 Subject: regulator: Add regmap helper for ramp-delay setting Quite a few regulator ICs do support setting ramp-delay by writing a value matching the delay to a ramp-delay register. Provide a simple helper for table-based delay setting. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/f101f1db564cf32cb58719c77af0b00d7236bb89.1617020713.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 39a540111645..597ed117086f 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -373,6 +373,10 @@ struct regulator_desc { unsigned int pull_down_reg; unsigned int pull_down_mask; unsigned int pull_down_val_on; + unsigned int ramp_reg; + unsigned int ramp_mask; + const unsigned int *ramp_delay_table; + unsigned int n_ramp_values; unsigned int enable_time; @@ -535,6 +539,7 @@ int regulator_set_current_limit_regmap(struct regulator_dev *rdev, int min_uA, int max_uA); int regulator_get_current_limit_regmap(struct regulator_dev *rdev); void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data); +int regulator_set_ramp_delay_regmap(struct regulator_dev *rdev, int ramp_delay); /* * Helper functions intended to be used by regulator drivers prior registering -- cgit v1.2.3 From 4c4c0a89abd5c08e91df9bcce4ebcb3433bbb9bf Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 19 Feb 2021 08:18:12 +0200 Subject: net/mlx5: Pack mlx5_rl_entry structure mlx5_rl_entry structure is not properly packed as shown below. Due to this an array of size 9144 bytes allocated which is aligned to 16Kbytes. Hence, pack the structure and avoid the wastage. This offers 8Kbytes of saving per mlx5_core_dev struct. pahole -C mlx5_rl_entry drivers/net/ethernet/mellanox/mlx5/core/en_main.o Existing layout: struct mlx5_rl_entry { u8 rl_raw[48]; /* 0 48 */ u16 index; /* 48 2 */ /* XXX 6 bytes hole, try to pack */ u64 refcount; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ u16 uid; /* 64 2 */ u8 dedicated:1; /* 66: 0 1 */ /* size: 72, cachelines: 2, members: 5 */ /* sum members: 60, holes: 1, sum holes: 6 */ /* sum bitfield members: 1 bits (0 bytes) */ /* padding: 5 */ /* bit_padding: 7 bits */ /* last cacheline: 8 bytes */ }; After alignment: struct mlx5_rl_entry { u8 rl_raw[48]; /* 0 48 */ u64 refcount; /* 48 8 */ u16 index; /* 56 2 */ u16 uid; /* 58 2 */ u8 dedicated:1; /* 60: 0 1 */ /* size: 64, cachelines: 1, members: 5 */ /* padding: 3 */ /* bit_padding: 7 bits */ }; Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 23bb01d7c9b9..a9bd7e3bd554 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -517,8 +517,8 @@ struct mlx5_rate_limit { struct mlx5_rl_entry { u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)]; - u16 index; u64 refcount; + u16 index; u16 uid; u8 dedicated : 1; }; -- cgit v1.2.3 From 6b30b6d4d36c978e0ab0f22e85bf3c646732e98b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 19 Feb 2021 12:06:54 +0200 Subject: net/mlx5: Allocate rate limit table when rate is configured A device supports 128 rate limiters. A static table allocation consumes 8KB of memory even when rate is not configured. Instead, allocate the table when at least one rate is configured. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a9bd7e3bd554..baf38b5a2a8c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -530,6 +530,7 @@ struct mlx5_rl_table { u32 max_rate; u32 min_rate; struct mlx5_rl_entry *rl_entry; + u64 refcount; }; struct mlx5_core_roce { -- cgit v1.2.3 From 2daae89666ad253281bb3d6a027c00a702c02eff Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Thu, 1 Apr 2021 14:46:37 +0800 Subject: bpf, cgroup: Delete repeated struct bpf_prog declaration struct bpf_prog is declared twice. There is one declaration which is independent on the macro at 18th line. So the below one is not needed though. Remove the duplicate. Signed-off-by: Wan Jiabing Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210401064637.993327-1-wanjiabing@vivo.com --- include/linux/bpf-cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 6a29fe11485d..8b77d08d4b47 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -458,7 +458,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); #else -struct bpf_prog; struct cgroup_bpf {}; static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} -- cgit v1.2.3 From 6ac4c6f887f5a8efb6a6952798c09a2562022966 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Thu, 1 Apr 2021 15:20:37 +0800 Subject: bpf: Remove repeated struct btf_type declaration struct btf_type is declared twice. One is declared at 35th line. The below one is not needed, hence remove the duplicate. Signed-off-by: Wan Jiabing Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210401072037.995849-1-wanjiabing@vivo.com --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9fdd839b418c..ff8cd68c01b3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -928,7 +928,6 @@ struct bpf_link_primer { }; struct bpf_struct_ops_value; -struct btf_type; struct btf_member; #define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64 -- cgit v1.2.3 From fb05121fd6a20f0830ff2a4420c51af6ca4ac6e7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 19 Mar 2021 11:06:50 +0000 Subject: signal: Add unsafe_get_compat_sigset() In the same way as commit 14026b94ccfe ("signal: Add unsafe_put_compat_sigset()"), this time add unsafe_get_compat_sigset() macro which is the 'unsafe' version of get_compat_sigset() For the bigendian, use unsafe_get_user() directly to avoid intermediate copy through the stack. For the littleendian, use a straight unsafe_copy_from_user(). This commit adds the generic fallback for unsafe_copy_from_user(). Architectures wanting to use unsafe_get_compat_sigset() have to make sure they have their own unsafe_copy_from_user(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b05bf434ee13c76bc9df5f02653a10db5e7b54e5.1616151715.git.christophe.leroy@csgroup.eu --- include/linux/compat.h | 35 +++++++++++++++++++++++++++++++++++ include/linux/uaccess.h | 1 + 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 6e65be753603..5112c3e35782 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -465,6 +465,34 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, unsafe_put_user(__s->sig[0], &__c->sig[0], label); \ } \ } while (0) + +#define unsafe_get_compat_sigset(set, compat, label) do { \ + const compat_sigset_t __user *__c = compat; \ + compat_sigset_word hi, lo; \ + sigset_t *__s = set; \ + \ + switch (_NSIG_WORDS) { \ + case 4: \ + unsafe_get_user(lo, &__c->sig[7], label); \ + unsafe_get_user(hi, &__c->sig[6], label); \ + __s->sig[3] = hi | (((long)lo) << 32); \ + fallthrough; \ + case 3: \ + unsafe_get_user(lo, &__c->sig[5], label); \ + unsafe_get_user(hi, &__c->sig[4], label); \ + __s->sig[2] = hi | (((long)lo) << 32); \ + fallthrough; \ + case 2: \ + unsafe_get_user(lo, &__c->sig[3], label); \ + unsafe_get_user(hi, &__c->sig[2], label); \ + __s->sig[1] = hi | (((long)lo) << 32); \ + fallthrough; \ + case 1: \ + unsafe_get_user(lo, &__c->sig[1], label); \ + unsafe_get_user(hi, &__c->sig[0], label); \ + __s->sig[0] = hi | (((long)lo) << 32); \ + } \ +} while (0) #else #define unsafe_put_compat_sigset(compat, set, label) do { \ compat_sigset_t __user *__c = compat; \ @@ -472,6 +500,13 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, \ unsafe_copy_to_user(__c, __s, sizeof(*__c), label); \ } while (0) + +#define unsafe_get_compat_sigset(set, compat, label) do { \ + const compat_sigset_t __user *__c = compat; \ + sigset_t *__s = set; \ + \ + unsafe_copy_from_user(__s, __c, sizeof(*__c), label); \ +} while (0) #endif extern int compat_ptrace_request(struct task_struct *child, diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index c7c6e8b8344d..c05e903cef02 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -397,6 +397,7 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count); #define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e) #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e) #define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e) +#define unsafe_copy_from_user(d,s,l,e) unsafe_op_wrap(__copy_from_user(d,s,l),e) static inline unsigned long user_access_save(void) { return 0UL; } static inline void user_access_restore(unsigned long flags) { } #endif -- cgit v1.2.3 From c3d5c2d96d69f2578d6fbf66e39cc2cf840d9812 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 4 Apr 2021 10:22:18 +0300 Subject: PCI/IOV: Add sysfs MSI-X vector assignment interface A typical cloud provider SR-IOV use case is to create many VFs for use by guest VMs. The VFs may not be assigned to a VM until a customer requests a VM of a certain size, e.g., number of CPUs. A VF may need MSI-X vectors proportional to the number of CPUs in the VM, but there is no standard way to change the number of MSI-X vectors supported by a VF. Some Mellanox ConnectX devices support dynamic assignment of MSI-X vectors to SR-IOV VFs. This can be done by the PF driver after VFs are enabled, and it can be done without affecting VFs that are already in use. The hardware supports a limited pool of MSI-X vectors that can be assigned to the PF or to individual VFs. This is device-specific behavior that requires support in the PF driver. Add a read-only "sriov_vf_total_msix" sysfs file for the PF and a writable "sriov_vf_msix_count" file for each VF. Management software may use these to learn how many MSI-X vectors are available and to dynamically assign them to VFs before the VFs are passed through to a VM. If the PF driver implements the ->sriov_get_vf_total_msix() callback, "sriov_vf_total_msix" contains the total number of MSI-X vectors available for distribution among VFs. If no driver is bound to the VF, writing "N" to "sriov_vf_msix_count" uses the PF driver ->sriov_set_msix_vec_count() callback to assign "N" MSI-X vectors to the VF. When a VF driver subsequently reads the MSI-X Message Control register, it will see the new Table Size "N". Link: https://lore.kernel.org/linux-pci/20210314124256.70253-2-leon@kernel.org Acked-by: Bjorn Helgaas Signed-off-by: Leon Romanovsky --- include/linux/pci.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 86c799c97b77..9b575a676888 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -856,6 +856,12 @@ struct module; * e.g. drivers/net/e100.c. * @sriov_configure: Optional driver callback to allow configuration of * number of VFs to enable via sysfs "sriov_numvfs" file. + * @sriov_set_msix_vec_count: PF Driver callback to change number of MSI-X + * vectors on a VF. Triggered via sysfs "sriov_vf_msix_count". + * This will change MSI-X Table Size in the VF Message Control + * registers. + * @sriov_get_vf_total_msix: PF driver callback to get the total number of + * MSI-X vectors available for distribution to the VFs. * @err_handler: See Documentation/PCI/pci-error-recovery.rst * @groups: Sysfs attribute groups. * @driver: Driver model structure. @@ -871,6 +877,8 @@ struct pci_driver { int (*resume)(struct pci_dev *dev); /* Device woken up */ void (*shutdown)(struct pci_dev *dev); int (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */ + int (*sriov_set_msix_vec_count)(struct pci_dev *vf, int msix_vec_count); /* On PF */ + u32 (*sriov_get_vf_total_msix)(struct pci_dev *pf); const struct pci_error_handlers *err_handler; const struct attribute_group **groups; struct device_driver driver; -- cgit v1.2.3 From 0b989c1e37053196676b2238f82195bd5a339d58 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 14 Mar 2021 14:42:54 +0200 Subject: net/mlx5: Add dynamic MSI-X capabilities bits These new fields declare the number of MSI-X vectors that is possible to allocate on the VF through PF configuration. Value must be in range defined by min_dynamic_vf_msix_table_size and max_dynamic_vf_msix_table_size. The driver should continue to query its MSI-X table through PCI configuration header. Link: https://lore.kernel.org/linux-pci/20210314124256.70253-3-leon@kernel.org Acked-by: Bjorn Helgaas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3ee7a86f39e4..432290b58a0b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1683,7 +1683,16 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_6e0[0x10]; u8 sf_base_id[0x10]; - u8 reserved_at_700[0x80]; + u8 reserved_at_700[0x8]; + u8 num_total_dynamic_vf_msix[0x18]; + u8 reserved_at_720[0x14]; + u8 dynamic_msix_table_size[0xc]; + u8 reserved_at_740[0xc]; + u8 min_dynamic_vf_msix_table_size[0x4]; + u8 reserved_at_750[0x4]; + u8 max_dynamic_vf_msix_table_size[0xc]; + + u8 reserved_at_760[0x20]; u8 vhca_tunnel_commands[0x40]; u8 reserved_at_7c0[0x40]; }; -- cgit v1.2.3 From a72232eabdfcfe365a05a3eb392288b78d25a5ca Mon Sep 17 00:00:00 2001 From: Vipin Sharma Date: Mon, 29 Mar 2021 21:42:04 -0700 Subject: cgroup: Add misc cgroup controller The Miscellaneous cgroup provides the resource limiting and tracking mechanism for the scalar resources which cannot be abstracted like the other cgroup resources. Controller is enabled by the CONFIG_CGROUP_MISC config option. A resource can be added to the controller via enum misc_res_type{} in the include/linux/misc_cgroup.h file and the corresponding name via misc_res_name[] in the kernel/cgroup/misc.c file. Provider of the resource must set its capacity prior to using the resource by calling misc_cg_set_capacity(). Once a capacity is set then the resource usage can be updated using charge and uncharge APIs. All of the APIs to interact with misc controller are in include/linux/misc_cgroup.h. Miscellaneous controller provides 3 interface files. If two misc resources (res_a and res_b) are registered then: misc.capacity A read-only flat-keyed file shown only in the root cgroup. It shows miscellaneous scalar resources available on the platform along with their quantities:: $ cat misc.capacity res_a 50 res_b 10 misc.current A read-only flat-keyed file shown in the non-root cgroups. It shows the current usage of the resources in the cgroup and its children:: $ cat misc.current res_a 3 res_b 0 misc.max A read-write flat-keyed file shown in the non root cgroups. Allowed maximum usage of the resources in the cgroup and its children.:: $ cat misc.max res_a max res_b 4 Limit can be set by:: # echo res_a 1 > misc.max Limit can be set to max by:: # echo res_a max > misc.max Limits can be set more than the capacity value in the misc.capacity file. Signed-off-by: Vipin Sharma Reviewed-by: David Rientjes Signed-off-by: Tejun Heo --- include/linux/cgroup_subsys.h | 4 ++ include/linux/misc_cgroup.h | 126 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 include/linux/misc_cgroup.h (limited to 'include/linux') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index acb77dcff3b4..445235487230 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -61,6 +61,10 @@ SUBSYS(pids) SUBSYS(rdma) #endif +#if IS_ENABLED(CONFIG_CGROUP_MISC) +SUBSYS(misc) +#endif + /* * The following subsystems are not supported on the default hierarchy. */ diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h new file mode 100644 index 000000000000..1195d36558b4 --- /dev/null +++ b/include/linux/misc_cgroup.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Miscellaneous cgroup controller. + * + * Copyright 2020 Google LLC + * Author: Vipin Sharma + */ +#ifndef _MISC_CGROUP_H_ +#define _MISC_CGROUP_H_ + +/** + * Types of misc cgroup entries supported by the host. + */ +enum misc_res_type { + MISC_CG_RES_TYPES +}; + +struct misc_cg; + +#ifdef CONFIG_CGROUP_MISC + +#include + +/** + * struct misc_res: Per cgroup per misc type resource + * @max: Maximum limit on the resource. + * @usage: Current usage of the resource. + * @failed: True if charged failed for the resource in a cgroup. + */ +struct misc_res { + unsigned long max; + atomic_long_t usage; + bool failed; +}; + +/** + * struct misc_cg - Miscellaneous controller's cgroup structure. + * @css: cgroup subsys state object. + * @res: Array of misc resources usage in the cgroup. + */ +struct misc_cg { + struct cgroup_subsys_state css; + struct misc_res res[MISC_CG_RES_TYPES]; +}; + +unsigned long misc_cg_res_total_usage(enum misc_res_type type); +int misc_cg_set_capacity(enum misc_res_type type, unsigned long capacity); +int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, + unsigned long amount); +void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, + unsigned long amount); + +/** + * css_misc() - Get misc cgroup from the css. + * @css: cgroup subsys state object. + * + * Context: Any context. + * Return: + * * %NULL - If @css is null. + * * struct misc_cg* - misc cgroup pointer of the passed css. + */ +static inline struct misc_cg *css_misc(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct misc_cg, css) : NULL; +} + +/* + * get_current_misc_cg() - Find and get the misc cgroup of the current task. + * + * Returned cgroup has its ref count increased by 1. Caller must call + * put_misc_cg() to return the reference. + * + * Return: Misc cgroup to which the current task belongs to. + */ +static inline struct misc_cg *get_current_misc_cg(void) +{ + return css_misc(task_get_css(current, misc_cgrp_id)); +} + +/* + * put_misc_cg() - Put the misc cgroup and reduce its ref count. + * @cg - cgroup to put. + */ +static inline void put_misc_cg(struct misc_cg *cg) +{ + if (cg) + css_put(&cg->css); +} + +#else /* !CONFIG_CGROUP_MISC */ + +unsigned long misc_cg_res_total_usage(enum misc_res_type type) +{ + return 0; +} + +static inline int misc_cg_set_capacity(enum misc_res_type type, + unsigned long capacity) +{ + return 0; +} + +static inline int misc_cg_try_charge(enum misc_res_type type, + struct misc_cg *cg, + unsigned long amount) +{ + return 0; +} + +static inline void misc_cg_uncharge(enum misc_res_type type, + struct misc_cg *cg, + unsigned long amount) +{ +} + +static inline struct misc_cg *get_current_misc_cg(void) +{ + return NULL; +} + +static inline void put_misc_cg(struct misc_cg *cg) +{ +} + +#endif /* CONFIG_CGROUP_MISC */ +#endif /* _MISC_CGROUP_H_ */ -- cgit v1.2.3 From 7aef27f0b2a8a58c28578d3e0caf3f27e1a1c39c Mon Sep 17 00:00:00 2001 From: Vipin Sharma Date: Mon, 29 Mar 2021 21:42:06 -0700 Subject: svm/sev: Register SEV and SEV-ES ASIDs to the misc controller Secure Encrypted Virtualization (SEV) and Secure Encrypted Virtualization - Encrypted State (SEV-ES) ASIDs are used to encrypt KVMs on AMD platform. These ASIDs are available in the limited quantities on a host. Register their capacity and usage to the misc controller for tracking via cgroups. Signed-off-by: Vipin Sharma Reviewed-by: David Rientjes Signed-off-by: Tejun Heo --- include/linux/misc_cgroup.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index 1195d36558b4..c5af592481c0 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -12,6 +12,12 @@ * Types of misc cgroup entries supported by the host. */ enum misc_res_type { +#ifdef CONFIG_KVM_AMD_SEV + /* AMD SEV ASIDs resource */ + MISC_CG_RES_SEV, + /* AMD SEV-ES ASIDs resource */ + MISC_CG_RES_SEV_ES, +#endif MISC_CG_RES_TYPES }; -- cgit v1.2.3 From dd3f4e4972f146a685930ccfed95e4e1d13d952a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 4 Apr 2021 18:29:37 -0400 Subject: cgroup: misc: mark dummy misc_cg_res_total_usage() static inline The dummy implementation was missing static inline triggering the following compile warning on llvm. In file included from arch/x86/kvm/svm/sev.c:17: >> include/linux/misc_cgroup.h:98:15: warning: no previous prototype for function 'misc_cg_res_total_usage' [-Wmissing-prototypes] unsigned long misc_cg_res_total_usage(enum misc_res_type type) ^ include/linux/misc_cgroup.h:98:1: note: declare 'static' if the function is not intended to be used outside of this translation unit unsigned long misc_cg_res_total_usage(enum misc_res_type type) ^ static 1 warning generated. Add it. Signed-off-by: Tejun Heo Reported-by: kernel test robot --- include/linux/misc_cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index c5af592481c0..da2367e2ac1e 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -95,7 +95,7 @@ static inline void put_misc_cg(struct misc_cg *cg) #else /* !CONFIG_CGROUP_MISC */ -unsigned long misc_cg_res_total_usage(enum misc_res_type type) +static inline unsigned long misc_cg_res_total_usage(enum misc_res_type type) { return 0; } -- cgit v1.2.3 From 25faa935f9e0bd5aba34a820defb982d43bb4a77 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 9 Feb 2021 17:58:43 +0800 Subject: power: supply: core: Use true and false for bool variable Fix the following coccicheck warning: ./include/linux/power_supply.h:507:9-10: WARNING: return of 0/1 in function 'power_supply_is_watt_property' with return type bool. ./include/linux/power_supply.h:479:9-10: WARNING: return of 0/1 in function 'power_supply_is_amp_property' with return type bool. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 6e776be5bfa0..174a534fd907 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -482,12 +482,12 @@ static inline bool power_supply_is_amp_property(enum power_supply_property psp) case POWER_SUPPLY_PROP_CURRENT_NOW: case POWER_SUPPLY_PROP_CURRENT_AVG: case POWER_SUPPLY_PROP_CURRENT_BOOT: - return 1; + return true; default: break; } - return 0; + return false; } static inline bool power_supply_is_watt_property(enum power_supply_property psp) @@ -510,12 +510,12 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp) case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX: case POWER_SUPPLY_PROP_POWER_NOW: - return 1; + return true; default: break; } - return 0; + return false; } #ifdef CONFIG_POWER_SUPPLY_HWMON -- cgit v1.2.3 From e588fead04ec51ad9ede7010676de19dcaa50b71 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 29 Mar 2021 18:12:06 +0300 Subject: software node: Introduce SOFTWARE_NODE_REFERENCE() helper macro This is useful to assign software node reference with arguments in a common way. Moreover, we have already couple of users that may be converted. And by the fact, one of them is moved right here to use the helper. Tested-by: Daniel Scally Reviewed-by: Daniel Scally Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210329151207.36619-5-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index dd4687b56239..0d876316e61d 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -254,6 +254,13 @@ struct software_node_ref_args { u64 args[NR_FWNODE_REFERENCE_ARGS]; }; +#define SOFTWARE_NODE_REFERENCE(_ref_, ...) \ +(const struct software_node_ref_args) { \ + .node = _ref_, \ + .nargs = ARRAY_SIZE(((u64[]){ 0, ##__VA_ARGS__ })) - 1, \ + .args = { __VA_ARGS__ }, \ +} + /** * struct property_entry - "Built-in" device property representation. * @name: Name of the property. @@ -362,11 +369,7 @@ struct property_entry { .name = _name_, \ .length = sizeof(struct software_node_ref_args), \ .type = DEV_PROP_REF, \ - { .pointer = &(const struct software_node_ref_args) { \ - .node = _ref_, \ - .nargs = ARRAY_SIZE(((u64[]){ 0, ##__VA_ARGS__ })) - 1, \ - .args = { __VA_ARGS__ }, \ - } }, \ + { .pointer = &SOFTWARE_NODE_REFERENCE(_ref_, ##__VA_ARGS__), }, \ } struct property_entry * -- cgit v1.2.3 From d737e5d418706abf32f6de68c3e09958516d422f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Feb 2021 16:04:15 -0500 Subject: SUNRPC: Set TCP_CORK until the transmit queue is empty When we have multiple RPC requests queued up, it makes sense to set the TCP_CORK option while the transmit queue is non-empty. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d2e97ee802af..d81fe8b364d0 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -247,6 +247,7 @@ struct rpc_xprt { struct rpc_task * snd_task; /* Task blocked in send */ struct list_head xmit_queue; /* Send queue */ + atomic_long_t xmit_queuelen; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) -- cgit v1.2.3 From aca01415e076aa96cca0f801f4420ee5c10c660d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20Cs=C3=B3k=C3=A1s?= Date: Wed, 31 Mar 2021 19:19:20 +0000 Subject: i2c: Add I2C_AQ_NO_REP_START adapter quirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This quirk signifies that the adapter cannot do a repeated START, it always issues a STOP condition after transfers. Suggested-by: Wolfram Sang Signed-off-by: Bence Csókás Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 56622658b215..a670ae129f4b 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -687,6 +687,8 @@ struct i2c_adapter_quirks { #define I2C_AQ_NO_ZERO_LEN_READ BIT(5) #define I2C_AQ_NO_ZERO_LEN_WRITE BIT(6) #define I2C_AQ_NO_ZERO_LEN (I2C_AQ_NO_ZERO_LEN_READ | I2C_AQ_NO_ZERO_LEN_WRITE) +/* adapter cannot do repeated START */ +#define I2C_AQ_NO_REP_START BIT(7) /* * i2c_adapter is the structure used to identify a physical i2c bus along -- cgit v1.2.3 From eeb85a14ee3494febb85ccfbee0772eda0823b13 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 5 Apr 2021 00:12:23 -0700 Subject: net: Allow to specify ifindex when device is moved to another namespace Currently, we can specify ifindex on link creation. This change allows to specify ifindex when a device is moved to another network namespace. Even now, a device ifindex can be changed if there is another device with the same ifindex in the target namespace. So this change doesn't introduce completely new behavior, it adds more control to the process. CRIU users want to restore containers with pre-created network devices. A user will provide network devices and instructions where they have to be restored, then CRIU will restore network namespaces and move devices into them. The problem is that devices have to be restored with the same indexes that they have before C/R. Cc: Alexander Mikhalitsyn Suggested-by: Christian Brauner Signed-off-by: Andrei Vagin Reviewed-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f57b70fc251f..b482236c0e99 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4026,7 +4026,8 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags, int dev_change_name(struct net_device *, const char *); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); -int dev_change_net_namespace(struct net_device *, struct net *, const char *); +int dev_change_net_namespace(struct net_device *dev, struct net *net, + const char *pat, int new_ifindex); int __dev_set_mtu(struct net_device *, int); int dev_validate_mtu(struct net_device *dev, int mtu, struct netlink_ext_ack *extack); -- cgit v1.2.3 From 237c609f8744a8d5415f40a7ee731957934b0eef Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 1 Apr 2021 16:11:04 +0200 Subject: netfilter: nfnetlink: add and use nfnetlink_broadcast This removes the only reference of net->nfnl outside of the nfnetlink module. This allows to move net->nfnl to net_generic infra. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 791d516e1e88..d4c14257db5d 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -51,6 +51,8 @@ int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags); int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid); +void nfnetlink_broadcast(struct net *net, struct sk_buff *skb, __u32 portid, + __u32 group, gfp_t allocation); static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type) { -- cgit v1.2.3 From d556435156b7970b8ce61b355df558a5168927cc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Mar 2021 11:21:38 +0100 Subject: jbd2: avoid -Wempty-body warnings Building with 'make W=1' shows a harmless -Wempty-body warning: fs/jbd2/recovery.c: In function 'fc_do_one_pass': fs/jbd2/recovery.c:267:75: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] 267 | jbd_debug(3, "Fast commit replay failed, err = %d\n", err); | ^ Change the empty dprintk() macros to no_printk(), which avoids this warning and adds format string checking. Signed-off-by: Arnd Bergmann Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20210322102152.95684-1-arnd@kernel.org Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b9aa85081a40..db0e1920cb12 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -61,7 +61,7 @@ void __jbd2_debug(int level, const char *file, const char *func, #define jbd_debug(n, fmt, a...) \ __jbd2_debug((n), __FILE__, __func__, __LINE__, (fmt), ##a) #else -#define jbd_debug(n, fmt, a...) /**/ +#define jbd_debug(n, fmt, a...) no_printk(fmt, ##a) #endif extern void *jbd2_alloc(size_t size, gfp_t flags); -- cgit v1.2.3 From f4022062e5417ab7228e95aec1a8687059a19db7 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 15 Mar 2021 16:56:46 +0000 Subject: soundwire: add static port mapping support Some of the SoundWire device ports are statically mapped to Controller ports during design, however there is no way to expose this information to the controller. Controllers like Qualcomm ones use this info to setup static bandwidth parameters for those ports. A generic port allocation is not possible in this cases! So this patch adds a new member m_port_map to struct sdw_slave to expose this static map. Signed-off-by: Srinivas Kandagatla Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210315165650.13392-2-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 5ff9a8f37e91..ced07f8fde87 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -642,6 +642,7 @@ struct sdw_slave_ops { * @debugfs: Slave debugfs * @node: node for bus list * @port_ready: Port ready completion flag for each Slave port + * @m_port_map: static Master port map for each Slave port * @dev_num: Current Device Number, values can be 0 or dev_num_sticky * @dev_num_sticky: one-time static Device Number assigned by Bus * @probed: boolean tracking driver state @@ -673,6 +674,7 @@ struct sdw_slave { #endif struct list_head node; struct completion port_ready[SDW_MAX_PORTS]; + unsigned int m_port_map[SDW_MAX_PORTS]; enum sdw_clk_stop_mode curr_clk_stop_mode; u16 dev_num; u16 dev_num_sticky; -- cgit v1.2.3 From 5befc7c26e5a98cd49789fb1beb52c62bd472dba Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 24 Mar 2021 16:18:05 -0700 Subject: nvme: implement non-mdts command limits Commands that access LBA contents without a data transfer between the host historically have not had a spec defined upper limit. The driver set the queue constraints for such commands to the max data transfer size just to be safe, but this artificial constraint frequently limits devices below their capabilities. The NVMe Workgroup ratified TP4040 defines how a controller may advertise their non-MDTS limits. Use these if provided and default to the current constraints if not. Since the Dataset Management command limits are defined in logical blocks, but without a namespace to tell us the logical block size, the code defaults to the safe 512b size. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b08787cd0881..edcbd60b88b9 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -405,6 +405,16 @@ struct nvme_id_ctrl_zns { __u8 rsvd1[4095]; }; +struct nvme_id_ctrl_nvm { + __u8 vsl; + __u8 wzsl; + __u8 wusl; + __u8 dmrl; + __le32 dmrsl; + __le64 dmsl; + __u8 rsvd16[4080]; +}; + enum { NVME_ID_CNS_NS = 0x00, NVME_ID_CNS_CTRL = 0x01, -- cgit v1.2.3 From 6dd6954668035aa62b8c73d219d8c0adf6dc866a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 6 Apr 2021 14:45:03 +0200 Subject: dt-bindings:iio:dac: update microchip,mcp4725.yaml reference Changeset 6ced946a4bba ("dt-bindings:iio:dac:microchip,mcp4725 yaml conversion") renamed: Documentation/devicetree/bindings/iio/dac/mcp4725.txt to: Documentation/devicetree/bindings/iio/dac/microchip,mcp4725.yaml. Update its cross-reference accordingly. Fixes: 6ced946a4bba ("dt-bindings:iio:dac:microchip,mcp4725 yaml conversion") Signed-off-by: Mauro Carvalho Chehab --- include/linux/iio/dac/mcp4725.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/dac/mcp4725.h b/include/linux/iio/dac/mcp4725.h index e9801c8d49c0..1f7e53c506b6 100644 --- a/include/linux/iio/dac/mcp4725.h +++ b/include/linux/iio/dac/mcp4725.h @@ -15,7 +15,7 @@ * @vref_buffered: Controls buffering of the external reference voltage. * * Vref related settings are available only on MCP4756. See - * Documentation/devicetree/bindings/iio/dac/mcp4725.txt for more information. + * Documentation/devicetree/bindings/iio/dac/microchip,mcp4725.yaml for more information. */ struct mcp4725_platform_data { bool use_vref; -- cgit v1.2.3 From ce288e0535688cc3475a3c3d4d96624514c3550c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 Mar 2021 09:29:59 +0200 Subject: block: remove BLK_BOUNCE_ISA support Remove the BLK_BOUNCE_ISA support now that all users are gone. Signed-off-by: Christoph Hellwig Acked-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20210331073001.46776-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc6bc8383b43..0dbb72ea3735 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -436,11 +436,6 @@ struct request_queue { */ int id; - /* - * queue needs bounce pages for pages above this limit - */ - gfp_t bounce_gfp; - spinlock_t queue_lock; /* @@ -847,7 +842,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; * * BLK_BOUNCE_HIGH : bounce all highmem pages * BLK_BOUNCE_ANY : don't bounce anything - * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary */ #if BITS_PER_LONG == 32 @@ -856,7 +850,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; #define BLK_BOUNCE_HIGH -1ULL #endif #define BLK_BOUNCE_ANY (-1ULL) -#define BLK_BOUNCE_ISA (DMA_BIT_MASK(24)) /* * default timeout for SG_IO if none specified -- cgit v1.2.3 From 9bb33f24abbd0fa2fadad01ec75438d7cc239189 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 Mar 2021 09:30:00 +0200 Subject: block: refactor the bounce buffering code Get rid of all the PFN arithmetics and just use an enum for the two remaining options, and use PageHighMem for the actual bounce decision. Add a fast path to entirely avoid the call for the common case of a queue not using the legacy bouncing code. Signed-off-by: Christoph Hellwig Acked-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20210331073001.46776-8-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0dbb72ea3735..55cc8b96c844 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -313,8 +313,17 @@ enum blk_zoned_model { BLK_ZONED_HM, /* Host-managed zoned block device */ }; +/* + * BLK_BOUNCE_NONE: never bounce (default) + * BLK_BOUNCE_HIGH: bounce all highmem pages + */ +enum blk_bounce { + BLK_BOUNCE_NONE, + BLK_BOUNCE_HIGH, +}; + struct queue_limits { - unsigned long bounce_pfn; + enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; @@ -835,22 +844,6 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) return q->nr_requests; } -extern unsigned long blk_max_low_pfn, blk_max_pfn; - -/* - * standard bounce addresses: - * - * BLK_BOUNCE_HIGH : bounce all highmem pages - * BLK_BOUNCE_ANY : don't bounce anything - */ - -#if BITS_PER_LONG == 32 -#define BLK_BOUNCE_HIGH ((u64)blk_max_low_pfn << PAGE_SHIFT) -#else -#define BLK_BOUNCE_HIGH -1ULL -#endif -#define BLK_BOUNCE_ANY (-1ULL) - /* * default timeout for SG_IO if none specified */ @@ -1134,7 +1127,7 @@ extern void blk_abort_request(struct request *); * Access functions for manipulating queue properties */ extern void blk_cleanup_queue(struct request_queue *); -extern void blk_queue_bounce_limit(struct request_queue *, u64); +void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce limit); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); -- cgit v1.2.3 From 393bb12e00580aaa23356504eed38d8f5571153a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 Mar 2021 09:30:01 +0200 Subject: block: stop calling blk_queue_bounce for passthrough requests Instead of overloading the passthrough fast path with the deprecated block layer bounce buffering let the users that combine an old undermaintained driver with a highmem system pay the price by always falling back to copies in that case. Signed-off-by: Christoph Hellwig Acked-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20210331073001.46776-9-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 55cc8b96c844..d5d320da51f8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -909,7 +909,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); -extern int blk_rq_append_bio(struct request *rq, struct bio **bio); +int blk_rq_append_bio(struct request *rq, struct bio *bio); extern void blk_queue_split(struct bio **); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, -- cgit v1.2.3 From 0bfc6a4ea63c2adac71a824397ef48f28dbc5e47 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Mar 2021 09:53:05 -0600 Subject: vfio: Split creation of a vfio_device into init and register ops This makes the struct vfio_device part of the public interface so it can be used with container_of and so forth, as is typical for a Linux subystem. This is the first step to bring some type-safety to the vfio interface by allowing the replacement of 'void *' and 'struct device *' inputs with a simple and clear 'struct vfio_device *' For now the self-allocating vfio_add_group_dev() interface is kept so each user can be updated as a separate patch. The expected usage pattern is driver core probe() function: my_device = kzalloc(sizeof(*mydevice)); vfio_init_group_dev(&my_device->vdev, dev, ops, mydevice); /* other driver specific prep */ vfio_register_group_dev(&my_device->vdev); dev_set_drvdata(dev, my_device); driver core remove() function: my_device = dev_get_drvdata(dev); vfio_unregister_group_dev(&my_device->vdev); /* other driver specific tear down */ kfree(my_device); Allowing the driver to be able to use the drvdata and vfio_device to go to/from its own data. The pattern also makes it clear that vfio_register_group_dev() must be last in the sequence, as once it is called the core code can immediately start calling ops. The init/register gap is provided to allow for the driver to do setup before ops can be called and thus avoid races. Reviewed-by: Christoph Hellwig Reviewed-by: Liu Yi L Reviewed-by: Cornelia Huck Reviewed-by: Max Gurtovoy Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Signed-off-by: Jason Gunthorpe Message-Id: <3-v3-225de1400dfc+4e074-vfio1_jgg@nvidia.com> Signed-off-by: Alex Williamson --- include/linux/vfio.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index b7e18bde5aa8..ad8b579d67d3 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -15,6 +15,18 @@ #include #include +struct vfio_device { + struct device *dev; + const struct vfio_device_ops *ops; + struct vfio_group *group; + + /* Members below here are private, not for driver use */ + refcount_t refcount; + struct completion comp; + struct list_head group_next; + void *device_data; +}; + /** * struct vfio_device_ops - VFIO bus driver device callbacks * @@ -48,11 +60,15 @@ struct vfio_device_ops { extern struct iommu_group *vfio_iommu_group_get(struct device *dev); extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); +void vfio_init_group_dev(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops, void *device_data); +int vfio_register_group_dev(struct vfio_device *device); extern int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops, void *device_data); extern void *vfio_del_group_dev(struct device *dev); +void vfio_unregister_group_dev(struct vfio_device *device); extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); extern void vfio_device_put(struct vfio_device *device); extern void *vfio_device_data(struct vfio_device *device); -- cgit v1.2.3 From 1ae1b20f6f2c67659c963e5fe58f9b4a47df9f12 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Mar 2021 09:53:07 -0600 Subject: vfio/mdev: Use vfio_init/register/unregister_group_dev mdev gets little benefit because it doesn't actually do anything, however it is the last user, so move the vfio_init/register/unregister_group_dev() code here for now. Reviewed-by: Christoph Hellwig Reviewed-by: Liu Yi L Reviewed-by: Kevin Tian Reviewed-by: Cornelia Huck Signed-off-by: Jason Gunthorpe Message-Id: <10-v3-225de1400dfc+4e074-vfio1_jgg@nvidia.com> Signed-off-by: Alex Williamson --- include/linux/vfio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ad8b579d67d3..4995faf51efe 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -63,11 +63,6 @@ extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); void vfio_init_group_dev(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops, void *device_data); int vfio_register_group_dev(struct vfio_device *device); -extern int vfio_add_group_dev(struct device *dev, - const struct vfio_device_ops *ops, - void *device_data); - -extern void *vfio_del_group_dev(struct device *dev); void vfio_unregister_group_dev(struct vfio_device *device); extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); extern void vfio_device_put(struct vfio_device *device); -- cgit v1.2.3 From 6df62c5b05f4ad6876815ea8b8775905a090224a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Mar 2021 09:53:08 -0600 Subject: vfio: Make vfio_device_ops pass a 'struct vfio_device *' instead of 'void *' This is the standard kernel pattern, the ops associated with a struct get the struct pointer in for typesafety. The expected design is to use container_of to cleanly go from the subsystem level type to the driver level type without having any type erasure in a void *. Reviewed-by: Dan Williams Reviewed-by: Christoph Hellwig Reviewed-by: Cornelia Huck Signed-off-by: Jason Gunthorpe Message-Id: <12-v3-225de1400dfc+4e074-vfio1_jgg@nvidia.com> Signed-off-by: Alex Williamson --- include/linux/vfio.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 4995faf51efe..784c34c0a287 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -44,17 +44,17 @@ struct vfio_device { */ struct vfio_device_ops { char *name; - int (*open)(void *device_data); - void (*release)(void *device_data); - ssize_t (*read)(void *device_data, char __user *buf, + int (*open)(struct vfio_device *vdev); + void (*release)(struct vfio_device *vdev); + ssize_t (*read)(struct vfio_device *vdev, char __user *buf, size_t count, loff_t *ppos); - ssize_t (*write)(void *device_data, const char __user *buf, + ssize_t (*write)(struct vfio_device *vdev, const char __user *buf, size_t count, loff_t *size); - long (*ioctl)(void *device_data, unsigned int cmd, + long (*ioctl)(struct vfio_device *vdev, unsigned int cmd, unsigned long arg); - int (*mmap)(void *device_data, struct vm_area_struct *vma); - void (*request)(void *device_data, unsigned int count); - int (*match)(void *device_data, char *buf); + int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); + void (*request)(struct vfio_device *vdev, unsigned int count); + int (*match)(struct vfio_device *vdev, char *buf); }; extern struct iommu_group *vfio_iommu_group_get(struct device *dev); -- cgit v1.2.3 From 1e04ec14204dec28131855d8dd160c3d55d12797 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Mar 2021 09:53:08 -0600 Subject: vfio: Remove device_data from the vfio bus driver API There are no longer any users, so it can go away. Everything is using container_of now. Reviewed-by: Christoph Hellwig Reviewed-by: Kevin Tian Reviewed-by: Cornelia Huck Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe Message-Id: <14-v3-225de1400dfc+4e074-vfio1_jgg@nvidia.com> Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 784c34c0a287..a2c5b30e1763 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -24,7 +24,6 @@ struct vfio_device { refcount_t refcount; struct completion comp; struct list_head group_next; - void *device_data; }; /** @@ -61,12 +60,11 @@ extern struct iommu_group *vfio_iommu_group_get(struct device *dev); extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); void vfio_init_group_dev(struct vfio_device *device, struct device *dev, - const struct vfio_device_ops *ops, void *device_data); + const struct vfio_device_ops *ops); int vfio_register_group_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device); extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); extern void vfio_device_put(struct vfio_device *device); -extern void *vfio_device_data(struct vfio_device *device); /* events for the backend driver notify callback */ enum vfio_iommu_notify_type { -- cgit v1.2.3 From dc67dac617e3586ec2db49f3c4fde1d6ac7afe14 Mon Sep 17 00:00:00 2001 From: Aditya Srivastava Date: Sun, 14 Mar 2021 12:58:28 +0530 Subject: soc: fsl: guts: fix comment syntax in file The opening comment mark '/**' is used for kernel-doc comments. There are certain comments in include/linux/fsl/guts.h which follows this syntax, but the content inside does not comply with kernel-doc. E.g., opening comment for "Freecale 85xx and 86xx Global Utilties register set" follows kernel-doc syntax(i.e., '/**'), but the content inside does not comply with any kernel-doc specification (function, struct, etc). This causes unwelcomed warning from kernel-doc: "warning: expecting prototype for Freecale 85xx and 86xx Global Utilties register set(). Prototype was for __FSL_GUTS_H__() instead" Replace all such comment occurrences with general comment format, i.e. '/*' to pervent kernel-doc from parsing these. Signed-off-by: Aditya Srivastava Reviewed-by: Randy Dunlap Signed-off-by: Li Yang --- include/linux/fsl/guts.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl/guts.h b/include/linux/fsl/guts.h index 0ac27b233f12..fdb55ca47a4f 100644 --- a/include/linux/fsl/guts.h +++ b/include/linux/fsl/guts.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -/** +/* * Freecale 85xx and 86xx Global Utilties register set * * Authors: Jeff Brown @@ -14,7 +14,7 @@ #include #include -/** +/* * Global Utility Registers. * * Not all registers defined in this structure are available on all chips, so -- cgit v1.2.3 From 2cd87a7b293dedbbaea3b6739f95d428a2d9890d Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 5 Apr 2021 17:43:03 +0100 Subject: coresight: core: Add support for dedicated percpu sinks Add support for dedicated sinks that are bound to individual CPUs. (e.g, TRBE). To allow quicker access to the sink for a given CPU bound source, keep a percpu array of the sink devices. Also, add support for building a path to the CPU local sink from the ETM. This adds a new percpu sink type CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM. This new sink type is exclusively available and can only work with percpu source type device CORESIGHT_DEV_SUBTYPE_SOURCE_PROC. This defines a percpu structure that accommodates a single coresight_device which can be used to store an initialized instance from a sink driver. As these sinks are exclusively linked and dependent on corresponding percpu sources devices, they should also be the default sink device during a perf session. Outwards device connections are scanned while establishing paths between a source and a sink device. But such connections are not present for certain percpu source and sink devices which are exclusively linked and dependent. Build the path directly and skip connection scanning for such devices. Cc: Mathieu Poirier Cc: Mike Leach Cc: Suzuki K Poulose Tested-by: Suzuki K Poulose Reviewed-by: Mike Leach Signed-off-by: Anshuman Khandual [Moved the set/get percpu sink APIs from TRBE patch to here Fixed build break on arm32] Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20210405164307.1720226-17-suzuki.poulose@arm.com Signed-off-by: Mathieu Poirier --- include/linux/coresight.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 976ec2697610..85008a65e21f 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -50,6 +50,7 @@ enum coresight_dev_subtype_sink { CORESIGHT_DEV_SUBTYPE_SINK_PORT, CORESIGHT_DEV_SUBTYPE_SINK_BUFFER, CORESIGHT_DEV_SUBTYPE_SINK_SYSMEM, + CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM, }; enum coresight_dev_subtype_link { @@ -455,6 +456,18 @@ static inline void csdev_access_write64(struct csdev_access *csa, u64 val, u32 o } #endif /* CONFIG_64BIT */ +static inline bool coresight_is_percpu_source(struct coresight_device *csdev) +{ + return csdev && (csdev->type == CORESIGHT_DEV_TYPE_SOURCE) && + (csdev->subtype.source_subtype == CORESIGHT_DEV_SUBTYPE_SOURCE_PROC); +} + +static inline bool coresight_is_percpu_sink(struct coresight_device *csdev) +{ + return csdev && (csdev->type == CORESIGHT_DEV_TYPE_SINK) && + (csdev->subtype.sink_subtype == CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM); +} + extern struct coresight_device * coresight_register(struct coresight_desc *desc); extern void coresight_unregister(struct coresight_device *csdev); -- cgit v1.2.3 From 0f6925b3e8da0dbbb52447ca8a8b42b371aac7db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Apr 2021 06:26:02 -0700 Subject: virtio_net: Do not pull payload in skb->head Xuan Zhuo reported that commit 3226b158e67c ("net: avoid 32 x truesize under-estimation for tiny skbs") brought a ~10% performance drop. The reason for the performance drop was that GRO was forced to chain sk_buff (using skb_shinfo(skb)->frag_list), which uses more memory but also cause packet consumers to go over a lot of overhead handling all the tiny skbs. It turns out that virtio_net page_to_skb() has a wrong strategy : It allocates skbs with GOOD_COPY_LEN (128) bytes in skb->head, then copies 128 bytes from the page, before feeding the packet to GRO stack. This was suboptimal before commit 3226b158e67c ("net: avoid 32 x truesize under-estimation for tiny skbs") because GRO was using 2 frags per MSS, meaning we were not packing MSS with 100% efficiency. Fix is to pull only the ethernet header in page_to_skb() Then, we change virtio_net_hdr_to_skb() to pull the missing headers, instead of assuming they were already pulled by callers. This fixes the performance regression, but could also allow virtio_net to accept packets with more than 128bytes of headers. Many thanks to Xuan Zhuo for his report, and his tests/help. Fixes: 3226b158e67c ("net: avoid 32 x truesize under-estimation for tiny skbs") Reported-by: Xuan Zhuo Link: https://www.spinics.net/lists/netdev/msg731397.html Co-Developed-by: Xuan Zhuo Signed-off-by: Xuan Zhuo Signed-off-by: Eric Dumazet Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: virtualization@lists.linux-foundation.org Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 98775d7fa696..b465f8f3e554 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -65,14 +65,18 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, skb_reset_mac_header(skb); if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { - u16 start = __virtio16_to_cpu(little_endian, hdr->csum_start); - u16 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); + u32 start = __virtio16_to_cpu(little_endian, hdr->csum_start); + u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); + u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); + + if (!pskb_may_pull(skb, needed)) + return -EINVAL; if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; p_off = skb_transport_offset(skb) + thlen; - if (p_off > skb_headlen(skb)) + if (!pskb_may_pull(skb, p_off)) return -EINVAL; } else { /* gso packets without NEEDS_CSUM do not set transport_offset. @@ -102,14 +106,14 @@ retry: } p_off = keys.control.thoff + thlen; - if (p_off > skb_headlen(skb) || + if (!pskb_may_pull(skb, p_off) || keys.basic.ip_proto != ip_proto) return -EINVAL; skb_set_transport_header(skb, keys.control.thoff); } else if (gso_type) { p_off = thlen; - if (p_off > skb_headlen(skb)) + if (!pskb_may_pull(skb, p_off)) return -EINVAL; } } -- cgit v1.2.3 From 77651900cede54930cd8a039dcd4583bfa308807 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 5 Apr 2021 16:13:41 -0700 Subject: usbnet: add _mii suffix to usbnet_set/get_link_ksettings The generic functions assumed devices provided an MDIO interface (accessed via older mii code, not phylib). This is true only for genuine ethernet. Devices with a higher level of abstraction or based on different technologies do not have MDIO. To support this case, first rename the existing functions with _mii suffix. v2: rebased on changed upstream v3: changed names to clearly say that this does NOT use phylib v4: moved hunks to correct patch; reworded commmit messages Signed-off-by : Oliver Neukum Tested-by: Roland Dreier Reviewed-by: Grant Grundler Tested-by: Grant Grundler Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index cfbfd6fe01df..a89e1452107d 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -267,9 +267,9 @@ extern void usbnet_pause_rx(struct usbnet *); extern void usbnet_resume_rx(struct usbnet *); extern void usbnet_purge_paused_rxq(struct usbnet *); -extern int usbnet_get_link_ksettings(struct net_device *net, +extern int usbnet_get_link_ksettings_mii(struct net_device *net, struct ethtool_link_ksettings *cmd); -extern int usbnet_set_link_ksettings(struct net_device *net, +extern int usbnet_set_link_ksettings_mii(struct net_device *net, const struct ethtool_link_ksettings *cmd); extern u32 usbnet_get_link(struct net_device *net); extern u32 usbnet_get_msglevel(struct net_device *); -- cgit v1.2.3 From 956baa99571bbaf88f3e91190dfb498c685b0e21 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 5 Apr 2021 16:13:42 -0700 Subject: usbnet: add method for reporting speed without MII The old method for reporting link speed assumed a driver uses the generic phy (mii) MDIO read/write functions. CDC devices don't expose the phy. Add a primitive internal version reporting back directly what the CDC notification/status operations recorded. v2: rebased on upstream v3: changed names and made clear which units are used v4: moved hunks to correct patch; rewrote commmit messages Signed-off-by: Oliver Neukum Tested-by: Roland Dreier Reviewed-by: Grant Grundler Tested-by: Grant Grundler Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index a89e1452107d..8336e86ce606 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -53,6 +53,9 @@ struct usbnet { u32 hard_mtu; /* count any extra framing */ size_t rx_urb_size; /* size for rx urbs */ struct mii_if_info mii; + long rx_speed; /* If MII not used */ + long tx_speed; /* If MII not used */ +# define SPEED_UNSET -1 /* various kinds of pending driver work */ struct sk_buff_head rxq; @@ -81,8 +84,6 @@ struct usbnet { # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 # define EVENT_NO_IP_ALIGN 13 - u32 rx_speed; /* in bps - NOT Mbps */ - u32 tx_speed; /* in bps - NOT Mbps */ }; static inline struct usb_driver *driver_of(struct usb_interface *intf) @@ -271,6 +272,8 @@ extern int usbnet_get_link_ksettings_mii(struct net_device *net, struct ethtool_link_ksettings *cmd); extern int usbnet_set_link_ksettings_mii(struct net_device *net, const struct ethtool_link_ksettings *cmd); +extern int usbnet_get_link_ksettings_internal(struct net_device *net, + struct ethtool_link_ksettings *cmd); extern u32 usbnet_get_link(struct net_device *net); extern u32 usbnet_get_msglevel(struct net_device *); extern void usbnet_set_msglevel(struct net_device *, u32); -- cgit v1.2.3 From 1c84b33101c82683dee8b06761ca1f69e78c8ee7 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 1 Apr 2021 15:00:19 -0700 Subject: bpf, sockmap: Fix sk->prot unhash op reset In '4da6a196f93b1' we fixed a potential unhash loop caused when a TLS socket in a sockmap was removed from the sockmap. This happened because the unhash operation on the TLS ctx continued to point at the sockmap implementation of unhash even though the psock has already been removed. The sockmap unhash handler when a psock is removed does the following, void sock_map_unhash(struct sock *sk) { void (*saved_unhash)(struct sock *sk); struct sk_psock *psock; rcu_read_lock(); psock = sk_psock(sk); if (unlikely(!psock)) { rcu_read_unlock(); if (sk->sk_prot->unhash) sk->sk_prot->unhash(sk); return; } [...] } The unlikely() case is there to handle the case where psock is detached but the proto ops have not been updated yet. But, in the above case with TLS and removed psock we never fixed sk_prot->unhash() and unhash() points back to sock_map_unhash resulting in a loop. To fix this we added this bit of code, static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { sk->sk_prot->unhash = psock->saved_unhash; This will set the sk_prot->unhash back to its saved value. This is the correct callback for a TLS socket that has been removed from the sock_map. Unfortunately, this also overwrites the unhash pointer for all psocks. We effectively break sockmap unhash handling for any future socks. Omitting the unhash operation will leave stale entries in the map if a socket transition through unhash, but does not do close() op. To fix set unhash correctly before calling into tls_update. This way the TLS enabled socket will point to the saved unhash() handler. Fixes: 4da6a196f93b1 ("bpf: Sockmap/tls, during free we may call tcp_bpf_unhash() in loop") Reported-by: Cong Wang Reported-by: Lorenz Bauer Suggested-by: Cong Wang Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/161731441904.68884.15593917809745631972.stgit@john-XPS-13-9370 --- include/linux/skmsg.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 8edbbf5f2f93..822c048934e3 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -349,8 +349,13 @@ static inline void sk_psock_update_proto(struct sock *sk, static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { - sk->sk_prot->unhash = psock->saved_unhash; if (inet_csk_has_ulp(sk)) { + /* TLS does not have an unhash proto in SW cases, but we need + * to ensure we stop using the sock_map unhash routine because + * the associated psock is being removed. So use the original + * unhash handler. + */ + WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash); tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); } else { sk->sk_write_space = psock->saved_write_space; -- cgit v1.2.3 From a14587dfc5ad2312dabdd42a610d80ecd0dc8bea Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Thu, 21 Jan 2021 16:01:37 +0200 Subject: net/mlx5: Fix placement of log_max_flow_counter The cited commit wrongly placed log_max_flow_counter field of mlx5_ifc_flow_table_prop_layout_bits, align it to the HW spec intended placement. Fixes: 16f1c5bb3ed7 ("net/mlx5: Check device capability for maximum flow counters") Signed-off-by: Raed Salem Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index df5d91c8b2d4..1ccedb7816d0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -437,11 +437,11 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_60[0x18]; u8 log_max_ft_num[0x8]; - u8 reserved_at_80[0x18]; + u8 reserved_at_80[0x10]; + u8 log_max_flow_counter[0x8]; u8 log_max_destination[0x8]; - u8 log_max_flow_counter[0x8]; - u8 reserved_at_a8[0x10]; + u8 reserved_at_a0[0x18]; u8 log_max_flow[0x8]; u8 reserved_at_c0[0x40]; -- cgit v1.2.3 From ce28f0fd670ddffcd564ce7119bdefbaf08f02d3 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 4 Apr 2021 10:50:50 +0300 Subject: net/mlx5: Fix PPLM register mapping Add reserved mapping to cover all the register in order to avoid setting arbitrary values to newer FW which implements the reserved fields. Fixes: a58837f52d43 ("net/mlx5e: Expose FEC feilds and related capability bit") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1ccedb7816d0..9940070cda8f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8835,6 +8835,8 @@ struct mlx5_ifc_pplm_reg_bits { u8 fec_override_admin_100g_2x[0x10]; u8 fec_override_admin_50g_1x[0x10]; + + u8 reserved_at_140[0x140]; }; struct mlx5_ifc_ppcnt_reg_bits { -- cgit v1.2.3 From 534b1204ca4694db1093b15cf3e79a99fcb6a6da Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 4 Apr 2021 12:55:00 +0300 Subject: net/mlx5: Fix PBMC register mapping Add reserved mapping to cover all the register in order to avoid setting arbitrary values to newer FW which implements the reserved fields. Fixes: 50b4a3c23646 ("net/mlx5: PPTB and PBMC register firmware command support") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9940070cda8f..9c68b2da14c6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10200,7 +10200,7 @@ struct mlx5_ifc_pbmc_reg_bits { struct mlx5_ifc_bufferx_reg_bits buffer[10]; - u8 reserved_at_2e0[0x40]; + u8 reserved_at_2e0[0x80]; }; struct mlx5_ifc_qtct_reg_bits { -- cgit v1.2.3 From a91d98a0a2b8e4c433b7341708f7d706e0cf1c8e Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 10 Sep 2020 15:28:02 +0800 Subject: net/mlx5: Map register values to restore objects Currently reg_c0 lower 16 bits and reg_b are used to store the chain id that missed in FDB and NIC tables accordingly. However, the registers' values may index a restore object, rather than a single u32 value. Different object types can be used to restore mutually exclusive contexts such as chain id and sample group id. Use the mapping object to associate an index with a restore object as a prestep for supporting additional restore types. Signed-off-by: Chris Mi Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 994c2c8cb4fd..125ae482383b 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -74,20 +74,19 @@ bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw); bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); /* Reg C0 usage: - * Reg C0 = < ESW_PFNUM_BITS(4) | ESW_VPORT BITS(12) | ESW_CHAIN_TAG(16) > + * Reg C0 = < ESW_PFNUM_BITS(4) | ESW_VPORT BITS(12) | ESW_REG_C0_OBJ(16) > * * Highest 4 bits of the reg c0 is the PF_NUM (range 0-15), 12 bits of * unique non-zero vport id (range 1-4095). The rest (lowest 16 bits) is left - * for tc chain tag restoration. + * for user data objects managed by a common mapping context. * PFNUM + VPORT comprise the SOURCE_PORT matching. */ #define ESW_VPORT_BITS 12 #define ESW_PFNUM_BITS 4 #define ESW_SOURCE_PORT_METADATA_BITS (ESW_PFNUM_BITS + ESW_VPORT_BITS) #define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS) -#define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS) -#define ESW_CHAIN_TAG_METADATA_MASK GENMASK(ESW_CHAIN_TAG_METADATA_BITS - 1,\ - 0) +#define ESW_REG_C0_USER_DATA_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS) +#define ESW_REG_C0_USER_DATA_METADATA_MASK GENMASK(ESW_REG_C0_USER_DATA_METADATA_BITS - 1, 0) static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) { -- cgit v1.2.3 From eee87072e2fb9000b12c5e752ebd4a05882da2e4 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Thu, 1 Apr 2021 14:41:49 -0700 Subject: bus: mhi: core: Remove pre_init flag used for power purposes Some controllers can choose to skip preparation for power up. In that case, device context is initialized based on the pre_init flag not being set during mhi_prepare_for_power_up(). There is no reason MHI host driver should maintain and provide controllers with two separate paths for preparing MHI. Going forward, all controllers will be required to call the mhi_prepare_for_power_up() API followed by their choice of sync or async power up. This allows MHI host driver to get rid of the pre_init flag and sets up a common way for all controllers to use MHI. This also helps controllers fail early on during preparation phase in some failure cases. Signed-off-by: Bhaumik Bhatt Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1617313309-24035-1-git-send-email-bbhatt@codeaurora.org Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index b16afd36b444..c9b36a34cc7f 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -354,7 +354,6 @@ struct mhi_controller_config { * @index: Index of the MHI controller instance * @bounce_buf: Use of bounce buffer * @fbc_download: MHI host needs to do complete image transfer (optional) - * @pre_init: MHI host needs to do pre-initialization before power up * @wake_set: Device wakeup set flag * @irq_flags: irq flags passed to request_irq (optional) * @@ -447,7 +446,6 @@ struct mhi_controller { int index; bool bounce_buf; bool fbc_download; - bool pre_init; bool wake_set; unsigned long irq_flags; }; -- cgit v1.2.3 From 6731fefd95671575ceaba8e1c60881d529537352 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Thu, 1 Apr 2021 14:16:18 -0700 Subject: bus: mhi: Improve documentation on channel transfer setup APIs The mhi_prepare_for_transfer() and mhi_unprepare_from_transfer() APIs could use better explanation. Add details on what MHI does when these APIs are used. Signed-off-by: Bhaumik Bhatt Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1617311778-1254-10-git-send-email-bbhatt@codeaurora.org Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index c9b36a34cc7f..d095fba37d1e 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -712,13 +712,27 @@ int mhi_device_get_sync(struct mhi_device *mhi_dev); void mhi_device_put(struct mhi_device *mhi_dev); /** - * mhi_prepare_for_transfer - Setup channel for data transfer + * mhi_prepare_for_transfer - Setup UL and DL channels for data transfer. + * Allocate and initialize the channel context and + * also issue the START channel command to both + * channels. Channels can be started only if both + * host and device execution environments match and + * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels */ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); /** - * mhi_unprepare_from_transfer - Unprepare the channels + * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. + * Issue the RESET channel command and let the + * device clean-up the context so no incoming + * transfers are seen on the host. Free memory + * associated with the context on host. If device + * is unresponsive, only perform a host side + * clean-up. Channels can be reset only if both + * host and device execution environments match + * and channels are in an ENABLED, STOPPED or + * SUSPENDED state. * @mhi_dev: Device associated with the channels */ void mhi_unprepare_from_transfer(struct mhi_device *mhi_dev); -- cgit v1.2.3 From 45b77828b01cdf2af655e4edbc63646fc7e07b48 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 14 Mar 2021 18:14:57 +0000 Subject: iio:event: Add timeout event info type For adaptive threshold events, the current value is compared with a (typically) low pass filtered version of the same signal that slowly tracks large scale changes. However, sometimes a step change can result in a large lag before the low pass filtered version begins to track the signal again. Timeouts can be used to made an instantaneous 'correction'. Documentation of this attribute is added in a later patch. Signed-off-by: Jonathan Cameron Reviewed-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210314181511.531414-11-jic23@kernel.org --- include/linux/iio/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 5aa7f66d4345..84b3f8175cc6 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -16,6 +16,7 @@ enum iio_event_info { IIO_EV_INFO_PERIOD, IIO_EV_INFO_HIGH_PASS_FILTER_3DB, IIO_EV_INFO_LOW_PASS_FILTER_3DB, + IIO_EV_INFO_TIMEOUT, }; #define IIO_VAL_INT 1 -- cgit v1.2.3 From 8cc110478cab83d71f217a9aafec034ed63a5f8e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 1 Apr 2021 14:17:45 +0200 Subject: dt-bindings:iio:dac: update microchip,mcp4725.yaml reference Changeset 6ced946a4bba ("dt-bindings:iio:dac:microchip,mcp4725 yaml conversion") renamed: Documentation/devicetree/bindings/iio/dac/mcp4725.txt to: Documentation/devicetree/bindings/iio/dac/microchip,mcp4725.yaml. Update its cross-reference accordingly. Fixes: 6ced946a4bba ("dt-bindings:iio:dac:microchip,mcp4725 yaml conversion") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/82fb54974e8a22be15e64343260a6de39a18edda.1617279356.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Cameron --- include/linux/iio/dac/mcp4725.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/dac/mcp4725.h b/include/linux/iio/dac/mcp4725.h index e9801c8d49c0..1f7e53c506b6 100644 --- a/include/linux/iio/dac/mcp4725.h +++ b/include/linux/iio/dac/mcp4725.h @@ -15,7 +15,7 @@ * @vref_buffered: Controls buffering of the external reference voltage. * * Vref related settings are available only on MCP4756. See - * Documentation/devicetree/bindings/iio/dac/mcp4725.txt for more information. + * Documentation/devicetree/bindings/iio/dac/microchip,mcp4725.yaml for more information. */ struct mcp4725_platform_data { bool use_vref; -- cgit v1.2.3 From 13b89768275d6ca9764bf91449e4cafe46ba706b Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 2 Apr 2021 20:31:27 +0530 Subject: mtd: rawnand: Add support for secure regions in NAND memory On a typical end product, a vendor may choose to secure some regions in the NAND memory which are supposed to stay intact between FW upgrades. The access to those regions will be blocked by a secure element like Trustzone. So the normal world software like Linux kernel should not touch these regions (including reading). The regions are declared using a NAND chip DT property, "secure-regions". So let's make use of this property in the raw NAND core and skip access to the secure regions present in a system. Signed-off-by: Manivannan Sadhasivam Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210402150128.29128-4-manivannan.sadhasivam@linaro.org --- include/linux/mtd/rawnand.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 93e8f72beba6..29df2f43dcb5 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1035,6 +1035,16 @@ struct nand_manufacturer { void *priv; }; +/** + * struct nand_secure_region - NAND secure region structure + * @offset: Offset of the start of the secure region + * @size: Size of the secure region + */ +struct nand_secure_region { + u64 offset; + u64 size; +}; + /** * struct nand_chip - NAND Private Flash Chip Data * @base: Inherit from the generic NAND device @@ -1085,6 +1095,8 @@ struct nand_manufacturer { * NAND Controller drivers should not modify this value, but they're * allowed to read it. * @read_retries: The number of read retry modes supported + * @secure_regions: Structure containing the secure regions info + * @nr_secure_regions: Number of secure regions * @controller: The hardware controller structure which is shared among multiple * independent devices * @ecc: The ECC controller structure @@ -1134,6 +1146,8 @@ struct nand_chip { unsigned int suspended : 1; int cur_cs; int read_retries; + struct nand_secure_region *secure_regions; + u8 nr_secure_regions; /* Externals */ struct nand_controller *controller; -- cgit v1.2.3 From 2e1a44c1c4acf209c0dd7bc04421d101b9e80d11 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Mar 2021 09:05:57 +0800 Subject: iommu/vt-d: Remove svm_dev_ops The svm_dev_ops has never been referenced in the tree, and there's no plan to have anything to use it. Remove it to make the code neat. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210323010600.678627-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 3 --- include/linux/intel-svm.h | 7 ------- 2 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 1732298ce888..e0f8c2ade3e8 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -769,14 +769,11 @@ u32 intel_svm_get_pasid(struct iommu_sva *handle); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); -struct svm_dev_ops; - struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; struct intel_iommu *iommu; - struct svm_dev_ops *ops; struct iommu_sva sva; u32 pasid; int users; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 39d368a810b8..6c9d10c0fb1e 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -8,13 +8,6 @@ #ifndef __INTEL_SVM_H__ #define __INTEL_SVM_H__ -struct device; - -struct svm_dev_ops { - void (*fault_cb)(struct device *dev, u32 pasid, u64 address, - void *private, int rwxp, int response); -}; - /* Values for rxwp in fault_cb callback */ #define SVM_REQ_READ (1<<3) #define SVM_REQ_WRITE (1<<2) -- cgit v1.2.3 From 06905ea8319731036695cf1a4c53c12b0f9373cb Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Mar 2021 09:05:58 +0800 Subject: iommu/vt-d: Remove SVM_FLAG_PRIVATE_PASID The SVM_FLAG_PRIVATE_PASID has never been referenced in the tree, and there's no plan to have anything to use it. So cleanup it. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210323010600.678627-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-svm.h | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 6c9d10c0fb1e..10fa80eef13a 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -14,16 +14,6 @@ #define SVM_REQ_EXEC (1<<1) #define SVM_REQ_PRIV (1<<0) -/* - * The SVM_FLAG_PRIVATE_PASID flag requests a PASID which is *not* the "main" - * PASID for the current process. Even if a PASID already exists, a new one - * will be allocated. And the PASID allocated with SVM_FLAG_PRIVATE_PASID - * will not be given to subsequent callers. This facility allows a driver to - * disambiguate between multiple device contexts which access the same MM, - * if there is no other way to do so. It should be used sparingly, if at all. - */ -#define SVM_FLAG_PRIVATE_PASID (1<<0) - /* * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only * for access to kernel addresses. No IOTLB flushes are automatically done @@ -35,18 +25,18 @@ * It is unlikely that we will ever hook into flush_tlb_kernel_range() to * do such IOTLB flushes automatically. */ -#define SVM_FLAG_SUPERVISOR_MODE (1<<1) +#define SVM_FLAG_SUPERVISOR_MODE BIT(0) /* * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest * processes. Compared to the host bind, the primary differences are: * 1. mm life cycle management * 2. fault reporting */ -#define SVM_FLAG_GUEST_MODE (1<<2) +#define SVM_FLAG_GUEST_MODE BIT(1) /* * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space, * which requires guest and host PASID translation at both directions. */ -#define SVM_FLAG_GUEST_PASID (1<<3) +#define SVM_FLAG_GUEST_PASID BIT(2) #endif /* __INTEL_SVM_H__ */ -- cgit v1.2.3 From 3431c3f660a39f6ced954548a59dba6541ce3eb1 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Thu, 25 Mar 2021 11:38:24 +0800 Subject: iommu: Fix a boundary issue to avoid performance drop After the change of patch ("iommu: Switch gather->end to the inclusive end"), the performace drops from 1600+K IOPS to 1200K in our kunpeng ARM64 platform. We find that the range [start1, end1) actually is joint from the range [end1, end2), but it is considered as disjoint after the change, so it needs more times of TLB sync, and spends more time on it. So fix the boundary issue to avoid performance drop. Fixes: 862c3715de8f ("iommu: Switch gather->end to the inclusive end") Signed-off-by: Xiang Chen Acked-by: Will Deacon Link: https://lore.kernel.org/r/1616643504-120688-1-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5e7fe519430a..9ca6e6b8084d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -547,7 +547,7 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, * structure can be rewritten. */ if (gather->pgsize != size || - end < gather->start || start > gather->end) { + end + 1 < gather->start || start > gather->end + 1) { if (gather->pgsize) iommu_iotlb_sync(domain, gather); gather->pgsize = size; -- cgit v1.2.3 From f598a497bc7dfbec60270bca8b8408db3d23ac07 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 25 Mar 2021 20:29:58 +0800 Subject: iova: Add CPU hotplug handler to flush rcaches Like the Intel IOMMU driver already does, flush the per-IOVA domain CPU rcache when a CPU goes offline - there's no point in keeping it. Reviewed-by: Robin Murphy Signed-off-by: John Garry Link: https://lore.kernel.org/r/1616675401-151997-2-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- include/linux/cpuhotplug.h | 1 + include/linux/iova.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index f14adb882338..cedac9986557 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -58,6 +58,7 @@ enum cpuhp_state { CPUHP_NET_DEV_DEAD, CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_INTEL_DEAD, + CPUHP_IOMMU_IOVA_DEAD, CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, diff --git a/include/linux/iova.h b/include/linux/iova.h index c834c01c0a5b..4be6c0ab4997 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -95,6 +95,7 @@ struct iova_domain { flush-queues */ atomic_t fq_timer_on; /* 1 when timer is active, 0 when not */ + struct hlist_node cpuhp_dead; }; static inline unsigned long iova_size(struct iova *iova) -- cgit v1.2.3 From 363f266eeff6e22a09483dc922dccd7cd0b9fe9c Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 25 Mar 2021 20:29:59 +0800 Subject: iommu/vt-d: Remove IOVA domain rcache flushing for CPU offlining Now that the core code handles flushing per-IOVA domain CPU rcaches, remove the handling here. Reviewed-by: Lu Baolu Signed-off-by: John Garry Link: https://lore.kernel.org/r/1616675401-151997-3-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index cedac9986557..85996494bec1 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -57,7 +57,6 @@ enum cpuhp_state { CPUHP_PAGE_ALLOC_DEAD, CPUHP_NET_DEV_DEAD, CPUHP_PCI_XGENE_DEAD, - CPUHP_IOMMU_INTEL_DEAD, CPUHP_IOMMU_IOVA_DEAD, CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, -- cgit v1.2.3 From 149448b353e2517ecc6eced7d9f46e9f3e08b89e Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 25 Mar 2021 20:30:00 +0800 Subject: iommu: Delete iommu_dma_free_cpu_cached_iovas() Function iommu_dma_free_cpu_cached_iovas() no longer has any caller, so delete it. With that, function free_cpu_cached_iovas() may be made static. Signed-off-by: John Garry Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/1616675401-151997-4-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- include/linux/dma-iommu.h | 5 ----- include/linux/iova.h | 5 ----- 2 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 13d1f4c14d7b..6e75a2d689b4 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -83,10 +83,5 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } -static inline void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, - struct iommu_domain *domain) -{ -} - #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ diff --git a/include/linux/iova.h b/include/linux/iova.h index 4be6c0ab4997..71d8a2de6635 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -157,7 +157,6 @@ int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); -void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); #else static inline int iova_cache_get(void) { @@ -234,10 +233,6 @@ static inline void put_iova_domain(struct iova_domain *iovad) { } -static inline void free_cpu_cached_iovas(unsigned int cpu, - struct iova_domain *iovad) -{ -} #endif #endif -- cgit v1.2.3 From 0d35309ab5e080095190965aa7cfc3ca8fb88af9 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 1 Apr 2021 17:47:10 +0200 Subject: iommu: Fix comment for struct iommu_fwspec Commit 986d5ecc5699 ("iommu: Move fwspec->iommu_priv to struct dev_iommu") removed iommu_priv from fwspec and commit 5702ee24182f ("ACPI/IORT: Check ATS capability in root complex nodes") added @flags. Update the struct doc. Acked-by: Jonathan Cameron Acked-by: Will Deacon Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210401154718.307519-2-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5e7fe519430a..1d422bf722a1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -571,7 +571,7 @@ struct iommu_group *fsl_mc_device_group(struct device *dev); * struct iommu_fwspec - per-device IOMMU instance data * @ops: ops for this device's IOMMU * @iommu_fwnode: firmware handle for this device's IOMMU - * @iommu_priv: IOMMU driver private data for this device + * @flags: IOMMU_FWSPEC_* flags * @num_pasid_bits: number of PASID bits supported by this device * @num_ids: number of associated device IDs * @ids: IDs which this device may present to the IOMMU -- cgit v1.2.3 From 434b73e61cc65cdd26618af6fa4736c2ba1eb29b Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 1 Apr 2021 17:47:11 +0200 Subject: iommu/arm-smmu-v3: Use device properties for pasid-num-bits The pasid-num-bits property shouldn't need a dedicated fwspec field, it's a job for device properties. Add properties for IORT, and access the number of PASID bits using device_property_read_u32(). Suggested-by: Robin Murphy Acked-by: Jonathan Cameron Acked-by: Will Deacon Reviewed-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20210401154718.307519-3-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1d422bf722a1..16ce75693d83 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -572,7 +572,6 @@ struct iommu_group *fsl_mc_device_group(struct device *dev); * @ops: ops for this device's IOMMU * @iommu_fwnode: firmware handle for this device's IOMMU * @flags: IOMMU_FWSPEC_* flags - * @num_pasid_bits: number of PASID bits supported by this device * @num_ids: number of associated device IDs * @ids: IDs which this device may present to the IOMMU */ @@ -580,7 +579,6 @@ struct iommu_fwspec { const struct iommu_ops *ops; struct fwnode_handle *iommu_fwnode; u32 flags; - u32 num_pasid_bits; unsigned int num_ids; u32 ids[]; }; -- cgit v1.2.3 From 34b48c704d194738eef0893aa06e412bdc8a972f Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 1 Apr 2021 17:47:12 +0200 Subject: iommu: Separate IOMMU_DEV_FEAT_IOPF from IOMMU_DEV_FEAT_SVA Some devices manage I/O Page Faults (IOPF) themselves instead of relying on PCIe PRI or Arm SMMU stall. Allow their drivers to enable SVA without mandating IOMMU-managed IOPF. The other device drivers now need to first enable IOMMU_DEV_FEAT_IOPF before enabling IOMMU_DEV_FEAT_SVA. Enabling IOMMU_DEV_FEAT_IOPF on its own doesn't have any effect visible to the device driver, it is used in combination with other features. Reviewed-by: Eric Auger Reviewed-by: Lu Baolu Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210401154718.307519-4-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 16ce75693d83..45c4eb372f56 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -156,10 +156,24 @@ struct iommu_resv_region { enum iommu_resv_type type; }; -/* Per device IOMMU features */ +/** + * enum iommu_dev_features - Per device IOMMU features + * @IOMMU_DEV_FEAT_AUX: Auxiliary domain feature + * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses + * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. Generally + * enabling %IOMMU_DEV_FEAT_SVA requires + * %IOMMU_DEV_FEAT_IOPF, but some devices manage I/O Page + * Faults themselves instead of relying on the IOMMU. When + * supported, this feature must be enabled before and + * disabled after %IOMMU_DEV_FEAT_SVA. + * + * Device drivers query whether a feature is supported using + * iommu_dev_has_feature(), and enable it using iommu_dev_enable_feature(). + */ enum iommu_dev_features { - IOMMU_DEV_FEAT_AUX, /* Aux-domain feature */ - IOMMU_DEV_FEAT_SVA, /* Shared Virtual Addresses */ + IOMMU_DEV_FEAT_AUX, + IOMMU_DEV_FEAT_SVA, + IOMMU_DEV_FEAT_IOPF, }; #define IOMMU_PASID_INVALID (-1U) -- cgit v1.2.3 From fc36479db74e957c4696b605a32c4afaa15fa6cb Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 1 Apr 2021 17:47:15 +0200 Subject: iommu: Add a page fault handler Some systems allow devices to handle I/O Page Faults in the core mm. For example systems implementing the PCIe PRI extension or Arm SMMU stall model. Infrastructure for reporting these recoverable page faults was added to the IOMMU core by commit 0c830e6b3282 ("iommu: Introduce device fault report API"). Add a page fault handler for host SVA. IOMMU driver can now instantiate several fault workqueues and link them to IOPF-capable devices. Drivers can choose between a single global workqueue, one per IOMMU device, one per low-level fault queue, one per domain, etc. When it receives a fault event, most commonly in an IRQ handler, the IOMMU driver reports the fault using iommu_report_device_fault(), which calls the registered handler. The page fault handler then calls the mm fault handler, and reports either success or failure with iommu_page_response(). After the handler succeeds, the hardware retries the access. The iopf_param pointer could be embedded into iommu_fault_param. But putting iopf_param into the iommu_param structure allows us not to care about ordering between calls to iopf_queue_add_device() and iommu_register_device_fault_handler(). Tested-by: Lu Baolu Reviewed-by: Eric Auger Reviewed-by: Jacob Pan Reviewed-by: Jonathan Cameron Reviewed-by: Lu Baolu Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210401154718.307519-7-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 45c4eb372f56..86d688c4418f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -367,6 +367,7 @@ struct iommu_fault_param { * struct dev_iommu - Collection of per-device IOMMU data * * @fault_param: IOMMU detected device fault reporting data + * @iopf_param: I/O Page Fault queue and data * @fwspec: IOMMU fwspec data * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data @@ -377,6 +378,7 @@ struct iommu_fault_param { struct dev_iommu { struct mutex lock; struct iommu_fault_param *fault_param; + struct iopf_device_param *iopf_param; struct iommu_fwspec *fwspec; struct iommu_device *iommu_dev; void *priv; -- cgit v1.2.3 From 47685cb202d1aff6f70a2bb91e8271392fefea84 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Apr 2021 17:52:37 +0200 Subject: iommu: remove the unused domain_window_disable method domain_window_disable is wired up by fsl_pamu, but never actually called. Signed-off-by: Christoph Hellwig Acked-by: Will Deacon Acked-by: Li Yang Link: https://lore.kernel.org/r/20210401155256.298656-2-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 86d688c4418f..565b8810354d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -223,7 +223,6 @@ struct iommu_iotlb_gather { * @put_resv_regions: Free list of reserved regions for a device * @apply_resv_region: Temporary helper call-back for iova reserved ranges * @domain_window_enable: Configure and enable a particular window for a domain - * @domain_window_disable: Disable a particular window for a domain * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) @@ -284,7 +283,6 @@ struct iommu_ops { /* Window handling functions */ int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, u64 size, int prot); - void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr); int (*of_xlate)(struct device *dev, struct of_phandle_args *args); bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); -- cgit v1.2.3 From 392825e0c76cf9aca33e5a3bf981cde2a2c87251 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Apr 2021 17:52:38 +0200 Subject: iommu/fsl_pamu: remove fsl_pamu_get_domain_attr None of the values returned by this function are ever queried. Also remove the DOMAIN_ATTR_FSL_PAMUV1 enum value that is not otherwise used. Signed-off-by: Christoph Hellwig Acked-by: Will Deacon Acked-by: Li Yang Link: https://lore.kernel.org/r/20210401155256.298656-3-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 565b8810354d..95ce205347a9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -104,9 +104,6 @@ enum iommu_cap { * -the actual size of the mapped region of a window must be power * of 2 starting with 4KB and physical address must be naturally * aligned. - * DOMAIN_ATTR_FSL_PAMUV1 corresponds to the above mentioned contraints. - * The caller can invoke iommu_domain_get_attr to check if the underlying - * iommu implementation supports these constraints. */ enum iommu_attr { @@ -115,7 +112,6 @@ enum iommu_attr { DOMAIN_ATTR_WINDOWS, DOMAIN_ATTR_FSL_PAMU_STASH, DOMAIN_ATTR_FSL_PAMU_ENABLE, - DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_NESTING, /* two stages of translation */ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, DOMAIN_ATTR_IO_PGTABLE_CFG, -- cgit v1.2.3 From ba58d1216e2b2d2320b50591b767f50b13c623a8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Apr 2021 17:52:41 +0200 Subject: iommu/fsl_pamu: remove support for multiple windows The only domains allocated forces use of a single window. Remove all the code related to multiple window support, as well as the need for qman_portal to force a single window. Remove the now unused DOMAIN_ATTR_WINDOWS iommu_attr. Signed-off-by: Christoph Hellwig Acked-by: Will Deacon Acked-by: Li Yang Link: https://lore.kernel.org/r/20210401155256.298656-6-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 95ce205347a9..761aa8b69936 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -109,7 +109,6 @@ enum iommu_cap { enum iommu_attr { DOMAIN_ATTR_GEOMETRY, DOMAIN_ATTR_PAGING, - DOMAIN_ATTR_WINDOWS, DOMAIN_ATTR_FSL_PAMU_STASH, DOMAIN_ATTR_FSL_PAMU_ENABLE, DOMAIN_ATTR_NESTING, /* two stages of translation */ -- cgit v1.2.3 From 376dfd2a2ff41596a6efc8ea56f8b0de172b04a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Apr 2021 17:52:42 +0200 Subject: iommu/fsl_pamu: remove ->domain_window_enable The only thing that fsl_pamu_window_enable does for the current caller is to fill in the prot value in the only dma_window structure, and to propagate a few values from the iommu_domain_geometry struture into the dma_window. Remove the dma_window entirely, hardcode the prot value and otherwise use the iommu_domain_geometry structure instead. Remove the now unused ->domain_window_enable iommu method. Signed-off-by: Christoph Hellwig Acked-by: Li Yang Link: https://lore.kernel.org/r/20210401155256.298656-7-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 761aa8b69936..69537e83aae4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -217,7 +217,6 @@ struct iommu_iotlb_gather { * @get_resv_regions: Request list of reserved regions for a device * @put_resv_regions: Free list of reserved regions for a device * @apply_resv_region: Temporary helper call-back for iova reserved ranges - * @domain_window_enable: Configure and enable a particular window for a domain * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) @@ -275,10 +274,6 @@ struct iommu_ops { struct iommu_domain *domain, struct iommu_resv_region *region); - /* Window handling functions */ - int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t paddr, u64 size, int prot); - int (*of_xlate)(struct device *dev, struct of_phandle_args *args); bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); @@ -521,11 +516,6 @@ extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr, extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, void *data); -/* Window handling function prototypes */ -extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t offset, u64 size, - int prot); - extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); @@ -749,13 +739,6 @@ static inline void iommu_iotlb_sync(struct iommu_domain *domain, { } -static inline int iommu_domain_window_enable(struct iommu_domain *domain, - u32 wnd_nr, phys_addr_t paddr, - u64 size, int prot) -{ - return -ENODEV; -} - static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { return 0; -- cgit v1.2.3 From 4eeb96f6efac10e66fd10e718d2adeece3879121 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Apr 2021 17:52:43 +0200 Subject: iommu/fsl_pamu: replace DOMAIN_ATTR_FSL_PAMU_STASH with a direct call Add a fsl_pamu_configure_l1_stash API that qman_portal can call directly instead of indirecting through the iommu attr API. Signed-off-by: Christoph Hellwig Acked-by: Will Deacon Acked-by: Li Yang Link: https://lore.kernel.org/r/20210401155256.298656-8-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 69537e83aae4..a3968122aa69 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -109,7 +109,6 @@ enum iommu_cap { enum iommu_attr { DOMAIN_ATTR_GEOMETRY, DOMAIN_ATTR_PAGING, - DOMAIN_ATTR_FSL_PAMU_STASH, DOMAIN_ATTR_FSL_PAMU_ENABLE, DOMAIN_ATTR_NESTING, /* two stages of translation */ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, -- cgit v1.2.3 From 7d61cb6ff0122a017ae907aed62478a4db9c5991 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Apr 2021 17:52:46 +0200 Subject: iommu/fsl_pamu: enable the liodn when attaching a device Instead of a separate call to enable all devices from the list, just enable the liodn once the device is attached to the iommu domain. This also remove the DOMAIN_ATTR_FSL_PAMU_ENABLE iommu_attr. Signed-off-by: Christoph Hellwig Acked-by: Will Deacon Acked-by: Li Yang Link: https://lore.kernel.org/r/20210401155256.298656-11-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a3968122aa69..f5caaa8d39be 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -109,7 +109,6 @@ enum iommu_cap { enum iommu_attr { DOMAIN_ATTR_GEOMETRY, DOMAIN_ATTR_PAGING, - DOMAIN_ATTR_FSL_PAMU_ENABLE, DOMAIN_ATTR_NESTING, /* two stages of translation */ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, DOMAIN_ATTR_IO_PGTABLE_CFG, -- cgit v1.2.3 From 9fb5fad562fa0a41c84691714d99c23f54168a9e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Apr 2021 17:52:50 +0200 Subject: iommu: remove DOMAIN_ATTR_PAGING DOMAIN_ATTR_PAGING is never used. Signed-off-by: Christoph Hellwig Acked-by: Will Deacon Acked-by: Li Yang Link: https://lore.kernel.org/r/20210401155256.298656-15-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f5caaa8d39be..85b51084b820 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -108,7 +108,6 @@ enum iommu_cap { enum iommu_attr { DOMAIN_ATTR_GEOMETRY, - DOMAIN_ATTR_PAGING, DOMAIN_ATTR_NESTING, /* two stages of translation */ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, DOMAIN_ATTR_IO_PGTABLE_CFG, -- cgit v1.2.3 From bc9a05eef113e75cfa792fdf24dae011bc3d5294 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Apr 2021 17:52:51 +0200 Subject: iommu: remove DOMAIN_ATTR_GEOMETRY The geometry information can be trivially queried from the iommu_domain struture. Signed-off-by: Christoph Hellwig Acked-by: Will Deacon Acked-by: Li Yang Link: https://lore.kernel.org/r/20210401155256.298656-16-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 85b51084b820..207446cc6d3f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -107,7 +107,6 @@ enum iommu_cap { */ enum iommu_attr { - DOMAIN_ATTR_GEOMETRY, DOMAIN_ATTR_NESTING, /* two stages of translation */ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, DOMAIN_ATTR_IO_PGTABLE_CFG, -- cgit v1.2.3 From 7e147547783a9035df816864b6a45ffbb254d700 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Apr 2021 17:52:52 +0200 Subject: iommu: remove DOMAIN_ATTR_NESTING Use an explicit enable_nesting method instead. Signed-off-by: Christoph Hellwig Acked-by: Will Deacon Acked-by: Li Yang Link: https://lore.kernel.org/r/20210401155256.298656-17-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 207446cc6d3f..d7d76b5e1192 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -107,7 +107,6 @@ enum iommu_cap { */ enum iommu_attr { - DOMAIN_ATTR_NESTING, /* two stages of translation */ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, DOMAIN_ATTR_IO_PGTABLE_CFG, DOMAIN_ATTR_MAX, @@ -210,6 +209,7 @@ struct iommu_iotlb_gather { * @device_group: find iommu group for a particular device * @domain_get_attr: Query domain attributes * @domain_set_attr: Change domain attributes + * @enable_nesting: Enable nesting * @get_resv_regions: Request list of reserved regions for a device * @put_resv_regions: Free list of reserved regions for a device * @apply_resv_region: Temporary helper call-back for iova reserved ranges @@ -262,6 +262,7 @@ struct iommu_ops { enum iommu_attr attr, void *data); int (*domain_set_attr)(struct iommu_domain *domain, enum iommu_attr attr, void *data); + int (*enable_nesting)(struct iommu_domain *domain); /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); @@ -511,6 +512,7 @@ extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr, void *data); extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, void *data); +int iommu_enable_nesting(struct iommu_domain *domain); extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); -- cgit v1.2.3 From a250c23f15c21c556becd4986f453255e545807c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 1 Apr 2021 17:52:54 +0200 Subject: iommu: remove DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE Instead make the global iommu_dma_strict paramete in iommu.c canonical by exporting helpers to get and set it and use those directly in the drivers. This make sure that the iommu.strict parameter also works for the AMD and Intel IOMMU drivers on x86. As those default to lazy flushing a new IOMMU_CMD_LINE_STRICT is used to turn the value into a tristate to represent the default if not overriden by an explicit parameter. [ported on top of the other iommu_attr changes and added a few small missing bits] Signed-off-by: Robin Murphy . Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210401155256.298656-19-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d7d76b5e1192..9349bdd62e91 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -107,7 +107,6 @@ enum iommu_cap { */ enum iommu_attr { - DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, DOMAIN_ATTR_IO_PGTABLE_CFG, DOMAIN_ATTR_MAX, }; @@ -514,6 +513,9 @@ extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, void *data); int iommu_enable_nesting(struct iommu_domain *domain); +void iommu_set_dma_strict(bool val); +bool iommu_get_dma_strict(struct iommu_domain *domain); + extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); -- cgit v1.2.3 From 4fc52b81e87be583efb834df5b58245cb9ddd3e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Apr 2021 17:52:55 +0200 Subject: iommu: remove DOMAIN_ATTR_IO_PGTABLE_CFG Use an explicit set_pgtable_quirks method instead that just passes the actual quirk bitmask instead. Signed-off-by: Christoph Hellwig Acked-by: Will Deacon Acked-by: Li Yang Link: https://lore.kernel.org/r/20210401155256.298656-20-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/io-pgtable.h | 4 ---- include/linux/iommu.h | 12 +++++++++++- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index a4c9ca2c31f1..4d40dfa75b55 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -204,10 +204,6 @@ struct io_pgtable { #define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops) -struct io_pgtable_domain_attr { - unsigned long quirks; -}; - static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop) { if (iop->cfg.tlb && iop->cfg.tlb->tlb_flush_all) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9349bdd62e91..fbac49fe0880 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -107,7 +107,6 @@ enum iommu_cap { */ enum iommu_attr { - DOMAIN_ATTR_IO_PGTABLE_CFG, DOMAIN_ATTR_MAX, }; @@ -209,6 +208,7 @@ struct iommu_iotlb_gather { * @domain_get_attr: Query domain attributes * @domain_set_attr: Change domain attributes * @enable_nesting: Enable nesting + * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @get_resv_regions: Request list of reserved regions for a device * @put_resv_regions: Free list of reserved regions for a device * @apply_resv_region: Temporary helper call-back for iova reserved ranges @@ -262,6 +262,8 @@ struct iommu_ops { int (*domain_set_attr)(struct iommu_domain *domain, enum iommu_attr attr, void *data); int (*enable_nesting)(struct iommu_domain *domain); + int (*set_pgtable_quirks)(struct iommu_domain *domain, + unsigned long quirks); /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); @@ -512,6 +514,8 @@ extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr, extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, void *data); int iommu_enable_nesting(struct iommu_domain *domain); +int iommu_set_pgtable_quirks(struct iommu_domain *domain, + unsigned long quirks); void iommu_set_dma_strict(bool val); bool iommu_get_dma_strict(struct iommu_domain *domain); @@ -891,6 +895,12 @@ static inline int iommu_domain_set_attr(struct iommu_domain *domain, return -EINVAL; } +static inline int iommu_set_pgtable_quirks(struct iommu_domain *domain, + unsigned long quirks) +{ + return 0; +} + static inline int iommu_device_register(struct iommu_device *iommu) { return -ENODEV; -- cgit v1.2.3 From 7876a83ffe8c23c7049a63c747a7b96cafaf10a4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Apr 2021 17:52:56 +0200 Subject: iommu: remove iommu_domain_{get,set}_attr Remove the now unused iommu attr infrastructure. Signed-off-by: Christoph Hellwig Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210401155256.298656-21-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index fbac49fe0880..a5b3af54fbb8 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -96,20 +96,6 @@ enum iommu_cap { IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */ }; -/* - * Following constraints are specifc to FSL_PAMUV1: - * -aperture must be power of 2, and naturally aligned - * -number of windows must be power of 2, and address space size - * of each window is determined by aperture size / # of windows - * -the actual size of the mapped region of a window must be power - * of 2 starting with 4KB and physical address must be naturally - * aligned. - */ - -enum iommu_attr { - DOMAIN_ATTR_MAX, -}; - /* These are the possible reserved region types */ enum iommu_resv_type { /* Memory regions which must be mapped 1:1 at all times */ @@ -205,8 +191,6 @@ struct iommu_iotlb_gather { * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain * @device_group: find iommu group for a particular device - * @domain_get_attr: Query domain attributes - * @domain_set_attr: Change domain attributes * @enable_nesting: Enable nesting * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @get_resv_regions: Request list of reserved regions for a device @@ -257,10 +241,6 @@ struct iommu_ops { void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); - int (*domain_get_attr)(struct iommu_domain *domain, - enum iommu_attr attr, void *data); - int (*domain_set_attr)(struct iommu_domain *domain, - enum iommu_attr attr, void *data); int (*enable_nesting)(struct iommu_domain *domain); int (*set_pgtable_quirks)(struct iommu_domain *domain, unsigned long quirks); @@ -509,10 +489,6 @@ extern int iommu_page_response(struct device *dev, extern int iommu_group_id(struct iommu_group *group); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); -extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr, - void *data); -extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, - void *data); int iommu_enable_nesting(struct iommu_domain *domain); int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks); @@ -883,18 +859,6 @@ static inline int iommu_group_id(struct iommu_group *group) return -ENODEV; } -static inline int iommu_domain_get_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) -{ - return -EINVAL; -} - -static inline int iommu_domain_set_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) -{ - return -EINVAL; -} - static inline int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks) { -- cgit v1.2.3 From d151c85c52a314c6ecb91ab35b3f696a6778b509 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Apr 2021 16:33:09 +0200 Subject: iommu/amd: Remove the unused device errata code The device errata mechism is entirely unused, so remove it. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210402143312.372386-2-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/amd-iommu.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 450717299928..474065ed88a4 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -32,24 +32,6 @@ struct pci_dev; extern int amd_iommu_detect(void); extern int amd_iommu_init_hardware(void); -/** - * amd_iommu_enable_device_erratum() - Enable erratum workaround for device - * in the IOMMUv2 driver - * @pdev: The PCI device the workaround is necessary for - * @erratum: The erratum workaround to enable - * - * The function needs to be called before amd_iommu_init_device(). - * Possible values for the erratum number are for now: - * - AMD_PRI_DEV_ERRATUM_ENABLE_RESET - Reset PRI capability when PRI - * is enabled - * - AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE - Limit number of outstanding PRI - * requests to one - */ -#define AMD_PRI_DEV_ERRATUM_ENABLE_RESET 0 -#define AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE 1 - -extern void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum); - /** * amd_iommu_init_device() - Init device for use with IOMMUv2 driver * @pdev: The PCI device to initialize -- cgit v1.2.3 From fc1b6620501f1a4b88f583549c63666180bea177 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Apr 2021 16:33:12 +0200 Subject: iommu/amd: Move a few prototypes to include/linux/amd-iommu.h A few functions that were intentended for the perf events support are currently declared in arch/x86/events/amd/iommu.h, which mens they are not in scope for the actual function definition. Also amdkfd has started using a few of them using externs in a .c file. End that misery by moving the prototypes to the proper header. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210402143312.372386-5-hch@lst.de Signed-off-by: Joerg Roedel --- include/linux/amd-iommu.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 474065ed88a4..58e6c3806c09 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -10,6 +10,8 @@ #include +struct amd_iommu; + /* * This is mainly used to communicate information back-and-forth * between SVM and IOMMU for setting up and tearing down posted @@ -194,4 +196,14 @@ static inline int amd_iommu_deactivate_guest_mode(void *data) } #endif /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ +int amd_iommu_get_num_iommus(void); +bool amd_iommu_pc_supported(void); +u8 amd_iommu_pc_get_max_banks(unsigned int idx); +u8 amd_iommu_pc_get_max_counters(unsigned int idx); +int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, + u64 *value); +int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, + u64 *value); +struct amd_iommu *get_amd_iommu(unsigned int idx); + #endif /* _ASM_X86_AMD_IOMMU_H */ -- cgit v1.2.3 From c0474a606ecb9326227b4d68059942f9db88a897 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 20 Mar 2021 10:54:13 +0800 Subject: iommu/vt-d: Invalidate PASID cache when root/context entry changed When the Intel IOMMU is operating in the scalable mode, some information from the root and context table may be used to tag entries in the PASID cache. Software should invalidate the PASID-cache when changing root or context table entries. Suggested-by: Ashok Raj Fixes: 7373a8cc38197 ("iommu/vt-d: Setup context and enable RID2PASID support") Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210320025415.641201-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e0f8c2ade3e8..03faf20a6817 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -378,6 +378,7 @@ enum { /* PASID cache invalidation granu */ #define QI_PC_ALL_PASIDS 0 #define QI_PC_PASID_SEL 1 +#define QI_PC_GLOBAL 3 #define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) #define QI_EIOTLB_IH(ih) (((u64)ih) << 6) -- cgit v1.2.3 From 9de07a4e8d4cb269f9876b2ffa282b5ffd09e05b Mon Sep 17 00:00:00 2001 From: John Chen Date: Tue, 30 Mar 2021 19:33:19 +0800 Subject: HID: input: map battery capacity (00850065) This is the capacity in percentage, relative to design capacity. Specifically, it is present in Apple Magic Mouse 2. In contrast, usage 00850064 is also the capacity in percentage, but is relative to full capacity. It is not mapped here because I don't have such device. Signed-off-by: John Chen Signed-off-by: Jiri Kosina --- include/linux/hid.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index ef702b3f56e3..b40e1abbe11d 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -153,6 +153,7 @@ struct hid_item { #define HID_UP_CONSUMER 0x000c0000 #define HID_UP_DIGITIZER 0x000d0000 #define HID_UP_PID 0x000f0000 +#define HID_UP_BATTERY 0x00850000 #define HID_UP_HPVENDOR 0xff7f0000 #define HID_UP_HPVENDOR2 0xff010000 #define HID_UP_MSVENDOR 0xff000000 @@ -297,6 +298,8 @@ struct hid_item { #define HID_DG_TOOLSERIALNUMBER 0x000d005b #define HID_DG_LATENCYMODE 0x000d0060 +#define HID_BAT_ABSOLUTESTATEOFCHARGE 0x00850065 + #define HID_VD_ASUS_CUSTOM_MEDIA_KEYS 0xff310076 /* * HID report types --- Ouch! HID spec says 1 2 3! -- cgit v1.2.3 From 4a35d6a03744ded782c9301f5f5d78ad68ce680f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 Apr 2021 13:17:10 +0100 Subject: irqdomain: Get rid of irq_create_identity_mapping() The sole user of irq_create_identity_mapping() having been converted, get rid of the unused helper. Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 33cacc8af26d..d2c61de208a8 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -419,12 +419,6 @@ extern int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); -static inline int irq_create_identity_mapping(struct irq_domain *host, - irq_hw_number_t hwirq) -{ - return irq_create_strict_mappings(host, hwirq, hwirq, 1); -} - extern const struct irq_domain_ops irq_domain_simple_ops; /* stock xlate functions */ -- cgit v1.2.3 From a8cf291bdac5d415eadb55e79df1fca8c3f0dfef Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Wed, 9 Dec 2020 14:09:26 +0800 Subject: ptp: Reorganize ptp_kvm.c to make it arch-independent Currently, the ptp_kvm module contains a lot of x86-specific code. Let's move this code into a new arch-specific file in the same directory, and rename the arch-independent file to ptp_kvm_common.c. Acked-by: Richard Cochran Reviewed-by: Andre Przywara Signed-off-by: Jianyong Wu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201209060932.212364-4-jianyong.wu@arm.com --- include/linux/ptp_kvm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/ptp_kvm.h (limited to 'include/linux') diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h new file mode 100644 index 000000000000..f960a719f0d5 --- /dev/null +++ b/include/linux/ptp_kvm.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Virtual PTP 1588 clock for use with KVM guests + * + * Copyright (C) 2017 Red Hat Inc. + */ + +#ifndef _PTP_KVM_H_ +#define _PTP_KVM_H_ + +struct timespec64; +struct clocksource; + +int kvm_arch_ptp_init(void); +int kvm_arch_ptp_get_clock(struct timespec64 *ts); +int kvm_arch_ptp_get_crosststamp(u64 *cycle, + struct timespec64 *tspec, struct clocksource **cs); + +#endif /* _PTP_KVM_H_ */ -- cgit v1.2.3 From b2c67cbe9f447312f5cdd7c6641b463f2349aec0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 Dec 2020 14:09:27 +0800 Subject: time: Add mechanism to recognize clocksource in time_get_snapshot System time snapshots are not conveying information about the current clocksource which was used, but callers like the PTP KVM guest implementation have the requirement to evaluate the clocksource type to select the appropriate mechanism. Introduce a clocksource id field in struct clocksource which is by default set to CSID_GENERIC (0). Clocksource implementations can set that field to a value which allows to identify the clocksource. Store the clocksource id of the current clocksource in the system_time_snapshot so callers can evaluate which clocksource was used to take the snapshot and act accordingly. Signed-off-by: Thomas Gleixner Signed-off-by: Jianyong Wu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201209060932.212364-5-jianyong.wu@arm.com --- include/linux/clocksource.h | 6 ++++++ include/linux/clocksource_ids.h | 11 +++++++++++ include/linux/timekeeping.h | 12 +++++++----- 3 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 include/linux/clocksource_ids.h (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 86d143db6523..1290d0dce840 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -62,6 +63,10 @@ struct module; * 400-499: Perfect * The ideal clocksource. A must-use where * available. + * @id: Defaults to CSID_GENERIC. The id value is captured + * in certain snapshot functions to allow callers to + * validate the clocksource from which the snapshot was + * taken. * @flags: Flags describing special properties * @enable: Optional function to enable the clocksource * @disable: Optional function to disable the clocksource @@ -100,6 +105,7 @@ struct clocksource { const char *name; struct list_head list; int rating; + enum clocksource_ids id; enum vdso_clock_mode vdso_clock_mode; unsigned long flags; diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h new file mode 100644 index 000000000000..4d8e19e05328 --- /dev/null +++ b/include/linux/clocksource_ids.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CLOCKSOURCE_IDS_H +#define _LINUX_CLOCKSOURCE_IDS_H + +/* Enum to give clocksources a unique identifier */ +enum clocksource_ids { + CSID_GENERIC = 0, + CSID_MAX, +}; + +#endif diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index c6792cf01bc7..78a98bdff76d 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -3,6 +3,7 @@ #define _LINUX_TIMEKEEPING_H #include +#include /* Included from linux/ktime.h */ @@ -243,11 +244,12 @@ struct ktime_timestamps { * @cs_was_changed_seq: The sequence number of clocksource change events */ struct system_time_snapshot { - u64 cycles; - ktime_t real; - ktime_t raw; - unsigned int clock_was_set_seq; - u8 cs_was_changed_seq; + u64 cycles; + ktime_t real; + ktime_t raw; + enum clocksource_ids cs_id; + unsigned int clock_was_set_seq; + u8 cs_was_changed_seq; }; /** -- cgit v1.2.3 From 100148d0fc7dcf8672fe0ac83f44dc5749b4da5c Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Wed, 9 Dec 2020 14:09:28 +0800 Subject: clocksource: Add clocksource id for arm arch counter Add clocksource id to the ARM generic counter so that it can be easily identified from callers such as ptp_kvm. Cc: Mark Rutland Reviewed-by: Andre Przywara Signed-off-by: Jianyong Wu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201209060932.212364-6-jianyong.wu@arm.com --- include/linux/clocksource_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h index 4d8e19e05328..16775d7d8f8d 100644 --- a/include/linux/clocksource_ids.h +++ b/include/linux/clocksource_ids.h @@ -5,6 +5,7 @@ /* Enum to give clocksources a unique identifier */ enum clocksource_ids { CSID_GENERIC = 0, + CSID_ARM_ARCH_COUNTER, CSID_MAX, }; -- cgit v1.2.3 From 3bf725699bf62494b3e179f1795f08c7d749f061 Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Wed, 9 Dec 2020 14:09:29 +0800 Subject: KVM: arm64: Add support for the KVM PTP service Implement the hypervisor side of the KVM PTP interface. The service offers wall time and cycle count from host to guest. The caller must specify whether they want the host's view of either the virtual or physical counter. Signed-off-by: Jianyong Wu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201209060932.212364-7-jianyong.wu@arm.com --- include/linux/arm-smccc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 1a27bd9493fe..6861489a1890 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -103,6 +103,7 @@ /* KVM "vendor specific" services */ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 +#define ARM_SMCCC_KVM_FUNC_PTP 1 #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 @@ -114,6 +115,21 @@ #define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED 1 +/* + * ptp_kvm is a feature used for time sync between vm and host. + * ptp_kvm module in guest kernel will get service from host using + * this hypercall ID. + */ +#define ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_PTP) + +/* ptp_kvm counter type ID */ +#define KVM_PTP_VIRT_COUNTER 0 +#define KVM_PTP_PHYS_COUNTER 1 + /* Paravirtualised time calls (defined by ARM DEN0057A) */ #define ARM_SMCCC_HV_PV_TIME_FEATURES \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ -- cgit v1.2.3 From e84dff1bf0eaccd0231ecf02a8f5c9830d7d34dc Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Wed, 24 Mar 2021 15:29:11 +0800 Subject: PM: core: Remove duplicate declaration from header file struct device is declared twice, so remove the duplicate. Signed-off-by: Wan Jiabing Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 482313a8ccfc..c9657408fee1 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -39,7 +39,6 @@ static inline void pm_vt_switch_unregister(struct device *dev) * Device power management */ -struct device; #ifdef CONFIG_PM extern const char power_group_name[]; /* = "power" */ -- cgit v1.2.3 From 2ab80d46fead0309d7f190d8023c8d64b2ffcbd5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 29 Mar 2021 20:15:19 +0200 Subject: cpuidle: Use s64 as exit_latency_ns and target_residency_ns data type Subsequent changes will cause the exit_latency_ns and target_residency_ns fields in struct cpuidle_state to be used in computations in which data type conversions to u64 may turn a negative number close to zero into a verly large positive number leading to incorrect results. In preparation for that, change the data type of the fields mentioned above to s64, but ensure that they will not be negative themselves. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index bd605b5585cf..fce476275e16 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -49,8 +49,8 @@ struct cpuidle_state { char name[CPUIDLE_NAME_LEN]; char desc[CPUIDLE_DESC_LEN]; - u64 exit_latency_ns; - u64 target_residency_ns; + s64 exit_latency_ns; + s64 target_residency_ns; unsigned int flags; unsigned int exit_latency; /* in US */ int power_usage; /* in mW */ -- cgit v1.2.3 From b3084079c1779645222d5ea4e9da88d38026cae8 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 31 Mar 2021 13:57:12 +0300 Subject: clk: mux: provide devm_clk_hw_register_mux() Add devm_clk_hw_register_mux() - devres-managed version of clk_hw_register_mux(). Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/20210331105735.3690009-2-dmitry.baryshkov@linaro.org Signed-off-by: Rob Clark --- include/linux/clk-provider.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 58f6fe866ae9..3eb15e0262f5 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -868,6 +868,13 @@ struct clk_hw *__clk_hw_register_mux(struct device *dev, struct device_node *np, const struct clk_parent_data *parent_data, unsigned long flags, void __iomem *reg, u8 shift, u32 mask, u8 clk_mux_flags, u32 *table, spinlock_t *lock); +struct clk_hw *__devm_clk_hw_register_mux(struct device *dev, struct device_node *np, + const char *name, u8 num_parents, + const char * const *parent_names, + const struct clk_hw **parent_hws, + const struct clk_parent_data *parent_data, + unsigned long flags, void __iomem *reg, u8 shift, u32 mask, + u8 clk_mux_flags, u32 *table, spinlock_t *lock); struct clk *clk_register_mux_table(struct device *dev, const char *name, const char * const *parent_names, u8 num_parents, unsigned long flags, void __iomem *reg, u8 shift, u32 mask, @@ -902,6 +909,12 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name, __clk_hw_register_mux((dev), NULL, (name), (num_parents), NULL, NULL, \ (parent_data), (flags), (reg), (shift), \ BIT((width)) - 1, (clk_mux_flags), NULL, (lock)) +#define devm_clk_hw_register_mux(dev, name, parent_names, num_parents, flags, reg, \ + shift, width, clk_mux_flags, lock) \ + __devm_clk_hw_register_mux((dev), NULL, (name), (num_parents), \ + (parent_names), NULL, NULL, (flags), (reg), \ + (shift), BIT((width)) - 1, (clk_mux_flags), \ + NULL, (lock)) int clk_mux_val_to_index(struct clk_hw *hw, u32 *table, unsigned int flags, unsigned int val); -- cgit v1.2.3 From f4b43ac0b0af1d115effd08133046a694ac33dc2 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 31 Mar 2021 13:57:13 +0300 Subject: clk: divider: add devm_clk_hw_register_divider Add devm_clk_hw_register_divider() - devres version of clk_hw_register_divider(). Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/20210331105735.3690009-3-dmitry.baryshkov@linaro.org Signed-off-by: Rob Clark --- include/linux/clk-provider.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 3eb15e0262f5..162a2e5546a3 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -785,6 +785,23 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name, (parent_data), (flags), (reg), (shift), \ (width), (clk_divider_flags), (table), \ (lock)) +/** + * devm_clk_hw_register_divider - register a divider clock with the clock framework + * @dev: device registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @reg: register address to adjust divider + * @shift: number of bits to shift the bitfield + * @width: width of the bitfield + * @clk_divider_flags: divider-specific flags for this clock + * @lock: shared register lock for this clock + */ +#define devm_clk_hw_register_divider(dev, name, parent_name, flags, reg, shift, \ + width, clk_divider_flags, lock) \ + __devm_clk_hw_register_divider((dev), NULL, (name), (parent_name), NULL, \ + NULL, (flags), (reg), (shift), (width), \ + (clk_divider_flags), NULL, (lock)) /** * devm_clk_hw_register_divider_table - register a table based divider clock * with the clock framework (devres variant) -- cgit v1.2.3 From 56f15e2cb1f77fbcf9df38de7e5dcb4b37070196 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Apr 2021 17:23:59 -0700 Subject: ethtool: document PHY tunable callbacks Add missing kdoc for phy tunable callbacks. Signed-off-by: Jakub Kicinski Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 3583f7fc075c..5c631a298994 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -410,6 +410,8 @@ struct ethtool_pause_stats { * @get_ethtool_phy_stats: Return extended statistics about the PHY device. * This is only useful if the device maintains PHY statistics and * cannot use the standard PHY library helpers. + * @get_phy_tunable: Read the value of a PHY tunable. + * @set_phy_tunable: Set the value of a PHY tunable. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must -- cgit v1.2.3 From f0ebc2b6b7df7716749445cda26734b3826a48cf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Apr 2021 17:28:25 -0700 Subject: ethtool: un-kdocify extended link state Extended link state structures and enums use kdoc headers but then do not describe any of the members. Convert to normal comments. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index ec4cd3921c67..a2b1a21ee7fd 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -87,9 +87,7 @@ u32 ethtool_op_get_link(struct net_device *dev); int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *eti); -/** - * struct ethtool_link_ext_state_info - link extended state and substate. - */ +/* Link extended state and substate. */ struct ethtool_link_ext_state_info { enum ethtool_link_ext_state link_ext_state; union { -- cgit v1.2.3 From d9c65de0c1e1574d2cc8007dbe02291fe47db1d9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Apr 2021 17:28:27 -0700 Subject: ethtool: fix kdoc in headers Fix remaining issues with kdoc in the ethtool headers. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index a2b1a21ee7fd..7c88dfff7420 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -290,6 +290,9 @@ struct ethtool_pause_stats { * do not attach ext_substate attribute to netlink message). If link_ext_state * and link_ext_substate are unknown, return -ENODATA. If not implemented, * link_ext_state and link_ext_substate will not be sent to userspace. + * @get_eeprom_len: Read range of EEPROM addresses for validation of + * @get_eeprom and @set_eeprom requests. + * Returns 0 if device does not support EEPROM access. * @get_eeprom: Read data from the device EEPROM. * Should fill in the magic field. Don't need to check len for zero * or wraparound. Fill in the data argument with the eeprom values @@ -382,6 +385,8 @@ struct ethtool_pause_stats { * @get_module_eeprom: Get the eeprom information from the plug-in module * @get_eee: Get Energy-Efficient (EEE) supported and status. * @set_eee: Set EEE status (enable/disable) as well as LPI timers. + * @get_tunable: Read the value of a driver / device tunable. + * @set_tunable: Set the value of a driver / device tunable. * @get_per_queue_coalesce: Get interrupt coalescing parameters per queue. * It must check that the given queue number is valid. If neither a RX nor * a TX queue has this number, return -EINVAL. If only a RX queue or a TX @@ -545,8 +550,8 @@ struct phy_tdr_config; * @get_sset_count: Get number of strings that @get_strings will write. * @get_strings: Return a set of strings that describe the requested objects * @get_stats: Return extended statistics about the PHY device. - * @start_cable_test - Start a cable test - * @start_cable_test_tdr - Start a Time Domain Reflectometry cable test + * @start_cable_test: Start a cable test + * @start_cable_test_tdr: Start a Time Domain Reflectometry cable test * * All operations are optional (i.e. the function pointer may be set to %NULL) * and callers must take this into account. Callers must hold the RTNL lock. -- cgit v1.2.3 From 2a3d15f270efa50d78d8a32d895e9d5396668f3a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 6 Apr 2021 16:40:26 -0300 Subject: vfio/mdev: Add missing typesafety around mdev_device The mdev API should accept and pass a 'struct mdev_device *' in all places, not pass a 'struct device *' and cast it internally with to_mdev_device(). Particularly in its struct mdev_driver functions, the whole point of a bus's struct device_driver wrapper is to provide type safety compared to the default struct device_driver. Further, the driver core standard is for bus drivers to expose their device structure in their public headers that can be used with container_of() inlines and '&foo->dev' to go between the class levels, and '&foo->dev' to be used with dev_err/etc driver core helper functions. Move 'struct mdev_device' to mdev.h Once done this allows moving some one instruction exported functions to static inlines, which in turns allows removing one of the two grotesque symbol_get()'s related to mdev in the core code. Reviewed-by: Kevin Tian Reviewed-by: Cornelia Huck Signed-off-by: Jason Gunthorpe Reviewed-by: Christoph Hellwig Message-Id: <3-v2-d36939638fc6+d54-vfio2_jgg@nvidia.com> Signed-off-by: Alex Williamson --- include/linux/mdev.h | 58 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 27eb383cb95d..52f7ea19dd0f 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -10,7 +10,21 @@ #ifndef MDEV_H #define MDEV_H -struct mdev_device; +struct mdev_device { + struct device dev; + struct mdev_parent *parent; + guid_t uuid; + void *driver_data; + struct list_head next; + struct kobject *type_kobj; + struct device *iommu_device; + bool active; +}; + +static inline struct mdev_device *to_mdev_device(struct device *dev) +{ + return container_of(dev, struct mdev_device, dev); +} /* * Called by the parent device driver to set the device which represents @@ -19,12 +33,17 @@ struct mdev_device; * * @dev: the mediated device that iommu will isolate. * @iommu_device: a pci device which represents the iommu for @dev. - * - * Return 0 for success, otherwise negative error value. */ -int mdev_set_iommu_device(struct device *dev, struct device *iommu_device); +static inline void mdev_set_iommu_device(struct mdev_device *mdev, + struct device *iommu_device) +{ + mdev->iommu_device = iommu_device; +} -struct device *mdev_get_iommu_device(struct device *dev); +static inline struct device *mdev_get_iommu_device(struct mdev_device *mdev) +{ + return mdev->iommu_device; +} /** * struct mdev_parent_ops - Structure to be registered for each parent device to @@ -126,16 +145,25 @@ struct mdev_type_attribute mdev_type_attr_##_name = \ **/ struct mdev_driver { const char *name; - int (*probe)(struct device *dev); - void (*remove)(struct device *dev); + int (*probe)(struct mdev_device *dev); + void (*remove)(struct mdev_device *dev); struct device_driver driver; }; #define to_mdev_driver(drv) container_of(drv, struct mdev_driver, driver) -void *mdev_get_drvdata(struct mdev_device *mdev); -void mdev_set_drvdata(struct mdev_device *mdev, void *data); -const guid_t *mdev_uuid(struct mdev_device *mdev); +static inline void *mdev_get_drvdata(struct mdev_device *mdev) +{ + return mdev->driver_data; +} +static inline void mdev_set_drvdata(struct mdev_device *mdev, void *data) +{ + mdev->driver_data = data; +} +static inline const guid_t *mdev_uuid(struct mdev_device *mdev) +{ + return &mdev->uuid; +} extern struct bus_type mdev_bus_type; @@ -146,7 +174,13 @@ int mdev_register_driver(struct mdev_driver *drv, struct module *owner); void mdev_unregister_driver(struct mdev_driver *drv); struct device *mdev_parent_dev(struct mdev_device *mdev); -struct device *mdev_dev(struct mdev_device *mdev); -struct mdev_device *mdev_from_dev(struct device *dev); +static inline struct device *mdev_dev(struct mdev_device *mdev) +{ + return &mdev->dev; +} +static inline struct mdev_device *mdev_from_dev(struct device *dev) +{ + return dev->bus == &mdev_bus_type ? to_mdev_device(dev) : NULL; +} #endif /* MDEV_H */ -- cgit v1.2.3 From 91b9969d9c6bb7c02253bbfc536bfd892f636fdc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 6 Apr 2021 16:40:27 -0300 Subject: vfio/mdev: Simplify driver registration This is only done once, we don't need to generate code to initialize a structure stored in the ELF .data segment. Fill in the three required .driver members directly instead of copying data into them during mdev_register_driver(). Further the to_mdev_driver() function doesn't belong in a public header, just inline it into the two places that need it. Finally, we can now clearly see that 'drv' derived from dev->driver cannot be NULL, firstly because the driver core forbids it, and secondly because NULL won't pass through the container_of(). Remove the dead code. Reviewed-by: Christoph Hellwig Reviewed-by: Kevin Tian Reviewed-by: Cornelia Huck Signed-off-by: Jason Gunthorpe Message-Id: <4-v2-d36939638fc6+d54-vfio2_jgg@nvidia.com> Signed-off-by: Alex Williamson --- include/linux/mdev.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 52f7ea19dd0f..cb771c712da0 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -137,21 +137,17 @@ struct mdev_type_attribute mdev_type_attr_##_name = \ /** * struct mdev_driver - Mediated device driver - * @name: driver name * @probe: called when new device created * @remove: called when device removed * @driver: device driver structure * **/ struct mdev_driver { - const char *name; int (*probe)(struct mdev_device *dev); void (*remove)(struct mdev_device *dev); struct device_driver driver; }; -#define to_mdev_driver(drv) container_of(drv, struct mdev_driver, driver) - static inline void *mdev_get_drvdata(struct mdev_device *mdev) { return mdev->driver_data; @@ -170,7 +166,7 @@ extern struct bus_type mdev_bus_type; int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops); void mdev_unregister_device(struct device *dev); -int mdev_register_driver(struct mdev_driver *drv, struct module *owner); +int mdev_register_driver(struct mdev_driver *drv); void mdev_unregister_driver(struct mdev_driver *drv); struct device *mdev_parent_dev(struct mdev_device *mdev); -- cgit v1.2.3 From 417fd5bf242d7691c15fe0bd705ab76c69276572 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 6 Apr 2021 16:40:28 -0300 Subject: vfio/mdev: Use struct mdev_type in struct mdev_device The kobj pointer in mdev_device is actually pointing at a struct mdev_type. Use the proper type so things are understandable. There are a number of places that are confused and passing both the mdev and the mtype as function arguments, fix these to derive the mtype directly from the mdev to remove the redundancy. Reviewed-by: Christoph Hellwig Reviewed-by: Kevin Tian Reviewed-by: Cornelia Huck Signed-off-by: Jason Gunthorpe Message-Id: <5-v2-d36939638fc6+d54-vfio2_jgg@nvidia.com> Signed-off-by: Alex Williamson --- include/linux/mdev.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mdev.h b/include/linux/mdev.h index cb771c712da0..349e8ac1fe33 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -10,13 +10,15 @@ #ifndef MDEV_H #define MDEV_H +struct mdev_type; + struct mdev_device { struct device dev; struct mdev_parent *parent; guid_t uuid; void *driver_data; struct list_head next; - struct kobject *type_kobj; + struct mdev_type *type; struct device *iommu_device; bool active; }; -- cgit v1.2.3 From fbea43239074e16c91048f5ce70378664efbdb99 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 6 Apr 2021 16:40:33 -0300 Subject: vfio/mdev: Remove duplicate storage of parent in mdev_device mdev_device->type->parent is the same thing. The struct mdev_device was relying on the kref on the mdev_parent to also indirectly hold a kref on the mdev_type pointer. Now that the type holds a kref on the parent we can directly kref the mdev_type and remove this implicit relationship. Reviewed-by: Christoph Hellwig Reviewed-by: Cornelia Huck Signed-off-by: Jason Gunthorpe Message-Id: <10-v2-d36939638fc6+d54-vfio2_jgg@nvidia.com> Signed-off-by: Alex Williamson --- include/linux/mdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 349e8ac1fe33..fb582adda28a 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -14,7 +14,6 @@ struct mdev_type; struct mdev_device { struct device dev; - struct mdev_parent *parent; guid_t uuid; void *driver_data; struct list_head next; -- cgit v1.2.3 From 15fcc44be0c7afa2945b1896a96ac2ddf09f1fa7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 6 Apr 2021 16:40:34 -0300 Subject: vfio/mdev: Add mdev/mtype_get_type_group_id() This returns the index in the supported_type_groups array that is associated with the mdev_type attached to the struct mdev_device or its containing struct kobject. Each mdev_device can be spawned from exactly one mdev_type, which in turn originates from exactly one supported_type_group. Drivers are using weird string calculations to try and get back to this index, providing a direct access to the index removes a bunch of wonky driver code. mdev_type->group can be deleted as the group is obtained using the type_group_id. Reviewed-by: Christoph Hellwig Reviewed-by: Kevin Tian Reviewed-by: Cornelia Huck Signed-off-by: Jason Gunthorpe Message-Id: <11-v2-d36939638fc6+d54-vfio2_jgg@nvidia.com> Signed-off-by: Alex Williamson --- include/linux/mdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdev.h b/include/linux/mdev.h index fb582adda28a..41e919365223 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -46,6 +46,9 @@ static inline struct device *mdev_get_iommu_device(struct mdev_device *mdev) return mdev->iommu_device; } +unsigned int mdev_get_type_group_id(struct mdev_device *mdev); +unsigned int mtype_get_type_group_id(struct kobject *mtype_kobj); + /** * struct mdev_parent_ops - Structure to be registered for each parent device to * register the device to mdev module. -- cgit v1.2.3 From 0854fa82c96ca37a35e954b7079c0bfd795affb1 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 6 Apr 2021 23:40:51 -0700 Subject: net: remove the new_ifindex argument from dev_change_net_namespace Here is only one place where we want to specify new_ifindex. In all other cases, callers pass 0 as new_ifindex. It looks reasonable to add a low-level function with new_ifindex and to convert dev_change_net_namespace to a static inline wrapper. Fixes: eeb85a14ee34 ("net: Allow to specify ifindex when device is moved to another namespace") Suggested-by: Jakub Kicinski Signed-off-by: Andrei Vagin Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b482236c0e99..5cbc950b34df 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4026,8 +4026,14 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags, int dev_change_name(struct net_device *, const char *); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); +int __dev_change_net_namespace(struct net_device *dev, struct net *net, + const char *pat, int new_ifindex); +static inline int dev_change_net_namespace(struct net_device *dev, struct net *net, - const char *pat, int new_ifindex); + const char *pat) +{ + return __dev_change_net_namespace(dev, net, pat, 0); +} int __dev_set_mtu(struct net_device *, int); int dev_validate_mtu(struct net_device *dev, int mtu, struct netlink_ext_ack *extack); -- cgit v1.2.3 From a975d7d8a356ce92872af0b007b101183f0224e2 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Wed, 7 Apr 2021 13:06:51 +0300 Subject: ethtool: Remove link_mode param and derive link params from driver Some drivers clear the 'ethtool_link_ksettings' struct in their get_link_ksettings() callback, before populating it with actual values. Such drivers will set the new 'link_mode' field to zero, resulting in user space receiving wrong link mode information given that zero is a valid value for the field. Another problem is that some drivers (notably tun) can report random values in the 'link_mode' field. This can result in a general protection fault when the field is used as an index to the 'link_mode_params' array [1]. This happens because such drivers implement their set_link_ksettings() callback by simply overwriting their private copy of 'ethtool_link_ksettings' struct with the one they get from the stack, which is not always properly initialized. Fix these problems by removing 'link_mode' from 'ethtool_link_ksettings' and instead have drivers call ethtool_params_from_link_mode() with the current link mode. The function will derive the link parameters (e.g., speed) from the link mode and fill them in the 'ethtool_link_ksettings' struct. v3: * Remove link_mode parameter and derive the link parameters in the driver instead of passing link_mode parameter to ethtool and derive it there. v2: * Introduce 'cap_link_mode_supported' instead of adding a validity field to 'ethtool_link_ksettings' struct. [1] general protection fault, probably for non-canonical address 0xdffffc00f14cc32c: 0000 [#1] PREEMPT SMP KASAN KASAN: probably user-memory-access in range [0x000000078a661960-0x000000078a661967] CPU: 0 PID: 8452 Comm: syz-executor360 Not tainted 5.11.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__ethtool_get_link_ksettings+0x1a3/0x3a0 net/ethtool/ioctl.c:446 Code: b7 3e fa 83 fd ff 0f 84 30 01 00 00 e8 16 b0 3e fa 48 8d 3c ed 60 d5 69 8a 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 +38 d0 7c 08 84 d2 0f 85 b9 RSP: 0018:ffffc900019df7a0 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff888026136008 RCX: 0000000000000000 RDX: 00000000f14cc32c RSI: ffffffff873439ca RDI: 000000078a661960 RBP: 00000000ffff8880 R08: 00000000ffffffff R09: ffff88802613606f R10: ffffffff873439bc R11: 0000000000000000 R12: 0000000000000000 R13: ffff88802613606c R14: ffff888011d0c210 R15: ffff888011d0c210 FS: 0000000000749300(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004b60f0 CR3: 00000000185c2000 CR4: 00000000001506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: linkinfo_prepare_data+0xfd/0x280 net/ethtool/linkinfo.c:37 ethnl_default_notify+0x1dc/0x630 net/ethtool/netlink.c:586 ethtool_notify+0xbd/0x1f0 net/ethtool/netlink.c:656 ethtool_set_link_ksettings+0x277/0x330 net/ethtool/ioctl.c:620 dev_ethtool+0x2b35/0x45d0 net/ethtool/ioctl.c:2842 dev_ioctl+0x463/0xb70 net/core/dev_ioctl.c:440 sock_do_ioctl+0x148/0x2d0 net/socket.c:1060 sock_ioctl+0x477/0x6a0 net/socket.c:1177 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: c8907043c6ac9 ("ethtool: Get link mode in use instead of speed and duplex parameters") Signed-off-by: Danielle Ratson Reported-by: Eric Dumazet Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/ethtool.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 7c88dfff7420..cdca84e6dd6b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -127,7 +127,6 @@ struct ethtool_link_ksettings { __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); } link_modes; u32 lanes; - enum ethtool_link_mode_bit_indices link_mode; }; /** @@ -574,4 +573,12 @@ struct ethtool_phy_ops { */ void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops); +/* + * ethtool_params_from_link_mode - Derive link parameters from a given link mode + * @link_ksettings: Link parameters to be derived from the link mode + * @link_mode: Link mode + */ +void +ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, + enum ethtool_link_mode_bit_indices link_mode); #endif /* _LINUX_ETHTOOL_H */ -- cgit v1.2.3 From 01fd45f676f1b3785b7cdd5d815f9c31ddcd9dd1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 7 Apr 2021 12:39:21 +0200 Subject: USB: serial: add generic support for TIOCSSERIAL TIOCSSERIAL is a horrid, underspecified, legacy interface which for most serial devices is only useful for setting the close_delay and closing_wait parameters. The closing_wait parameter determines how long to wait for the transfer buffers to drain during close and the default timeout of 30 seconds may not be sufficient at low line speeds. In other cases, when for example flow is stopped, the default timeout may instead be too long. Add generic support for TIOCSSERIAL and TIOCGSERIAL with handling of the three common parameters close_delay, closing_wait and line for the benefit of all USB serial drivers while still allowing drivers to implement further functionality through the existing callbacks. This currently includes a few drivers that report their base baud clock rate even if that is really only of interest when setting custom divisors through the deprecated ASYNC_SPD_CUST interface; an interface which only the FTDI driver actually implements. Some drivers have also been reporting back a fake UART type, something which should no longer be needed and will be dropped by a follow-on patch. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- include/linux/usb/serial.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index e9b90577f50b..8c63fa9bfc74 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -279,7 +279,7 @@ struct usb_serial_driver { int (*write_room)(struct tty_struct *tty); int (*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); - int (*get_serial)(struct tty_struct *tty, struct serial_struct *ss); + void (*get_serial)(struct tty_struct *tty, struct serial_struct *ss); int (*set_serial)(struct tty_struct *tty, struct serial_struct *ss); void (*set_termios)(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old); -- cgit v1.2.3 From fd921693fe989afe82600d97b37f54c942a6db6c Mon Sep 17 00:00:00 2001 From: David Stevens Date: Thu, 8 Apr 2021 18:54:28 +0900 Subject: drm/syncobj: use newly allocated stub fences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allocate a new private stub fence in drm_syncobj_assign_null_handle, instead of using a static stub fence. When userspace creates a fence with DRM_SYNCOBJ_CREATE_SIGNALED or when userspace signals a fence via DRM_IOCTL_SYNCOBJ_SIGNAL, the timestamp obtained when the fence is exported and queried with SYNC_IOC_FILE_INFO should match when the fence's status was changed from the perspective of userspace, which is during the respective ioctl. When a static stub fence started being used in by these ioctls, this behavior changed. Instead, the timestamp returned by SYNC_IOC_FILE_INFO became the first time anything used the static stub fence, which has no meaning to userspace. Signed-off-by: David Stevens Link: https://patchwork.freedesktop.org/patch/msgid/20210408095428.3983055-1-stevensd@google.com Reviewed-by: Christian König Signed-off-by: Christian König --- include/linux/dma-fence.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 9f12efaaa93a..6ffb4b2c6371 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -587,6 +587,7 @@ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr) } struct dma_fence *dma_fence_get_stub(void); +struct dma_fence *dma_fence_allocate_private_stub(void); u64 dma_fence_context_alloc(unsigned num); #define DMA_FENCE_TRACE(f, fmt, args...) \ -- cgit v1.2.3 From 7c566bb5e4d5fb0d89579a90d8a1f54eaff6f95d Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 11 Feb 2021 21:35:46 +0900 Subject: asm-generic/io.h: Add a non-posted variant of ioremap() ARM64 currently defaults to posted MMIO (nGnRE), but some devices require the use of non-posted MMIO (nGnRnE). Introduce a new ioremap() variant to handle this case. ioremap_np() returns NULL on arches that do not implement this variant. sparc64 is the only architecture that needs to be touched directly, because it includes neither of the generic io.h or iomap.h headers. This adds the IORESOURCE_MEM_NONPOSTED flag, which maps to this variant and marks a given resource as requiring non-posted mappings. This is implemented in the resource system because it is a SoC-level requirement, so existing drivers do not need special-case code to pick this ioremap variant. Then this is implemented in devres by introducing devm_ioremap_np(), and making devm_ioremap_resource() automatically select this variant when the resource has the IORESOURCE_MEM_NONPOSTED flag set. Acked-by: Marc Zyngier Signed-off-by: Hector Martin --- include/linux/io.h | 2 ++ include/linux/ioport.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index 8394c56babc2..d718354ed3e1 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -68,6 +68,8 @@ void __iomem *devm_ioremap_uc(struct device *dev, resource_size_t offset, resource_size_t size); void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset, resource_size_t size); +void __iomem *devm_ioremap_np(struct device *dev, resource_size_t offset, + resource_size_t size); void devm_iounmap(struct device *dev, void __iomem *addr); int check_signature(const volatile void __iomem *io_addr, const unsigned char *signature, int length); diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 55de385c839c..1de6c2e40c32 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -108,6 +108,7 @@ struct resource { #define IORESOURCE_MEM_32BIT (3<<3) #define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */ #define IORESOURCE_MEM_EXPANSIONROM (1<<6) +#define IORESOURCE_MEM_NONPOSTED (1<<7) /* PnP I/O specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IO_16BIT_ADDR (1<<0) -- cgit v1.2.3 From b10eb2d50911f98a8f1cacf00b1b677339593f4c Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 25 Mar 2021 22:50:19 +0900 Subject: asm-generic/io.h: implement pci_remap_cfgspace using ioremap_np Now that we have ioremap_np(), we can make pci_remap_cfgspace() default to it, falling back to ioremap() on platforms where it is not available. Remove the arm64 implementation, since that is now redundant. Future cleanups should be able to do the same for other arches, and eventually make the generic pci_remap_cfgspace() unconditional. Acked-by: Will Deacon Signed-off-by: Hector Martin --- include/linux/io.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index d718354ed3e1..61ff7d6278b6 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -82,20 +82,20 @@ void devm_memunmap(struct device *dev, void *addr); #ifdef CONFIG_PCI /* * The PCI specifications (Rev 3.0, 3.2.5 "Transaction Ordering and - * Posting") mandate non-posted configuration transactions. There is - * no ioremap API in the kernel that can guarantee non-posted write - * semantics across arches so provide a default implementation for - * mapping PCI config space that defaults to ioremap(); arches - * should override it if they have memory mapping implementations that - * guarantee non-posted writes semantics to make the memory mapping - * compliant with the PCI specification. + * Posting") mandate non-posted configuration transactions. This default + * implementation attempts to use the ioremap_np() API to provide this + * on arches that support it, and falls back to ioremap() on those that + * don't. Overriding this function is deprecated; arches that properly + * support non-posted accesses should implement ioremap_np() instead, which + * this default implementation can then use to return mappings compliant with + * the PCI specification. */ #ifndef pci_remap_cfgspace #define pci_remap_cfgspace pci_remap_cfgspace static inline void __iomem *pci_remap_cfgspace(phys_addr_t offset, size_t size) { - return ioremap(offset, size); + return ioremap_np(offset, size) ?: ioremap(offset, size); } #endif #endif -- cgit v1.2.3 From 8a657f71705f9f9c2bf8308e2cfd57b9f329e0d9 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 1 Mar 2021 12:36:24 +0900 Subject: arm64: Move ICH_ sysreg bits from arm-gic-v3.h to sysreg.h These definitions are in arm-gic-v3.h for historical reasons which no longer apply. Move them to sysreg.h so the AIC driver can use them, as it needs to peek into vGIC registers to deal with the GIC maintentance interrupt. Acked-by: Marc Zyngier Acked-by: Will Deacon Signed-off-by: Hector Martin --- include/linux/irqchip/arm-gic-v3.h | 56 -------------------------------------- 1 file changed, 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index f6d092fdb93d..81cbf85f73de 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -575,67 +575,11 @@ #define ICC_SRE_EL1_DFB (1U << 1) #define ICC_SRE_EL1_SRE (1U << 0) -/* - * Hypervisor interface registers (SRE only) - */ -#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1) - -#define ICH_LR_EOI (1ULL << 41) -#define ICH_LR_GROUP (1ULL << 60) -#define ICH_LR_HW (1ULL << 61) -#define ICH_LR_STATE (3ULL << 62) -#define ICH_LR_PENDING_BIT (1ULL << 62) -#define ICH_LR_ACTIVE_BIT (1ULL << 63) -#define ICH_LR_PHYS_ID_SHIFT 32 -#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) -#define ICH_LR_PRIORITY_SHIFT 48 -#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) - /* These are for GICv2 emulation only */ #define GICH_LR_VIRTUALID (0x3ffUL << 0) #define GICH_LR_PHYSID_CPUID_SHIFT (10) #define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) -#define ICH_MISR_EOI (1 << 0) -#define ICH_MISR_U (1 << 1) - -#define ICH_HCR_EN (1 << 0) -#define ICH_HCR_UIE (1 << 1) -#define ICH_HCR_NPIE (1 << 3) -#define ICH_HCR_TC (1 << 10) -#define ICH_HCR_TALL0 (1 << 11) -#define ICH_HCR_TALL1 (1 << 12) -#define ICH_HCR_EOIcount_SHIFT 27 -#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) - -#define ICH_VMCR_ACK_CTL_SHIFT 2 -#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) -#define ICH_VMCR_FIQ_EN_SHIFT 3 -#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT) -#define ICH_VMCR_CBPR_SHIFT 4 -#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) -#define ICH_VMCR_EOIM_SHIFT 9 -#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) -#define ICH_VMCR_BPR1_SHIFT 18 -#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) -#define ICH_VMCR_BPR0_SHIFT 21 -#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) -#define ICH_VMCR_PMR_SHIFT 24 -#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) -#define ICH_VMCR_ENG0_SHIFT 0 -#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT) -#define ICH_VMCR_ENG1_SHIFT 1 -#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) - -#define ICH_VTR_PRI_BITS_SHIFT 29 -#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) -#define ICH_VTR_ID_BITS_SHIFT 23 -#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) -#define ICH_VTR_SEIS_SHIFT 22 -#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) -#define ICH_VTR_A3V_SHIFT 21 -#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) - #define ICC_IAR1_EL1_SPURIOUS 0x3ff #define ICC_SRE_EL2_SRE (1 << 0) -- cgit v1.2.3 From 76cde26394114f6af2710c6b2ad6854f1e8ee859 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 21 Jan 2021 08:55:15 +0900 Subject: irqchip/apple-aic: Add support for the Apple Interrupt Controller This is the root interrupt controller used on Apple ARM SoCs such as the M1. This irqchip driver performs multiple functions: * Handles both IRQs and FIQs * Drives the AIC peripheral itself (which handles IRQs) * Dispatches FIQs to downstream hard-wired clients (currently the ARM timer). * Implements a virtual IPI multiplexer to funnel multiple Linux IPIs into a single hardware IPI Reviewed-by: Marc Zyngier Acked-by: Will Deacon Signed-off-by: Hector Martin --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index f14adb882338..f56eee992c75 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -100,6 +100,7 @@ enum cpuhp_state { CPUHP_AP_CPU_PM_STARTING, CPUHP_AP_IRQ_GIC_STARTING, CPUHP_AP_IRQ_HIP04_STARTING, + CPUHP_AP_IRQ_APPLE_AIC_STARTING, CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_IRQ_MIPS_GIC_STARTING, -- cgit v1.2.3 From 0d66ccc1627013c95f1e7ef10b95b8451cd7834e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Apr 2021 16:23:42 -0700 Subject: jump_label: Provide CONFIG-driven build state defaults As shown in the comment in jump_label.h, choosing the initial state of static branches changes the assembly layout. If the condition is expected to be likely it's inline, and if unlikely it is out of line via a jump. A few places in the kernel use (or could be using) a CONFIG to choose the default state, which would give a small performance benefit to their compile-time declared default. Provide the infrastructure to do this. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210401232347.2791257-2-keescook@chromium.org --- include/linux/jump_label.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index d92691262f51..05f5554d860f 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -382,6 +382,21 @@ struct static_key_false { [0 ... (count) - 1] = STATIC_KEY_FALSE_INIT, \ } +#define _DEFINE_STATIC_KEY_1(name) DEFINE_STATIC_KEY_TRUE(name) +#define _DEFINE_STATIC_KEY_0(name) DEFINE_STATIC_KEY_FALSE(name) +#define DEFINE_STATIC_KEY_MAYBE(cfg, name) \ + __PASTE(_DEFINE_STATIC_KEY_, IS_ENABLED(cfg))(name) + +#define _DEFINE_STATIC_KEY_RO_1(name) DEFINE_STATIC_KEY_TRUE_RO(name) +#define _DEFINE_STATIC_KEY_RO_0(name) DEFINE_STATIC_KEY_FALSE_RO(name) +#define DEFINE_STATIC_KEY_MAYBE_RO(cfg, name) \ + __PASTE(_DEFINE_STATIC_KEY_RO_, IS_ENABLED(cfg))(name) + +#define _DECLARE_STATIC_KEY_1(name) DECLARE_STATIC_KEY_TRUE(name) +#define _DECLARE_STATIC_KEY_0(name) DECLARE_STATIC_KEY_FALSE(name) +#define DECLARE_STATIC_KEY_MAYBE(cfg, name) \ + __PASTE(_DECLARE_STATIC_KEY_, IS_ENABLED(cfg))(name) + extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ @@ -482,6 +497,10 @@ extern bool ____wrong_branch_error(void); #endif /* CONFIG_JUMP_LABEL */ +#define static_branch_maybe(config, x) \ + (IS_ENABLED(config) ? static_branch_likely(x) \ + : static_branch_unlikely(x)) + /* * Advanced usage; refcount, branch is enabled when: count != 0 */ -- cgit v1.2.3 From 51cba1ebc60df9c4ce034a9f5441169c0d0956c0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Apr 2021 16:23:43 -0700 Subject: init_on_alloc: Optimize static branches The state of CONFIG_INIT_ON_ALLOC_DEFAULT_ON (and ...ON_FREE...) did not change the assembly ordering of the static branches: they were always out of line. Use the new jump_label macros to check the CONFIG settings to default to the "expected" state, which slightly optimizes the resulting assembly code. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Reviewed-by: Alexander Potapenko Acked-by: Vlastimil Babka Link: https://lore.kernel.org/r/20210401232347.2791257-3-keescook@chromium.org --- include/linux/mm.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8ba434287387..616dcaf08d99 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2904,18 +2904,20 @@ static inline void kernel_poison_pages(struct page *page, int numpages) { } static inline void kernel_unpoison_pages(struct page *page, int numpages) { } #endif -DECLARE_STATIC_KEY_FALSE(init_on_alloc); +DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc); static inline bool want_init_on_alloc(gfp_t flags) { - if (static_branch_unlikely(&init_on_alloc)) + if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, + &init_on_alloc)) return true; return flags & __GFP_ZERO; } -DECLARE_STATIC_KEY_FALSE(init_on_free); +DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); static inline bool want_init_on_free(void) { - return static_branch_unlikely(&init_on_free); + return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON, + &init_on_free); } extern bool _debug_pagealloc_enabled_early; -- cgit v1.2.3 From 39218ff4c625dbf2e68224024fe0acaa60bcd51a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 Apr 2021 16:23:44 -0700 Subject: stack: Optionally randomize kernel stack offset each syscall This provides the ability for architectures to enable kernel stack base address offset randomization. This feature is controlled by the boot param "randomize_kstack_offset=on/off", with its default value set by CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT. This feature is based on the original idea from the last public release of PaX's RANDKSTACK feature: https://pax.grsecurity.net/docs/randkstack.txt All the credit for the original idea goes to the PaX team. Note that the design and implementation of this upstream randomize_kstack_offset feature differs greatly from the RANDKSTACK feature (see below). Reasoning for the feature: This feature aims to make harder the various stack-based attacks that rely on deterministic stack structure. We have had many such attacks in past (just to name few): https://jon.oberheide.org/files/infiltrate12-thestackisback.pdf https://jon.oberheide.org/files/stackjacking-infiltrate11.pdf https://googleprojectzero.blogspot.com/2016/06/exploiting-recursion-in-linux-kernel_20.html As Linux kernel stack protections have been constantly improving (vmap-based stack allocation with guard pages, removal of thread_info, STACKLEAK), attackers have had to find new ways for their exploits to work. They have done so, continuing to rely on the kernel's stack determinism, in situations where VMAP_STACK and THREAD_INFO_IN_TASK_STRUCT were not relevant. For example, the following recent attacks would have been hampered if the stack offset was non-deterministic between syscalls: https://repositorio-aberto.up.pt/bitstream/10216/125357/2/374717.pdf (page 70: targeting the pt_regs copy with linear stack overflow) https://a13xp0p0v.github.io/2020/02/15/CVE-2019-18683.html (leaked stack address from one syscall as a target during next syscall) The main idea is that since the stack offset is randomized on each system call, it is harder for an attack to reliably land in any particular place on the thread stack, even with address exposures, as the stack base will change on the next syscall. Also, since randomization is performed after placing pt_regs, the ptrace-based approach[1] to discover the randomized offset during a long-running syscall should not be possible. Design description: During most of the kernel's execution, it runs on the "thread stack", which is pretty deterministic in its structure: it is fixed in size, and on every entry from userspace to kernel on a syscall the thread stack starts construction from an address fetched from the per-cpu cpu_current_top_of_stack variable. The first element to be pushed to the thread stack is the pt_regs struct that stores all required CPU registers and syscall parameters. Finally the specific syscall function is called, with the stack being used as the kernel executes the resulting request. The goal of randomize_kstack_offset feature is to add a random offset after the pt_regs has been pushed to the stack and before the rest of the thread stack is used during the syscall processing, and to change it every time a process issues a syscall. The source of randomness is currently architecture-defined (but x86 is using the low byte of rdtsc()). Future improvements for different entropy sources is possible, but out of scope for this patch. Further more, to add more unpredictability, new offsets are chosen at the end of syscalls (the timing of which should be less easy to measure from userspace than at syscall entry time), and stored in a per-CPU variable, so that the life of the value does not stay explicitly tied to a single task. As suggested by Andy Lutomirski, the offset is added using alloca() and an empty asm() statement with an output constraint, since it avoids changes to assembly syscall entry code, to the unwinder, and provides correct stack alignment as defined by the compiler. In order to make this available by default with zero performance impact for those that don't want it, it is boot-time selectable with static branches. This way, if the overhead is not wanted, it can just be left turned off with no performance impact. The generated assembly for x86_64 with GCC looks like this: ... ffffffff81003977: 65 8b 05 02 ea 00 7f mov %gs:0x7f00ea02(%rip),%eax # 12380 ffffffff8100397e: 25 ff 03 00 00 and $0x3ff,%eax ffffffff81003983: 48 83 c0 0f add $0xf,%rax ffffffff81003987: 25 f8 07 00 00 and $0x7f8,%eax ffffffff8100398c: 48 29 c4 sub %rax,%rsp ffffffff8100398f: 48 8d 44 24 0f lea 0xf(%rsp),%rax ffffffff81003994: 48 83 e0 f0 and $0xfffffffffffffff0,%rax ... As a result of the above stack alignment, this patch introduces about 5 bits of randomness after pt_regs is spilled to the thread stack on x86_64, and 6 bits on x86_32 (since its has 1 fewer bit required for stack alignment). The amount of entropy could be adjusted based on how much of the stack space we wish to trade for security. My measure of syscall performance overhead (on x86_64): lmbench: /usr/lib/lmbench/bin/x86_64-linux-gnu/lat_syscall -N 10000 null randomize_kstack_offset=y Simple syscall: 0.7082 microseconds randomize_kstack_offset=n Simple syscall: 0.7016 microseconds So, roughly 0.9% overhead growth for a no-op syscall, which is very manageable. And for people that don't want this, it's off by default. There are two gotchas with using the alloca() trick. First, compilers that have Stack Clash protection (-fstack-clash-protection) enabled by default (e.g. Ubuntu[3]) add pagesize stack probes to any dynamic stack allocations. While the randomization offset is always less than a page, the resulting assembly would still contain (unreachable!) probing routines, bloating the resulting assembly. To avoid this, -fno-stack-clash-protection is unconditionally added to the kernel Makefile since this is the only dynamic stack allocation in the kernel (now that VLAs have been removed) and it is provably safe from Stack Clash style attacks. The second gotcha with alloca() is a negative interaction with -fstack-protector*, in that it sees the alloca() as an array allocation, which triggers the unconditional addition of the stack canary function pre/post-amble which slows down syscalls regardless of the static branch. In order to avoid adding this unneeded check and its associated performance impact, architectures need to carefully remove uses of -fstack-protector-strong (or -fstack-protector) in the compilation units that use the add_random_kstack() macro and to audit the resulting stack mitigation coverage (to make sure no desired coverage disappears). No change is visible for this on x86 because the stack protector is already unconditionally disabled for the compilation unit, but the change is required on arm64. There is, unfortunately, no attribute that can be used to disable stack protector for specific functions. Comparison to PaX RANDKSTACK feature: The RANDKSTACK feature randomizes the location of the stack start (cpu_current_top_of_stack), i.e. including the location of pt_regs structure itself on the stack. Initially this patch followed the same approach, but during the recent discussions[2], it has been determined to be of a little value since, if ptrace functionality is available for an attacker, they can use PTRACE_PEEKUSR/PTRACE_POKEUSR to read/write different offsets in the pt_regs struct, observe the cache behavior of the pt_regs accesses, and figure out the random stack offset. Another difference is that the random offset is stored in a per-cpu variable, rather than having it be per-thread. As a result, these implementations differ a fair bit in their implementation details and results, though obviously the intent is similar. [1] https://lore.kernel.org/kernel-hardening/2236FBA76BA1254E88B949DDB74E612BA4BC57C1@IRSMSX102.ger.corp.intel.com/ [2] https://lore.kernel.org/kernel-hardening/20190329081358.30497-1-elena.reshetova@intel.com/ [3] https://lists.ubuntu.com/archives/ubuntu-devel/2019-June/040741.html Co-developed-by: Elena Reshetova Signed-off-by: Elena Reshetova Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210401232347.2791257-4-keescook@chromium.org --- include/linux/randomize_kstack.h | 54 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 include/linux/randomize_kstack.h (limited to 'include/linux') diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h new file mode 100644 index 000000000000..fd80fab663a9 --- /dev/null +++ b/include/linux/randomize_kstack.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _LINUX_RANDOMIZE_KSTACK_H +#define _LINUX_RANDOMIZE_KSTACK_H + +#include +#include +#include + +DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, + randomize_kstack_offset); +DECLARE_PER_CPU(u32, kstack_offset); + +/* + * Do not use this anywhere else in the kernel. This is used here because + * it provides an arch-agnostic way to grow the stack with correct + * alignment. Also, since this use is being explicitly masked to a max of + * 10 bits, stack-clash style attacks are unlikely. For more details see + * "VLAs" in Documentation/process/deprecated.rst + */ +void *__builtin_alloca(size_t size); +/* + * Use, at most, 10 bits of entropy. We explicitly cap this to keep the + * "VLA" from being unbounded (see above). 10 bits leaves enough room for + * per-arch offset masks to reduce entropy (by removing higher bits, since + * high entropy may overly constrain usable stack space), and for + * compiler/arch-specific stack alignment to remove the lower bits. + */ +#define KSTACK_OFFSET_MAX(x) ((x) & 0x3FF) + +/* + * These macros must be used during syscall entry when interrupts and + * preempt are disabled, and after user registers have been stored to + * the stack. + */ +#define add_random_kstack_offset() do { \ + if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ + &randomize_kstack_offset)) { \ + u32 offset = raw_cpu_read(kstack_offset); \ + u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \ + /* Keep allocation even after "ptr" loses scope. */ \ + asm volatile("" : "=o"(*ptr) :: "memory"); \ + } \ +} while (0) + +#define choose_random_kstack_offset(rand) do { \ + if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ + &randomize_kstack_offset)) { \ + u32 offset = raw_cpu_read(kstack_offset); \ + offset ^= (rand); \ + raw_cpu_write(kstack_offset, offset); \ + } \ +} while (0) + +#endif -- cgit v1.2.3 From 794aaf01444d4e765e2b067cba01cc69c1c68ed9 Mon Sep 17 00:00:00 2001 From: "William A. Kennington III" Date: Wed, 7 Apr 2021 02:55:27 -0700 Subject: spi: Fix use-after-free with devm_spi_alloc_* We can't rely on the contents of the devres list during spi_unregister_controller(), as the list is already torn down at the time we perform devres_find() for devm_spi_release_controller. This causes devices registered with devm_spi_alloc_{master,slave}() to be mistakenly identified as legacy, non-devm managed devices and have their reference counters decremented below 0. ------------[ cut here ]------------ WARNING: CPU: 1 PID: 660 at lib/refcount.c:28 refcount_warn_saturate+0x108/0x174 [] (refcount_warn_saturate) from [] (kobject_put+0x90/0x98) [] (kobject_put) from [] (put_device+0x20/0x24) r4:b6700140 [] (put_device) from [] (devm_spi_release_controller+0x3c/0x40) [] (devm_spi_release_controller) from [] (release_nodes+0x84/0xc4) r5:b6700180 r4:b6700100 [] (release_nodes) from [] (devres_release_all+0x5c/0x60) r8:b1638c54 r7:b117ad94 r6:b1638c10 r5:b117ad94 r4:b163dc10 [] (devres_release_all) from [] (__device_release_driver+0x144/0x1ec) r5:b117ad94 r4:b163dc10 [] (__device_release_driver) from [] (device_driver_detach+0x84/0xa0) r9:00000000 r8:00000000 r7:b117ad94 r6:b163dc54 r5:b1638c10 r4:b163dc10 [] (device_driver_detach) from [] (unbind_store+0xe4/0xf8) Instead, determine the devm allocation state as a flag on the controller which is guaranteed to be stable during cleanup. Fixes: 5e844cc37a5c ("spi: Introduce device-managed SPI controller allocation") Signed-off-by: William A. Kennington III Link: https://lore.kernel.org/r/20210407095527.2771582-1-wak@google.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 592897fa4f03..643139b1eafe 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -510,6 +510,9 @@ struct spi_controller { #define SPI_MASTER_GPIO_SS BIT(5) /* GPIO CS must select slave */ + /* flag indicating this is a non-devres managed controller */ + bool devm_allocated; + /* flag indicating this is an SPI slave controller */ bool slave; -- cgit v1.2.3 From 25de4ce5ed02994aea8bc111d133308f6fd62566 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 23 Mar 2021 09:43:26 +0200 Subject: clocksource/drivers/timer-ti-dm: Handle dra7 timer wrap errata i940 There is a timer wrap issue on dra7 for the ARM architected timer. In a typical clock configuration the timer fails to wrap after 388 days. To work around the issue, we need to use timer-ti-dm percpu timers instead. Let's configure dmtimer3 and 4 as percpu timers by default, and warn about the issue if the dtb is not configured properly. Let's do this as a single patch so it can be backported to v5.8 and later kernels easily. Note that this patch depends on earlier timer-ti-dm systimer posted mode fixes, and a preparatory clockevent patch "clocksource/drivers/timer-ti-dm: Prepare to handle dra7 timer wrap issue". For more information, please see the errata for "AM572x Sitara Processors Silicon Revisions 1.1, 2.0": https://www.ti.com/lit/er/sprz429m/sprz429m.pdf The concept is based on earlier reference patches done by Tero Kristo and Keerthy. Cc: Keerthy Cc: Tero Kristo Signed-off-by: Tony Lindgren Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210323074326.28302-3-tony@atomide.com --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index f14adb882338..cc7c3fda2aa6 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -135,6 +135,7 @@ enum cpuhp_state { CPUHP_AP_RISCV_TIMER_STARTING, CPUHP_AP_CLINT_TIMER_STARTING, CPUHP_AP_CSKY_TIMER_STARTING, + CPUHP_AP_TI_GP_TIMER_STARTING, CPUHP_AP_HYPERV_TIMER_STARTING, CPUHP_AP_KVM_STARTING, CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, -- cgit v1.2.3 From d3c4a43d9291279c28b26757351a6ab72c110753 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Apr 2021 08:22:55 +0200 Subject: block: refactor blk_drop_partitions Move the busy check and disk-wide sync into the only caller, so that the remainder can be shared with del_gendisk. Also pass the gendisk instead of the bdev as that is all that is needed. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210406062303.811835-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index f364619092cc..16178a935c40 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -273,7 +273,7 @@ static inline sector_t get_capacity(struct gendisk *disk) int bdev_disk_changed(struct block_device *bdev, bool invalidate); int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); -int blk_drop_partitions(struct block_device *bdev); +void blk_drop_partitions(struct gendisk *disk); extern struct gendisk *__alloc_disk_node(int minors, int node_id); extern void put_disk(struct gendisk *disk); -- cgit v1.2.3 From 3212135a718b06be38811f2d9a320ae842e76409 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Apr 2021 08:23:02 +0200 Subject: block: remove disk_part_iter Just open code the xa_for_each in the remaining user. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210406062303.811835-12-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 16178a935c40..7e9660ea967d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -204,25 +204,6 @@ static inline dev_t disk_devt(struct gendisk *disk) void disk_uevent(struct gendisk *disk, enum kobject_action action); -/* - * Smarter partition iterator without context limits. - */ -#define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ -#define DISK_PITER_INCL_PART0 (1 << 2) /* include partition 0 */ -#define DISK_PITER_INCL_EMPTY_PART0 (1 << 3) /* include empty partition 0 */ - -struct disk_part_iter { - struct gendisk *disk; - struct block_device *part; - unsigned long idx; - unsigned int flags; -}; - -extern void disk_part_iter_init(struct disk_part_iter *piter, - struct gendisk *disk, unsigned int flags); -struct block_device *disk_part_iter_next(struct disk_part_iter *piter); -extern void disk_part_iter_exit(struct disk_part_iter *piter); - /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, const struct attribute_group **groups); -- cgit v1.2.3 From 35eb1f5033cf301bda48a22f2edcaed2fda70bfd Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 30 Mar 2021 14:39:23 +0800 Subject: powercap: RAPL: Fix struct declaration in header file struct rapl_package is declared twice in intel_rapl.h, once at line 80 and once earlier. Code inspection suggests that the first instance should be struct rapl_domain rather than rapl_package, so change it. Signed-off-by: Wan Jiabing [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/intel_rapl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 50b8398ffd21..93780834fc8f 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -33,7 +33,7 @@ enum rapl_domain_reg_id { RAPL_DOMAIN_REG_MAX, }; -struct rapl_package; +struct rapl_domain; enum rapl_primitives { ENERGY_COUNTER, -- cgit v1.2.3 From 2efa3377230bab432fdeabe5828d19d2d49a96d2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 1 Apr 2021 22:15:37 +0800 Subject: freezer: Remove unused inline function try_to_freeze_nowarn() There is no caller in tree, so can remove it. Signed-off-by: YueHaibing Signed-off-by: Rafael J. Wysocki --- include/linux/freezer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 27828145ca09..0621c5f86c39 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -279,7 +279,6 @@ static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} static inline void thaw_kernel_threads(void) {} -static inline bool try_to_freeze_nowarn(void) { return false; } static inline bool try_to_freeze(void) { return false; } static inline void freezer_do_not_count(void) {} -- cgit v1.2.3 From 953c1fd96b1a70bcbbfb10973c2126eba8d891c7 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 2 Apr 2021 14:14:22 +0800 Subject: PM: runtime: Replace inline function pm_runtime_callbacks_present() Commit 9a7875461fd0 ("PM: runtime: Replace pm_runtime_callbacks_present()") forgot to change the inline version. Fixes: 9a7875461fd0 ("PM: runtime: Replace pm_runtime_callbacks_present()") Signed-off-by: YueHaibing Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index b492ae00cc90..6c08a085367b 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -265,7 +265,7 @@ static inline void pm_runtime_no_callbacks(struct device *dev) {} static inline void pm_runtime_irq_safe(struct device *dev) {} static inline bool pm_runtime_is_irq_safe(struct device *dev) { return false; } -static inline bool pm_runtime_callbacks_present(struct device *dev) { return false; } +static inline bool pm_runtime_has_no_callbacks(struct device *dev) { return false; } static inline void pm_runtime_mark_last_busy(struct device *dev) {} static inline void __pm_runtime_use_autosuspend(struct device *dev, bool use) {} -- cgit v1.2.3 From 8eb99e9a64a07ea08070591bdc2615526a103e62 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 Apr 2021 19:58:20 +0200 Subject: ACPI: utils: Add acpi_reduced_hardware() helper Add a getter for the acpi_gbl_reduced_hardware variable so that modules can check if they are running on an ACPI reduced-hw platform or not. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3bdcfc4401b7..e2e6db8313c8 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -748,6 +748,11 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) return NULL; } +static inline bool acpi_reduced_hardware(void) +{ + return false; +} + static inline void acpi_dev_put(struct acpi_device *adev) {} static inline bool is_acpi_node(const struct fwnode_handle *fwnode) -- cgit v1.2.3 From d08a745729646f407277e904b02991458f20d261 Mon Sep 17 00:00:00 2001 From: Angela Czubak Date: Thu, 8 Apr 2021 12:37:59 +0200 Subject: resource: Prevent irqresource_disabled() from erasing flags Some Chromebooks use hard-coded interrupts in their ACPI tables. This is an excerpt as dumped on Relm: ... Name (_HID, "ELAN0001") // _HID: Hardware ID Name (_DDN, "Elan Touchscreen ") // _DDN: DOS Device Name Name (_UID, 0x05) // _UID: Unique ID Name (ISTP, Zero) Method (_CRS, 0, NotSerialized) // _CRS: Current Resource Settings { Name (BUF0, ResourceTemplate () { I2cSerialBusV2 (0x0010, ControllerInitiated, 0x00061A80, AddressingMode7Bit, "\\_SB.I2C1", 0x00, ResourceConsumer, , Exclusive, ) Interrupt (ResourceConsumer, Edge, ActiveLow, Exclusive, ,, ) { 0x000000B8, } }) Return (BUF0) /* \_SB_.I2C1.ETSA._CRS.BUF0 */ } ... This interrupt is hard-coded to 0xB8 = 184 which is too high to be mapped to IO-APIC, so no triggering information is propagated as acpi_register_gsi() fails and irqresource_disabled() is issued, which leads to erasing triggering and polarity information. Do not overwrite flags as it leads to erasing triggering and polarity information which might be useful in case of hard-coded interrupts. This way the information can be read later on even though mapping to APIC domain failed. Signed-off-by: Angela Czubak [ rjw: Changelog rearrangement ] Signed-off-by: Rafael J. Wysocki --- include/linux/ioport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 55de385c839c..647744d8514e 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -331,7 +331,7 @@ static inline void irqresource_disabled(struct resource *res, u32 irq) { res->start = irq; res->end = irq; - res->flags = IORESOURCE_IRQ | IORESOURCE_DISABLED | IORESOURCE_UNSET; + res->flags |= IORESOURCE_IRQ | IORESOURCE_DISABLED | IORESOURCE_UNSET; } extern struct address_space *iomem_get_mapping(void); -- cgit v1.2.3 From 9885d016ffa9465f91498e7a70c413c30446ad49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Wed, 7 Apr 2021 22:22:49 +0200 Subject: net: phy: marvell10g: add separate structure for 88X3340 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 88X3340 contains 4 cores similar to 88X3310, but there is a difference: it does not support xaui host mode. Instead the corresponding MACTYPE means rxaui / 5gbase-r / 2500base-x / sgmii without AN Signed-off-by: Marek Behún Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/marvell_phy.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 274abd5fbac3..6b11a5411082 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -22,10 +22,14 @@ #define MARVELL_PHY_ID_88E1545 0x01410ea0 #define MARVELL_PHY_ID_88E1548P 0x01410ec0 #define MARVELL_PHY_ID_88E3016 0x01410e60 -#define MARVELL_PHY_ID_88X3310 0x002b09a0 #define MARVELL_PHY_ID_88E2110 0x002b09b0 #define MARVELL_PHY_ID_88X2222 0x01410f10 +/* PHY IDs and mask for Alaska 10G PHYs */ +#define MARVELL_PHY_ID_88X33X0_MASK 0xfffffff8 +#define MARVELL_PHY_ID_88X3310 0x002b09a0 +#define MARVELL_PHY_ID_88X3340 0x002b09a8 + /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 -- cgit v1.2.3 From fbe82b3db3e58edca33a7e7d9157eb7bdda6e537 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Thu, 8 Apr 2021 15:00:41 +0800 Subject: net: qed: remove unused including Remove including that don't need it. Signed-off-by: Tian Tao Signed-off-by: Zhiqi Song Signed-off-by: David S. Miller --- include/linux/qed/qed_ll2_if.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 2f64ed79cee9..ea273ba1c991 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 714638e02d94fa28c9e030d13d03e663fe24925e Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 29 Mar 2021 13:50:36 +0300 Subject: i2c: Add support for software nodes This makes it possible for the drivers to assign complete software fwnodes to the devices instead of only the device properties in those nodes. Signed-off-by: Heikki Krogerus Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 56622658b215..cb1f882a3e88 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -391,7 +391,8 @@ static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } * @platform_data: stored in i2c_client.dev.platform_data * @of_node: pointer to OpenFirmware device node * @fwnode: device node supplied by the platform firmware - * @properties: additional device properties for the device + * @properties: Deprecated - use swnode instead + * @swnode: software node for the device * @resources: resources associated with the device * @num_resources: number of resources in the @resources array * @irq: stored in i2c_client.irq @@ -416,6 +417,7 @@ struct i2c_board_info { struct device_node *of_node; struct fwnode_handle *fwnode; const struct property_entry *properties; + const struct software_node *swnode; const struct resource *resources; unsigned int num_resources; int irq; -- cgit v1.2.3 From cf68fffb66d60d96209446bfc4a15291dc5a5d41 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 8 Apr 2021 11:28:26 -0700 Subject: add support for Clang CFI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds support for Clang’s forward-edge Control Flow Integrity (CFI) checking. With CONFIG_CFI_CLANG, the compiler injects a runtime check before each indirect function call to ensure the target is a valid function with the correct static type. This restricts possible call targets and makes it more difficult for an attacker to exploit bugs that allow the modification of stored function pointers. For more details, see: https://clang.llvm.org/docs/ControlFlowIntegrity.html Clang requires CONFIG_LTO_CLANG to be enabled with CFI to gain visibility to possible call targets. Kernel modules are supported with Clang’s cross-DSO CFI mode, which allows checking between independently compiled components. With CFI enabled, the compiler injects a __cfi_check() function into the kernel and each module for validating local call targets. For cross-module calls that cannot be validated locally, the compiler calls the global __cfi_slowpath_diag() function, which determines the target module and calls the correct __cfi_check() function. This patch includes a slowpath implementation that uses __module_address() to resolve call targets, and with CONFIG_CFI_CLANG_SHADOW enabled, a shadow map that speeds up module look-ups by ~3x. Clang implements indirect call checking using jump tables and offers two methods of generating them. With canonical jump tables, the compiler renames each address-taken function to .cfi and points the original symbol to a jump table entry, which passes __cfi_check() validation. This isn’t compatible with stand-alone assembly code, which the compiler doesn’t instrument, and would result in indirect calls to assembly code to fail. Therefore, we default to using non-canonical jump tables instead, where the compiler generates a local jump table entry .cfi_jt for each address-taken function, and replaces all references to the function with the address of the jump table entry. Note that because non-canonical jump table addresses are local to each component, they break cross-module function address equality. Specifically, the address of a global function will be different in each module, as it's replaced with the address of a local jump table entry. If this address is passed to a different module, it won’t match the address of the same function taken there. This may break code that relies on comparing addresses passed from other components. CFI checking can be disabled in a function with the __nocfi attribute. Additionally, CFI can be disabled for an entire compilation unit by filtering out CC_FLAGS_CFI. By default, CFI failures result in a kernel panic to stop a potential exploit. CONFIG_CFI_PERMISSIVE enables a permissive mode, where the kernel prints out a rate-limited warning instead, and allows execution to continue. This option is helpful for locating type mismatches, but should only be enabled during development. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Tested-by: Nathan Chancellor Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210408182843.1754385-2-samitolvanen@google.com --- include/linux/cfi.h | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/compiler-clang.h | 2 ++ include/linux/compiler_types.h | 4 ++++ include/linux/init.h | 2 +- include/linux/module.h | 13 +++++++++++-- 5 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 include/linux/cfi.h (limited to 'include/linux') diff --git a/include/linux/cfi.h b/include/linux/cfi.h new file mode 100644 index 000000000000..879744aaa6e0 --- /dev/null +++ b/include/linux/cfi.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) support. + * + * Copyright (C) 2021 Google LLC + */ +#ifndef _LINUX_CFI_H +#define _LINUX_CFI_H + +#ifdef CONFIG_CFI_CLANG +typedef void (*cfi_check_fn)(uint64_t id, void *ptr, void *diag); + +/* Compiler-generated function in each module, and the kernel */ +extern void __cfi_check(uint64_t id, void *ptr, void *diag); + +/* + * Force the compiler to generate a CFI jump table entry for a function + * and store the jump table address to __cfi_jt_. + */ +#define __CFI_ADDRESSABLE(fn, __attr) \ + const void *__cfi_jt_ ## fn __visible __attr = (void *)&fn + +#ifdef CONFIG_CFI_CLANG_SHADOW + +extern void cfi_module_add(struct module *mod, unsigned long base_addr); +extern void cfi_module_remove(struct module *mod, unsigned long base_addr); + +#else + +static inline void cfi_module_add(struct module *mod, unsigned long base_addr) {} +static inline void cfi_module_remove(struct module *mod, unsigned long base_addr) {} + +#endif /* CONFIG_CFI_CLANG_SHADOW */ + +#else /* !CONFIG_CFI_CLANG */ + +#define __CFI_ADDRESSABLE(fn, __attr) + +#endif /* CONFIG_CFI_CLANG */ + +#endif /* _LINUX_CFI_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index d217c382b02d..6de9d0c9377e 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -61,3 +61,5 @@ #if __has_feature(shadow_call_stack) # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif + +#define __nocfi __attribute__((__no_sanitize__("cfi"))) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index e5dd5a4ae946..796935a37e37 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -242,6 +242,10 @@ struct ftrace_likely_data { # define __noscs #endif +#ifndef __nocfi +# define __nocfi +#endif + #ifndef asm_volatile_goto #define asm_volatile_goto(x...) asm goto(x) #endif diff --git a/include/linux/init.h b/include/linux/init.h index 31f54de58429..b3ea15348fbd 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -47,7 +47,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline +#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline __nocfi #define __initdata __section(".init.data") #define __initconst __section(".init.rodata") #define __exitdata __section(".exit.data") diff --git a/include/linux/module.h b/include/linux/module.h index da4b6fbe8ebe..8100bb477d86 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -128,13 +129,17 @@ extern void cleanup_module(void); #define module_init(initfn) \ static inline initcall_t __maybe_unused __inittest(void) \ { return initfn; } \ - int init_module(void) __copy(initfn) __attribute__((alias(#initfn))); + int init_module(void) __copy(initfn) \ + __attribute__((alias(#initfn))); \ + __CFI_ADDRESSABLE(init_module, __initdata); /* This is only required if you want to be unloadable. */ #define module_exit(exitfn) \ static inline exitcall_t __maybe_unused __exittest(void) \ { return exitfn; } \ - void cleanup_module(void) __copy(exitfn) __attribute__((alias(#exitfn))); + void cleanup_module(void) __copy(exitfn) \ + __attribute__((alias(#exitfn))); \ + __CFI_ADDRESSABLE(cleanup_module, __exitdata); #endif @@ -376,6 +381,10 @@ struct module { const s32 *crcs; unsigned int num_syms; +#ifdef CONFIG_CFI_CLANG + cfi_check_fn cfi_check; +#endif + /* Kernel parameters. */ #ifdef CONFIG_SYSFS struct mutex param_lock; -- cgit v1.2.3 From ff301ceb5299551c3650d0e07ba879b766da4cc0 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 8 Apr 2021 11:28:27 -0700 Subject: cfi: add __cficanonical With CONFIG_CFI_CLANG, the compiler replaces a function address taken in C code with the address of a local jump table entry, which passes runtime indirect call checks. However, the compiler won't replace addresses taken in assembly code, which will result in a CFI failure if we later jump to such an address in instrumented C code. The code generated for the non-canonical jump table looks this: : /* In C, &noncanonical points here */ jmp noncanonical ... : /* function body */ ... This change adds the __cficanonical attribute, which tells the compiler to use a canonical jump table for the function instead. This means the compiler will rename the actual function to .cfi and points the original symbol to the jump table entry instead: : /* jump table entry */ jmp canonical.cfi ... : /* function body */ ... As a result, the address taken in assembly, or other non-instrumented code always points to the jump table and therefore, can be used for indirect calls in instrumented code without tripping CFI checks. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Acked-by: Bjorn Helgaas # pci.h Tested-by: Nathan Chancellor Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210408182843.1754385-3-samitolvanen@google.com --- include/linux/compiler-clang.h | 1 + include/linux/compiler_types.h | 4 ++++ include/linux/init.h | 4 ++-- include/linux/pci.h | 4 ++-- 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 6de9d0c9377e..adbe76b203e2 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -63,3 +63,4 @@ #endif #define __nocfi __attribute__((__no_sanitize__("cfi"))) +#define __cficanonical __attribute__((__cfi_canonical_jump_table__)) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 796935a37e37..d29bda7f6ebd 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -246,6 +246,10 @@ struct ftrace_likely_data { # define __nocfi #endif +#ifndef __cficanonical +# define __cficanonical +#endif + #ifndef asm_volatile_goto #define asm_volatile_goto(x...) asm goto(x) #endif diff --git a/include/linux/init.h b/include/linux/init.h index b3ea15348fbd..045ad1650ed1 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -220,8 +220,8 @@ extern bool initcall_debug; __initcall_name(initstub, __iid, id) #define __define_initcall_stub(__stub, fn) \ - int __init __stub(void); \ - int __init __stub(void) \ + int __init __cficanonical __stub(void); \ + int __init __cficanonical __stub(void) \ { \ return fn(); \ } \ diff --git a/include/linux/pci.h b/include/linux/pci.h index 86c799c97b77..39684b72db91 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1944,8 +1944,8 @@ enum pci_fixup_pass { #ifdef CONFIG_LTO_CLANG #define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ class_shift, hook, stub) \ - void stub(struct pci_dev *dev); \ - void stub(struct pci_dev *dev) \ + void __cficanonical stub(struct pci_dev *dev); \ + void __cficanonical stub(struct pci_dev *dev) \ { \ hook(dev); \ } \ -- cgit v1.2.3 From 5caf968262df0ec7a3377fb67d4a6bfa979cb028 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 8 Apr 2021 11:28:28 -0700 Subject: mm: add generic function_nocfi macro With CONFIG_CFI_CLANG, the compiler replaces function addresses in instrumented C code with jump table addresses. This means that __pa_symbol(function) returns the physical address of the jump table entry instead of the actual function, which may not work as the jump table code will immediately jump to a virtual address that may not be mapped. To avoid this address space confusion, this change adds a generic definition for function_nocfi(), which architectures that support CFI can override. The typical implementation of would use inline assembly to take the function address, which avoids compiler instrumentation. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Acked-by: Mark Rutland Tested-by: Nathan Chancellor Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210408182843.1754385-4-samitolvanen@google.com --- include/linux/mm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8ba434287387..22cce9c7dd05 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -124,6 +124,16 @@ extern int mmap_rnd_compat_bits __read_mostly; #define lm_alias(x) __va(__pa_symbol(x)) #endif +/* + * With CONFIG_CFI_CLANG, the compiler replaces function addresses in + * instrumented C code with jump table addresses. Architectures that + * support CFI can define this macro to return the actual function address + * when needed. + */ +#ifndef function_nocfi +#define function_nocfi(x) (x) +#endif + /* * To prevent common memory management code establishing * a zero page mapping on a read fault. -- cgit v1.2.3 From 9f5b4009980f369acb80b72235b2d66c3fd6eca6 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 8 Apr 2021 11:28:33 -0700 Subject: bpf: disable CFI in dispatcher functions BPF dispatcher functions are patched at runtime to perform direct instead of indirect calls. Disable CFI for the dispatcher functions to avoid conflicts. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Tested-by: Nathan Chancellor Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210408182843.1754385-9-samitolvanen@google.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3625f019767d..2f46f98479e1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -650,7 +650,7 @@ struct bpf_dispatcher { struct bpf_ksym ksym; }; -static __always_inline unsigned int bpf_dispatcher_nop_func( +static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, unsigned int (*bpf_func)(const void *, @@ -678,7 +678,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr); } #define DEFINE_BPF_DISPATCHER(name) \ - noinline unsigned int bpf_dispatcher_##name##_func( \ + noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ unsigned int (*bpf_func)(const void *, \ -- cgit v1.2.3 From 4f0f586bf0c898233d8f316f471a21db2abd522d Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 8 Apr 2021 11:28:34 -0700 Subject: treewide: Change list_sort to use const pointers list_sort() internally casts the comparison function passed to it to a different type with constant struct list_head pointers, and uses this pointer to call the functions, which trips indirect call Control-Flow Integrity (CFI) checking. Instead of removing the consts, this change defines the list_cmp_func_t type and changes the comparison function types of all list_sort() callers to use const pointers, thus avoiding type mismatches. Suggested-by: Nick Desaulniers Signed-off-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Reviewed-by: Christoph Hellwig Reviewed-by: Kees Cook Tested-by: Nick Desaulniers Tested-by: Nathan Chancellor Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210408182843.1754385-10-samitolvanen@google.com --- include/linux/list_sort.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list_sort.h b/include/linux/list_sort.h index 20f178c24e9d..453105f74e05 100644 --- a/include/linux/list_sort.h +++ b/include/linux/list_sort.h @@ -6,8 +6,9 @@ struct list_head; +typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *, + const struct list_head *, const struct list_head *); + __attribute__((nonnull(2,3))) -void list_sort(void *priv, struct list_head *head, - int (*cmp)(void *priv, struct list_head *a, - struct list_head *b)); +void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp); #endif -- cgit v1.2.3 From 2361db89aaadfb671db6911b0063e01ec8922c28 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 9 Mar 2021 17:43:38 -0800 Subject: libnvdimm: Notify disk drivers to revalidate region read-only Previous kernels allowed the BLKROSET to override the disk's read-only status. With that situation fixed the pmem driver needs to rely on notification events to reevaluate the disk read-only status after the host region has been marked read-write. Recall that when libnvdimm determines that the persistent memory has lost persistence (for example lack of energy to flush from DRAM to FLASH on an NVDIMM-N device) it marks the region read-only, but that state can be overridden by the user via: echo 0 > /sys/bus/nd/devices/regionX/read_only ...to date there is no notification that the region has restored persistence, so the user override is the only recovery. Fixes: 52f019d43c22 ("block: add a hard-readonly flag to struct gendisk") Reported-by: kernel test robot Reported-by: Vishal Verma Tested-by: Vishal Verma Reviewed-by: Christoph Hellwig Cc: Christoph Hellwig Cc: Ming Lei Cc: Martin K. Petersen Cc: Hannes Reinecke Cc: Jens Axboe Link: https://lore.kernel.org/r/161534060720.528671.2341213328968989192.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/linux/nd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nd.h b/include/linux/nd.h index cec526c8043d..ee9ad76afbba 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -11,6 +11,7 @@ enum nvdimm_event { NVDIMM_REVALIDATE_POISON, + NVDIMM_REVALIDATE_REGION, }; enum nvdimm_claim_class { -- cgit v1.2.3 From 9432bbd969c667fc9c4b1c140c5a745ff2a7b540 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Mar 2021 16:49:03 +0100 Subject: static_call: Relax static_call_update() function argument type static_call_update() had stronger type requirements than regular C, relax them to match. Instead of requiring the @func argument has the exact matching type, allow any type which C is willing to promote to the right (function) pointer type. Specifically this allows (void *) arguments. This cleans up a bunch of static_call_update() callers for PREEMPT_DYNAMIC and should get around silly GCC11 warnings for free. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YFoN7nCl8OfGtpeh@hirez.programming.kicks-ass.net --- include/linux/static_call.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 85ecc789f4ff..8d50f62420ca 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -113,9 +113,9 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool #define static_call_update(name, func) \ ({ \ - BUILD_BUG_ON(!__same_type(*(func), STATIC_CALL_TRAMP(name))); \ + typeof(&STATIC_CALL_TRAMP(name)) __F = (func); \ __static_call_update(&STATIC_CALL_KEY(name), \ - STATIC_CALL_TRAMP_ADDR(name), func); \ + STATIC_CALL_TRAMP_ADDR(name), __F); \ }) #ifdef CONFIG_HAVE_STATIC_CALL_INLINE -- cgit v1.2.3 From 1a7a6e8072ea0e4582de2da63a9088841fde798e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 6 Apr 2021 09:30:36 +0200 Subject: pwm: Clarify which state pwm_get_state() returns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Given that lowlevel drivers usually cannot implement exactly what a consumer requests with pwm_apply_state() there is some rounding involved. pwm_get_state() returns the setting that was requested most recently by the consumer (opposed to what was actually implemented in hardware in reply to the last request). Clarify this in the function kerneldoc. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 8f4eefd129aa..5bb90af4997e 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -91,6 +91,11 @@ struct pwm_device { * pwm_get_state() - retrieve the current PWM state * @pwm: PWM device * @state: state to fill with the current PWM state + * + * The returned PWM state represents the state that was applied by a previous call to + * pwm_apply_state(). Drivers may have to slightly tweak that state before programming it to + * hardware. If pwm_apply_state() was never called, this returns either the current hardware + * state (if supported) or the default settings. */ static inline void pwm_get_state(const struct pwm_device *pwm, struct pwm_state *state) -- cgit v1.2.3 From a503d1628c9c341dc5e1a26272e38182dca3e823 Mon Sep 17 00:00:00 2001 From: Jarvis Jiang Date: Thu, 8 Apr 2021 03:02:20 -0700 Subject: bus: mhi: fix typo in comments for struct mhi_channel_config The word 'rung' is a typo in below comment, fix it. * @event_ring: The event rung index that services this channel Signed-off-by: Jarvis Jiang Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20210408100220.3853-1-jarvis.w.jiang@gmail.com Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d095fba37d1e..944aa3aa3035 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -205,7 +205,7 @@ enum mhi_db_brst_mode { * @num: The number assigned to this channel * @num_elements: The number of elements that can be queued to this channel * @local_elements: The local ring length of the channel - * @event_ring: The event rung index that services this channel + * @event_ring: The event ring index that services this channel * @dir: Direction that data may flow on this channel * @type: Channel type * @ee_mask: Execution Environment mask for this channel -- cgit v1.2.3 From ae196ddb0d3186bc08e529b8ea4bf62161ddfce2 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 7 Apr 2021 09:55:52 +0300 Subject: usb: typec: Port mapping utility Adding functions that can be used to link/unlink ports - USB ports, TBT3/USB4 ports, DisplayPorts and so on - to the USB Type-C connectors they are attached to inside a system. The symlink that is created for the port device is named "connector". Initially only ACPI is supported. ACPI port object shares the _PLD (Physical Location of Device) with the USB Type-C connector that it's attached to. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20210407065555.88110-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 91b4303ca305..e2714722b0c9 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -298,4 +298,17 @@ int typec_find_port_data_role(const char *name); void typec_partner_set_svdm_version(struct typec_partner *partner, enum usb_pd_svdm_ver svdm_version); int typec_get_negotiated_svdm_version(struct typec_port *port); + +#if IS_REACHABLE(CONFIG_TYPEC) +int typec_link_port(struct device *port); +void typec_unlink_port(struct device *port); +#else +static inline int typec_link_port(struct device *port) +{ + return 0; +} + +static inline void typec_unlink_port(struct device *port) { } +#endif + #endif /* __LINUX_USB_TYPEC_H */ -- cgit v1.2.3 From b433c4c789d612cf58739a772bbddbd949bafd20 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 7 Apr 2021 09:55:54 +0300 Subject: usb: Iterator for ports Introducing usb_for_each_port(). It works the same way as usb_for_each_dev(), but instead of going through every USB device in the system, it walks through the USB ports in the system. Acked-by: Alan Stern Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20210407065555.88110-4-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index ddd2f5b2a282..eaae24217e8a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -882,6 +882,15 @@ extern struct usb_host_interface *usb_find_alt_setting( unsigned int iface_num, unsigned int alt_num); +#if IS_REACHABLE(CONFIG_USB) +int usb_for_each_port(void *data, int (*fn)(struct device *, void *)); +#else +static inline int usb_for_each_port(void *data, int (*fn)(struct device *, void *)) +{ + return 0; +} +#endif + /* port claiming functions */ int usb_hub_claim_port(struct usb_device *hdev, unsigned port1, struct usb_dev_state *owner); -- cgit v1.2.3 From 7b458a4c5d7302947556e12c83cfe4da769665d0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 9 Apr 2021 15:40:31 +0200 Subject: usb: typec: Add typec_port_register_altmodes() This can be used by Type-C controller drivers which use a standard usb-connector fwnode, with altmodes sub-node, to describe the available altmodes. Note there are is no devicetree bindings documentation for the altmodes node, this is deliberate. ATM the fwnodes used to register the altmodes are only used internally to pass platform info from a drivers/platform/x86 driver to the type-c subsystem. When a devicetree user of this functionally comes up and the dt-bindings have been hashed out the internal use can be adjusted to match the dt-bindings. Currently the typec_port_register_altmodes() function expects an "altmodes" child fwnode on port->dev with this "altmodes" fwnode having child fwnodes itself with each child containing 2 integer properties: 1. A "svid" property, which sets the id of the altmode, e.g. displayport altmode has a svid of 0xff01. 2. A "vdo" property, typically used as a bitmask describing the capabilities of the altmode, the bits in the vdo are specified in the specification of the altmode. Reviewed-by: Heikki Krogerus Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210409134033.105834-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index e2714722b0c9..e2e44bb1dad8 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -17,6 +17,7 @@ struct typec_partner; struct typec_cable; struct typec_plug; struct typec_port; +struct typec_altmode_ops; struct fwnode_handle; struct device; @@ -138,6 +139,11 @@ struct typec_altmode struct typec_altmode *typec_port_register_altmode(struct typec_port *port, const struct typec_altmode_desc *desc); + +void typec_port_register_altmodes(struct typec_port *port, + const struct typec_altmode_ops *ops, void *drvdata, + struct typec_altmode **altmodes, size_t n); + void typec_unregister_altmode(struct typec_altmode *altmode); struct typec_port *typec_altmode2port(struct typec_altmode *alt); -- cgit v1.2.3 From b37c38484375f5a204500e0b10b40da268090995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Fri, 9 Apr 2021 13:27:04 -0700 Subject: treewide: change my e-mail address, fix my name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change my e-mail address to kabel@kernel.org, and fix my name in non-code parts (add diacritical mark). Link: https://lkml.kernel.org/r/20210325171123.28093-2-kabel@kernel.org Signed-off-by: Marek Behún Cc: Bartosz Golaszewski Cc: Greg Kroah-Hartman Cc: Jassi Brar Cc: Linus Walleij Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/armada-37xx-rwtm-mailbox.h | 2 +- include/linux/moxtet.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/armada-37xx-rwtm-mailbox.h b/include/linux/armada-37xx-rwtm-mailbox.h index 57bb54f6767a..ef4bd705eb65 100644 --- a/include/linux/armada-37xx-rwtm-mailbox.h +++ b/include/linux/armada-37xx-rwtm-mailbox.h @@ -2,7 +2,7 @@ /* * rWTM BIU Mailbox driver for Armada 37xx * - * Author: Marek Behun + * Author: Marek Behún */ #ifndef _LINUX_ARMADA_37XX_RWTM_MAILBOX_H_ diff --git a/include/linux/moxtet.h b/include/linux/moxtet.h index 490db6886dcc..79184948fab4 100644 --- a/include/linux/moxtet.h +++ b/include/linux/moxtet.h @@ -2,7 +2,7 @@ /* * Turris Mox module configuration bus driver * - * Copyright (C) 2019 Marek Behun + * Copyright (C) 2019 Marek Behún */ #ifndef __LINUX_MOXTET_H -- cgit v1.2.3 From fba863b816049b03f3fbb07b10ebdcfe5c4141f7 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 7 Apr 2021 17:51:56 +0200 Subject: net: phy: make PHY PM ops a no-op if MAC driver manages PHY PM Resume callback of the PHY driver is called after the one for the MAC driver. The PHY driver resume callback calls phy_init_hw(), and this is potentially problematic if the MAC driver calls phy_start() in its resume callback. One issue was reported with the fec driver and a KSZ8081 PHY which seems to become unstable if a soft reset is triggered during aneg. The new flag allows MAC drivers to indicate that they take care of suspending/resuming the PHY. Then the MAC PM callbacks can handle any dependency between MAC and PHY PM. Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 8e2cf84b2318..98fb441dd72e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -493,6 +493,7 @@ struct macsec_ops; * @loopback_enabled: Set true if this PHY has been loopbacked successfully. * @downshifted_rate: Set true if link speed has been downshifted. * @is_on_sfp_module: Set true if PHY is located on an SFP module. + * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY * @state: State of the PHY for management purposes * @dev_flags: Device-specific flags used by the PHY driver. * @irq: IRQ number of the PHY's interrupt (-1 if none) @@ -567,6 +568,7 @@ struct phy_device { unsigned loopback_enabled:1; unsigned downshifted_rate:1; unsigned is_on_sfp_module:1; + unsigned mac_managed_pm:1; unsigned autoneg:1; /* The most recently read link state */ -- cgit v1.2.3 From 1b8b20868a6d64cfe8174a21b25b74367bdf0560 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 7 Apr 2021 11:52:02 +0200 Subject: tty: fix return value for unsupported ioctls Drivers should return -ENOTTY ("Inappropriate I/O control operation") when an ioctl isn't supported, while -EINVAL is used for invalid arguments. Fix up the TIOCMGET, TIOCMSET and TIOCGICOUNT helpers which returned -EINVAL when a tty driver did not implement the corresponding operations. Note that the TIOCMGET and TIOCMSET helpers predate git and do not get a corresponding Fixes tag below. Fixes: d281da7ff6f7 ("tty: Make tiocgicount a handler") Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210407095208.31838-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 61c3372d3f32..2f719b471d52 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -228,7 +228,7 @@ * * Called when the device receives a TIOCGICOUNT ioctl. Passed a kernel * structure to complete. This method is optional and will only be called - * if provided (otherwise EINVAL will be returned). + * if provided (otherwise ENOTTY will be returned). */ #include -- cgit v1.2.3 From 86b20677e8b657c8c9701edd907f356795e9e427 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 9 Apr 2021 09:35:12 +0200 Subject: tty: clarify that not all ttys have a class device Commit 30004ac9c090 ("tty: add tty_struct->dev pointer to corresponding device instance") added a struct device pointer field to struct tty_struct which was populated with the corresponding tty class device during initialisation. Unfortunately, not all ttys have a class device (e.g. pseudoterminals and serdev) in which case the device pointer will be set to NULL, something which have bit driver authors over the years. In retrospect perhaps this field should never have been added, but let's at least document the current behaviour. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210409073512.6876-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 51f56e5ec955..a228c0ee484f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -284,7 +284,7 @@ struct tty_operations; struct tty_struct { int magic; struct kref kref; - struct device *dev; + struct device *dev; /* class device or NULL (e.g. ptys, serdev) */ struct tty_driver *driver; const struct tty_operations *ops; int index; -- cgit v1.2.3 From 7ee3c61dcd28bf6e290e06ad382f13511dc790e9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Apr 2021 21:43:39 +0200 Subject: netfilter: bridge: add pre_exit hooks for ebtable unregistration Just like ip/ip6/arptables, the hooks have to be removed, then synchronize_rcu() has to be called to make sure no more packets are being processed before the ruleset data is released. Place the hook unregistration in the pre_exit hook, then call the new ebtables pre_exit function from there. Years ago, when first netns support got added for netfilter+ebtables, this used an older (now removed) netfilter hook unregister API, that did a unconditional synchronize_rcu(). Now that all is done with call_rcu, ebtable_{filter,nat,broute} pernet exit handlers may free the ebtable ruleset while packets are still in flight. This can only happens on module removal, not during netns exit. The new function expects the table name, not the table struct. This is because upcoming patch set (targeting -next) will remove all net->xt.{nat,filter,broute}_table instances, this makes it necessary to avoid external references to those member variables. The existing APIs will be converted, so follow the upcoming scheme of passing name + hook type instead. Fixes: aee12a0a3727e ("ebtables: remove nf_hook_register usage") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 2f5c4e6ecd8a..3a956145a25c 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -110,8 +110,9 @@ extern int ebt_register_table(struct net *net, const struct ebt_table *table, const struct nf_hook_ops *ops, struct ebt_table **res); -extern void ebt_unregister_table(struct net *net, struct ebt_table *table, - const struct nf_hook_ops *); +extern void ebt_unregister_table(struct net *net, struct ebt_table *table); +void ebt_unregister_table_pre_exit(struct net *net, const char *tablename, + const struct nf_hook_ops *ops); extern unsigned int ebt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct ebt_table *table); -- cgit v1.2.3 From d163a925ebbc6eb5b562b0f1d72c7e817aa75c40 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Apr 2021 21:43:40 +0200 Subject: netfilter: arp_tables: add pre_exit hook for table unregister Same problem that also existed in iptables/ip(6)tables, when arptable_filter is removed there is no longer a wait period before the table/ruleset is free'd. Unregister the hook in pre_exit, then remove the table in the exit function. This used to work correctly because the old nf_hook_unregister API did unconditional synchronize_net. The per-net hook unregister function uses call_rcu instead. Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_arp/arp_tables.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 7d3537c40ec9..26a13294318c 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -52,8 +52,9 @@ extern void *arpt_alloc_initial_table(const struct xt_table *); int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res); -void arpt_unregister_table(struct net *net, struct xt_table *table, - const struct nf_hook_ops *ops); +void arpt_unregister_table(struct net *net, struct xt_table *table); +void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); extern unsigned int arpt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); -- cgit v1.2.3 From 4b2b4cc50ba6d607d1611ea6b2046a58d16e45eb Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 29 Mar 2021 13:50:47 +0300 Subject: i2c: Remove support for dangling device properties From now on only accepting complete software nodes. Signed-off-by: Heikki Krogerus Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index cb1f882a3e88..54b3ccc71e37 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -391,7 +391,6 @@ static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } * @platform_data: stored in i2c_client.dev.platform_data * @of_node: pointer to OpenFirmware device node * @fwnode: device node supplied by the platform firmware - * @properties: Deprecated - use swnode instead * @swnode: software node for the device * @resources: resources associated with the device * @num_resources: number of resources in the @resources array @@ -416,7 +415,6 @@ struct i2c_board_info { void *platform_data; struct device_node *of_node; struct fwnode_handle *fwnode; - const struct property_entry *properties; const struct software_node *swnode; const struct resource *resources; unsigned int num_resources; -- cgit v1.2.3 From 07740c92ae57ca21204f1e0c6f59272cdf3190cc Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 8 Apr 2021 19:17:17 +0800 Subject: i2c: core: add managed function for adding i2c adapters Some I2C controller drivers will only unregister the I2C adapter in their .remove() callback, which can be done by simply using a managed variant to add the I2C adapter. So add the managed functions for adding the I2C adapter. Reviewed-by: Andy Shevchenko Reviewed-by: Dmitry Osipenko Signed-off-by: Yicong Yang Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 59b892c5bb05..ce473ddc0378 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -846,6 +846,7 @@ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap) */ #if IS_ENABLED(CONFIG_I2C) int i2c_add_adapter(struct i2c_adapter *adap); +int devm_i2c_add_adapter(struct device *dev, struct i2c_adapter *adapter); void i2c_del_adapter(struct i2c_adapter *adap); int i2c_add_numbered_adapter(struct i2c_adapter *adap); -- cgit v1.2.3 From 3b4c747cd32078172dd238929e38a43cfed83580 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 8 Apr 2021 19:17:18 +0800 Subject: i2c: core: add api to provide frequency mode strings Some I2C drivers like Designware and HiSilicon will print the bus frequency mode information, so add a public one that everyone can make use of. Tested-by: Jarkko Nikula Reviewed-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Yicong Yang Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index ce473ddc0378..e5d8b9dad6bf 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -51,6 +51,9 @@ struct module; struct property_entry; #if IS_ENABLED(CONFIG_I2C) +/* Return the Frequency mode string based on the bus frequency */ +const char *i2c_freq_mode_string(u32 bus_freq_hz); + /* * The master routines are the ones normally used to transmit data to devices * on a bus (or read from them). Apart from two basic transfer functions to -- cgit v1.2.3 From 5b5475826c5265cead7ce4ca6d34ec0c566c70aa Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 21 Mar 2021 18:38:32 -0700 Subject: i2c: ensure timely release of driver-allocated resources More and more drivers rely on devres to manage their resources, however if bus' probe() and release() methods are not trivial and control some of resources as well (for example enable or disable clocks, or attach device to a power domain), we need to make sure that driver-allocated resources are released immediately after driver's remove() method returns, and not postponed until driver core gets around to releasing resources. To fix that we open a new devres group before calling driver's probe() and explicitly release it when we return from driver's remove(). Tested-by: Jeff LaBundy Signed-off-by: Dmitry Torokhov Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index e5d8b9dad6bf..e8f2ac8c9c3d 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -309,6 +309,8 @@ struct i2c_driver { * userspace_devices list * @slave_cb: Callback when I2C slave mode of an adapter is used. The adapter * calls it to pass on slave events to the slave driver. + * @devres_group_id: id of the devres group that will be created for resources + * acquired when probing this device. * * An i2c_client identifies a single device (i.e. chip) connected to an * i2c bus. The behaviour exposed to Linux is defined by the driver @@ -337,6 +339,7 @@ struct i2c_client { #if IS_ENABLED(CONFIG_I2C_SLAVE) i2c_slave_cb_t slave_cb; /* callback for slave mode */ #endif + void *devres_group_id; /* ID of probe devres group */ }; #define to_i2c_client(d) container_of(d, struct i2c_client, dev) -- cgit v1.2.3 From 8f7b5054755e48cc7b217a41e3f1891e01338d2f Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:14 +0000 Subject: kasan: Add report for async mode KASAN provides an asynchronous mode of execution. Add reporting functionality for this mode. Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Reviewed-by: Andrey Konovalov Acked-by: Catalin Marinas Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/20210315132019.33202-5-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- include/linux/kasan.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b91732bd05d7..e7c20c79b342 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -376,6 +376,12 @@ static inline void *kasan_reset_tag(const void *addr) #endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS*/ +#ifdef CONFIG_KASAN_HW_TAGS + +void kasan_report_async(void); + +#endif /* CONFIG_KASAN_HW_TAGS */ + #ifdef CONFIG_KASAN_SW_TAGS void __init kasan_init_sw_tags(void); #else -- cgit v1.2.3 From c781ff12a2f37a9795e13bf328e5053d3e69f9e0 Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Fri, 9 Apr 2021 11:06:34 +0300 Subject: ethtool: Allow network drivers to dump arbitrary EEPROM data Define get_module_eeprom_by_page() ethtool callback and implement netlink infrastructure. get_module_eeprom_by_page() allows network drivers to dump a part of module's EEPROM specified by page and bank numbers along with offset and length. It is effectively a netlink replacement for get_module_info() and get_module_eeprom() pair, which is needed due to emergence of complex non-linear EEPROM layouts. Signed-off-by: Vladyslav Tarasiuk Signed-off-by: David S. Miller --- include/linux/ethtool.h | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4290e2fa3117..9f6f323af59a 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -81,6 +81,7 @@ enum { #define ETH_RSS_HASH_NO_CHANGE 0 struct net_device; +struct netlink_ext_ack; /* Some generic methods drivers may use in their ethtool_ops */ u32 ethtool_op_get_link(struct net_device *dev); @@ -262,6 +263,31 @@ struct ethtool_pause_stats { u64 rx_pause_frames; }; +#define ETH_MODULE_EEPROM_PAGE_LEN 128 +#define ETH_MODULE_MAX_I2C_ADDRESS 0x7f + +/** + * struct ethtool_module_eeprom - EEPROM dump from specified page + * @offset: Offset within the specified EEPROM page to begin read, in bytes. + * @length: Number of bytes to read. + * @page: Page number to read from. + * @bank: Page bank number to read from, if applicable by EEPROM spec. + * @i2c_address: I2C address of a page. Value less than 0x7f expected. Most + * EEPROMs use 0x50 or 0x51. + * @data: Pointer to buffer with EEPROM data of @length size. + * + * This can be used to manage pages during EEPROM dump in ethtool and pass + * required information to the driver. + */ +struct ethtool_module_eeprom { + __u32 offset; + __u32 length; + __u8 page; + __u8 bank; + __u8 i2c_address; + __u8 *data; +}; + /** * struct ethtool_ops - optional netdev operations * @cap_link_lanes_supported: indicates if the driver supports lanes @@ -414,6 +440,9 @@ struct ethtool_pause_stats { * cannot use the standard PHY library helpers. * @get_phy_tunable: Read the value of a PHY tunable. * @set_phy_tunable: Set the value of a PHY tunable. + * @get_module_eeprom_by_page: Get a region of plug-in module EEPROM data from + * specified page. Returns a negative error code or the amount of bytes + * read. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@ -519,6 +548,9 @@ struct ethtool_ops { const struct ethtool_tunable *, void *); int (*set_phy_tunable)(struct net_device *, const struct ethtool_tunable *, const void *); + int (*get_module_eeprom_by_page)(struct net_device *dev, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack); }; int ethtool_check_ops(const struct ethtool_ops *ops); @@ -542,7 +574,6 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd, u32 *dev_speed, u8 *dev_duplex); -struct netlink_ext_ack; struct phy_device; struct phy_tdr_config; -- cgit v1.2.3 From e19b0a3474ab9ef90dd110af9f39fc87329755f1 Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Fri, 9 Apr 2021 11:06:35 +0300 Subject: net/mlx5: Refactor module EEPROM query Prepare for ethtool_ops::get_module_eeprom_data() implementation by extracting common part of mlx5_query_module_eeprom() into a separate function. Signed-off-by: Vladyslav Tarasiuk Signed-off-by: David S. Miller --- include/linux/mlx5/port.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 23edd2db4803..90b87aa82db3 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -62,6 +62,15 @@ enum mlx5_an_status { #define MLX5_EEPROM_PAGE_LENGTH 256 #define MLX5_EEPROM_HIGH_PAGE_LENGTH 128 +struct mlx5_module_eeprom_query_params { + u16 size; + u16 offset; + u16 i2c_address; + u32 page; + u32 bank; + u32 module_number; +}; + enum mlx5e_link_mode { MLX5E_1000BASE_CX_SGMII = 0, MLX5E_1000BASE_KX = 1, -- cgit v1.2.3 From e109d2b204daa223e6d3cdaa369071c3ea96dcbf Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Fri, 9 Apr 2021 11:06:36 +0300 Subject: net/mlx5: Implement get_module_eeprom_by_page() Implement ethtool_ops::get_module_eeprom_by_page() to enable support of new SFP standards. Signed-off-by: Vladyslav Tarasiuk Signed-off-by: David S. Miller --- include/linux/mlx5/port.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 90b87aa82db3..58d56adb9842 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -209,6 +209,8 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, bool *enabled); int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, u16 offset, u16 size, u8 *data); +int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, + struct mlx5_module_eeprom_query_params *params, u8 *data); int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out); int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in); -- cgit v1.2.3 From 4c88fa412a100f925b8ab1aa952a672895f69d35 Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Fri, 9 Apr 2021 11:06:37 +0300 Subject: net/mlx5: Add support for DSFP module EEPROM dumps Allow the driver to recognise DSFP transceiver module ID and therefore allow its EEPROM dumps using ethtool. Signed-off-by: Vladyslav Tarasiuk Signed-off-by: David S. Miller --- include/linux/mlx5/port.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 58d56adb9842..77ea4f9c5265 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -45,6 +45,7 @@ enum mlx5_module_id { MLX5_MODULE_ID_QSFP = 0xC, MLX5_MODULE_ID_QSFP_PLUS = 0xD, MLX5_MODULE_ID_QSFP28 = 0x11, + MLX5_MODULE_ID_DSFP = 0x1B, }; enum mlx5_an_status { -- cgit v1.2.3 From d740513f05a24b1a46722325974223980f068728 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 9 Apr 2021 11:06:40 +0300 Subject: phy: sfp: add netlink SFP support to generic SFP code The new netlink API for reading SFP data requires a new op to be implemented. The idea of the new netlink SFP code is that userspace is responsible to parsing the EEPROM data and requesting pages, rather than have the kernel decide what pages are interesting and returning them. This allows greater flexibility for newer formats. Currently the generic SFP code only supports simple SFPs. Allow i2c address 0x50 and 0x51 to be accessed with page and bank must always be 0. This interface will later be extended when for example QSFP support is added. Signed-off-by: Andrew Lunn Signed-off-by: Vladyslav Tarasiuk Signed-off-by: David S. Miller --- include/linux/sfp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 38893e4dd0f0..302094b855fb 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -542,6 +542,9 @@ phy_interface_t sfp_select_interface(struct sfp_bus *bus, int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo); int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, u8 *data); +int sfp_get_module_eeprom_by_page(struct sfp_bus *bus, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack); void sfp_upstream_start(struct sfp_bus *bus); void sfp_upstream_stop(struct sfp_bus *bus); void sfp_bus_put(struct sfp_bus *bus); @@ -587,6 +590,13 @@ static inline int sfp_get_module_eeprom(struct sfp_bus *bus, return -EOPNOTSUPP; } +static inline int sfp_get_module_eeprom_by_page(struct sfp_bus *bus, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + static inline void sfp_upstream_start(struct sfp_bus *bus) { } -- cgit v1.2.3 From c7aab1a7c52b82d9afd7e03c398eb03dc2aa0507 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 1 Apr 2021 19:53:29 -0600 Subject: task_work: add helper for more targeted task_work canceling The only exported helper we have right now is task_work_cancel(), which cancels any task_work from a given task where func matches the queued work item. This is a bit too coarse for some use cases. Add a task_work_cancel_match() that allows to more specifically target individual work items outside of purely the callback function used. task_work_cancel() can be trivially implemented on top of that, hence do so. Reviewed-by: Oleg Nesterov Signed-off-by: Jens Axboe --- include/linux/task_work.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 0d848a1e9e62..5b8a93f288bb 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -22,6 +22,8 @@ enum task_work_notify_mode { int task_work_add(struct task_struct *task, struct callback_head *twork, enum task_work_notify_mode mode); +struct callback_head *task_work_cancel_match(struct task_struct *task, + bool (*match)(struct callback_head *, void *data), void *data); struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t); void task_work_run(void); -- cgit v1.2.3 From 3f48cf18f886c97a7e775af10696bfed9ddcff31 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 11 Apr 2021 01:46:27 +0100 Subject: io_uring: unify files and task cancel Now __io_uring_cancel() and __io_uring_files_cancel() are very similar and mostly differ by how we count requests, merge them and allow tctx_inflight() to handle counting. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1a5986a97df4dc1378f3fe0ca1eb483dbcf42112.1618101759.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 79cde9906be0..04b650bcbbe5 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -7,19 +7,17 @@ #if defined(CONFIG_IO_URING) struct sock *io_uring_get_socket(struct file *file); -void __io_uring_task_cancel(void); -void __io_uring_files_cancel(struct files_struct *files); +void __io_uring_cancel(struct files_struct *files); void __io_uring_free(struct task_struct *tsk); -static inline void io_uring_task_cancel(void) +static inline void io_uring_files_cancel(struct files_struct *files) { if (current->io_uring) - __io_uring_task_cancel(); + __io_uring_cancel(files); } -static inline void io_uring_files_cancel(struct files_struct *files) +static inline void io_uring_task_cancel(void) { - if (current->io_uring) - __io_uring_files_cancel(files); + return io_uring_files_cancel(NULL); } static inline void io_uring_free(struct task_struct *tsk) { -- cgit v1.2.3 From 907d52310024fae6632aabfc7e833decaf185e5f Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Thu, 1 Apr 2021 10:19:25 +0800 Subject: block: add queue_to_disk() to get gendisk from request_queue Sometimes we need to get the corresponding gendisk from request_queue. It is preferred that block drivers store private data in gendisk->private_data rather than request_queue->queuedata, e.g. see: commit c4a59c4e5db3 ("dm: stop using ->queuedata"). So if only request_queue is given, we need to get its corresponding gendisk to get the private data stored in that gendisk. Reviewed-by: Hannes Reinecke Reviewed-by: Mike Snitzer Signed-off-by: Jeffle Xu Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d5d320da51f8..30d2090583ad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -689,6 +689,8 @@ static inline bool blk_account_rq(struct request *rq) dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \ (dir), (attrs)) +#define queue_to_disk(q) (dev_to_disk(kobj_to_dev((q)->kobj.parent))) + static inline bool queue_is_mq(struct request_queue *q) { return q->mq_ops; -- cgit v1.2.3 From 4c5b479975212065ef39786e115fde42847e95a9 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 7 Apr 2021 14:36:42 +0200 Subject: vfs: add fileattr ops There's a substantial amount of boilerplate in filesystems handling FS_IOC_[GS]ETFLAGS/ FS_IOC_FS[GS]ETXATTR ioctls. Also due to userspace buffers being involved in the ioctl API this is difficult to stack, as shown by overlayfs issues related to these ioctls. Introduce a new internal API named "fileattr" (fsxattr can be confused with xattr, xflags is inappropriate, since this is more than just flags). There's significant overlap between flags and xflags and this API handles the conversions automatically, so filesystems may choose which one to use. In ->fileattr_get() a hint is provided to the filesystem whether flags or xattr are being requested by userspace, but in this series this hint is ignored by all filesystems, since generating all the attributes is cheap. If a filesystem doesn't implemement the fileattr API, just fall back to f_op->ioctl(). When all filesystems are converted, the fallback can be removed. 32bit compat ioctls are now handled by the generic code as well. Signed-off-by: Miklos Szeredi --- include/linux/fileattr.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 4 ++++ 2 files changed, 63 insertions(+) create mode 100644 include/linux/fileattr.h (limited to 'include/linux') diff --git a/include/linux/fileattr.h b/include/linux/fileattr.h new file mode 100644 index 000000000000..9e37e063ac69 --- /dev/null +++ b/include/linux/fileattr.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_FILEATTR_H +#define _LINUX_FILEATTR_H + +/* Flags shared betwen flags/xflags */ +#define FS_COMMON_FL \ + (FS_SYNC_FL | FS_IMMUTABLE_FL | FS_APPEND_FL | \ + FS_NODUMP_FL | FS_NOATIME_FL | FS_DAX_FL | \ + FS_PROJINHERIT_FL) + +#define FS_XFLAG_COMMON \ + (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND | \ + FS_XFLAG_NODUMP | FS_XFLAG_NOATIME | FS_XFLAG_DAX | \ + FS_XFLAG_PROJINHERIT) + +/* + * Merged interface for miscellaneous file attributes. 'flags' originates from + * ext* and 'fsx_flags' from xfs. There's some overlap between the two, which + * is handled by the VFS helpers, so filesystems are free to implement just one + * or both of these sub-interfaces. + */ +struct fileattr { + u32 flags; /* flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */ + /* struct fsxattr: */ + u32 fsx_xflags; /* xflags field value (get/set) */ + u32 fsx_extsize; /* extsize field value (get/set)*/ + u32 fsx_nextents; /* nextents field value (get) */ + u32 fsx_projid; /* project identifier (get/set) */ + u32 fsx_cowextsize; /* CoW extsize field value (get/set)*/ + /* selectors: */ + bool flags_valid:1; + bool fsx_valid:1; +}; + +int copy_fsxattr_to_user(const struct fileattr *fa, struct fsxattr __user *ufa); + +void fileattr_fill_xflags(struct fileattr *fa, u32 xflags); +void fileattr_fill_flags(struct fileattr *fa, u32 flags); + +/** + * fileattr_has_fsx - check for extended flags/attributes + * @fa: fileattr pointer + * + * Return: true if any attributes are present that are not represented in + * ->flags. + */ +static inline bool fileattr_has_fsx(const struct fileattr *fa) +{ + return fa->fsx_valid && + ((fa->fsx_xflags & ~FS_XFLAG_COMMON) || fa->fsx_extsize != 0 || + fa->fsx_projid != 0 || fa->fsx_cowextsize != 0); +} + +int vfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int vfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, + struct fileattr *fa); + +#endif /* _LINUX_FILEATTR_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index ec8f3ddf4a6a..156b78f42a28 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -70,6 +70,7 @@ struct fsverity_info; struct fsverity_operations; struct fs_context; struct fs_parameter_spec; +struct fileattr; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -1963,6 +1964,9 @@ struct inode_operations { struct dentry *, umode_t); int (*set_acl)(struct user_namespace *, struct inode *, struct posix_acl *, int); + int (*fileattr_set)(struct user_namespace *mnt_userns, + struct dentry *dentry, struct fileattr *fa); + int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); } ____cacheline_aligned; static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, -- cgit v1.2.3 From 51db776a430edd7477a779be0dc5c6fef4a05884 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 7 Apr 2021 14:36:44 +0200 Subject: vfs: remove unused ioctl helpers Remove vfs_ioc_setflags_prepare(), vfs_ioc_fssetxattr_check() and simple_fill_fsxattr(), which are no longer used. Signed-off-by: Miklos Szeredi Reviewed-by: Darrick J. Wong --- include/linux/fs.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 156b78f42a28..820fdc62ac30 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3571,18 +3571,6 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); -int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, - unsigned int flags); - -int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa, - struct fsxattr *fa); - -static inline void simple_fill_fsxattr(struct fsxattr *fa, __u32 xflags) -{ - memset(fa, 0, sizeof(*fa)); - fa->fsx_xflags = xflags; -} - /* * Flush file data before changing attributes. Caller must hold any locks * required to prevent further writes to this file until we're done setting -- cgit v1.2.3 From 6f822e1b5d9dda3d20e87365de138046e3baa03a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Apr 2021 15:46:57 +0200 Subject: block: remove zero_fill_bio_iter zero_fill_bio_iter is only used to implement zero_fill_bio, so remove the indirection. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20210412134658.2623190-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index d0246c92a6e8..a8021d79d45d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -485,14 +485,9 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_list_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); -void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); void bio_truncate(struct bio *bio, unsigned new_size); void guard_bio_eod(struct bio *bio); - -static inline void zero_fill_bio(struct bio *bio) -{ - zero_fill_bio_iter(bio, bio->bi_iter); -} +void zero_fill_bio(struct bio *bio); extern const char *bio_devname(struct bio *bio, char *buffer); -- cgit v1.2.3 From 5f03414d4098b5718f1a5e99b43b9d9cb6f3612a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Apr 2021 15:46:58 +0200 Subject: block: move bio_list_copy_data to pktcdvd bio_list_copy_data is only used by pktcdvd, so move it there. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210412134658.2623190-2-hch@lst.de Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index a8021d79d45d..a0b4cfdf62a4 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -483,7 +483,6 @@ extern void bio_check_pages_dirty(struct bio *bio); extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); -extern void bio_list_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); void bio_truncate(struct bio *bio, unsigned new_size); void guard_bio_eod(struct bio *bio); -- cgit v1.2.3 From 51e0158a54321a48d260e95998393934bb0de52c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 6 Apr 2021 20:21:11 -0700 Subject: skmsg: Pass psock pointer to ->psock_update_sk_prot() Using sk_psock() to retrieve psock pointer from sock requires RCU read lock, but we already get psock pointer before calling ->psock_update_sk_prot() in both cases, so we can just pass it without bothering sk_psock(). Fixes: 8a59f9d1e3d4 ("sock: Introduce sk->sk_prot->psock_update_sk_prot()") Reported-by: syzbot+320a3bc8d80f478c37e4@syzkaller.appspotmail.com Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Tested-by: syzbot+320a3bc8d80f478c37e4@syzkaller.appspotmail.com Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210407032111.33398-1-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index f78e90a04a69..e2fb0a5a101e 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -99,7 +99,8 @@ struct sk_psock { void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); void (*saved_data_ready)(struct sock *sk); - int (*psock_update_sk_prot)(struct sock *sk, bool restore); + int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock, + bool restore); struct proto *sk_proto; struct mutex work_mutex; struct sk_psock_work_state work_state; @@ -405,7 +406,7 @@ static inline void sk_psock_restore_proto(struct sock *sk, { sk->sk_prot->unhash = psock->saved_unhash; if (psock->psock_update_sk_prot) - psock->psock_update_sk_prot(sk, true); + psock->psock_update_sk_prot(sk, psock, true); } static inline void sk_psock_set_state(struct sk_psock *psock, -- cgit v1.2.3 From c2ef2f50ad0ccf5460bf4824bc6669240b6c7936 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 6 Apr 2021 16:40:40 -0300 Subject: vfio/mdev: Remove kobj from mdev_parent_ops->create() The kobj here is a type-erased version of mdev_type, which is already stored in the struct mdev_device being passed in. It was only ever used to compute the type_group_id, which is now extracted directly from the mdev. Reviewed-by: Kevin Tian Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Message-Id: <17-v2-d36939638fc6+d54-vfio2_jgg@nvidia.com> Signed-off-by: Alex Williamson --- include/linux/mdev.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 41e919365223..c3a800051d61 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -61,7 +61,6 @@ unsigned int mtype_get_type_group_id(struct kobject *mtype_kobj); * @create: Called to allocate basic resources in parent device's * driver for a particular mediated device. It is * mandatory to provide create ops. - * @kobj: kobject of type for which 'create' is called. * @mdev: mdev_device structure on of mediated device * that is being created * Returns integer: success (0) or error (< 0) @@ -107,7 +106,7 @@ struct mdev_parent_ops { const struct attribute_group **mdev_attr_groups; struct attribute_group **supported_type_groups; - int (*create)(struct kobject *kobj, struct mdev_device *mdev); + int (*create)(struct mdev_device *mdev); int (*remove)(struct mdev_device *mdev); int (*open)(struct mdev_device *mdev); void (*release)(struct mdev_device *mdev); -- cgit v1.2.3 From 9169cff168ff262b4b78597f542e23843d0c494a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 6 Apr 2021 16:40:41 -0300 Subject: vfio/mdev: Correct the function signatures for the mdev_type_attributes The driver core standard is to pass in the properly typed object, the properly typed attribute and the buffer data. It stems from the root kobject method: ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,..) Each subclass of kobject should provide their own function with the same signature but more specific types, eg struct device uses: ssize_t (*show)(struct device *dev, struct device_attribute *attr,..) In this case the existing signature is: ssize_t (*show)(struct kobject *kobj, struct device *dev,..) Where kobj is a 'struct mdev_type *' and dev is 'mdev_type->parent->dev'. Change the mdev_type related sysfs attribute functions to: ssize_t (*show)(struct mdev_type *mtype, struct mdev_type_attribute *attr,..) In order to restore type safety and match the driver core standard There are no current users of 'attr', but if it is ever needed it would be hard to add in retroactively, so do it now. Reviewed-by: Kevin Tian Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Message-Id: <18-v2-d36939638fc6+d54-vfio2_jgg@nvidia.com> Signed-off-by: Alex Williamson --- include/linux/mdev.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdev.h b/include/linux/mdev.h index c3a800051d61..1fb34ea394ad 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -47,7 +47,8 @@ static inline struct device *mdev_get_iommu_device(struct mdev_device *mdev) } unsigned int mdev_get_type_group_id(struct mdev_device *mdev); -unsigned int mtype_get_type_group_id(struct kobject *mtype_kobj); +unsigned int mtype_get_type_group_id(struct mdev_type *mtype); +struct device *mtype_get_parent_dev(struct mdev_type *mtype); /** * struct mdev_parent_ops - Structure to be registered for each parent device to @@ -123,9 +124,11 @@ struct mdev_parent_ops { /* interface for exporting mdev supported type attributes */ struct mdev_type_attribute { struct attribute attr; - ssize_t (*show)(struct kobject *kobj, struct device *dev, char *buf); - ssize_t (*store)(struct kobject *kobj, struct device *dev, - const char *buf, size_t count); + ssize_t (*show)(struct mdev_type *mtype, + struct mdev_type_attribute *attr, char *buf); + ssize_t (*store)(struct mdev_type *mtype, + struct mdev_type_attribute *attr, const char *buf, + size_t count); }; #define MDEV_TYPE_ATTR(_name, _mode, _show, _store) \ -- cgit v1.2.3 From 332d1a0373be32a3a3c152756bca45ff4f4e11b5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 Mar 2021 18:15:36 -0400 Subject: NFS: nfs4_bitmask_adjust() must not change the server global bitmasks As currently set, the calls to nfs4_bitmask_adjust() will end up overwriting the contents of the nfs_server cache_consistency_bitmask field. The intention here should be to modify a private copy of that mask in the close/delegreturn/write arguments. Fixes: 76bd5c016ef4 ("NFSv4: make cache consistency bitmask dynamic") Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3327239fa2f9..cc29dee508f7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -15,6 +15,8 @@ #define NFS_DEF_FILE_IO_SIZE (4096U) #define NFS_MIN_FILE_IO_SIZE (1024U) +#define NFS_BITMASK_SZ 3 + struct nfs4_string { unsigned int len; char *data; @@ -525,7 +527,8 @@ struct nfs_closeargs { struct nfs_seqid * seqid; fmode_t fmode; u32 share_access; - u32 * bitmask; + const u32 * bitmask; + u32 bitmask_store[NFS_BITMASK_SZ]; struct nfs4_layoutreturn_args *lr_args; }; @@ -608,7 +611,8 @@ struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; const nfs4_stateid *stateid; - u32 * bitmask; + const u32 *bitmask; + u32 bitmask_store[NFS_BITMASK_SZ]; struct nfs4_layoutreturn_args *lr_args; }; @@ -648,7 +652,8 @@ struct nfs_pgio_args { union { unsigned int replen; /* used by read */ struct { - u32 * bitmask; /* used by write */ + const u32 * bitmask; /* used by write */ + u32 bitmask_store[NFS_BITMASK_SZ]; /* used by write */ enum nfs3_stable_how stable; /* used by write */ }; }; -- cgit v1.2.3 From 1fe976d308acb6374c899a4ee8025a0a016e453e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 12 Apr 2021 18:57:39 +0200 Subject: net: phy: marvell: fix detection of PHY on Topaz switches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit fee2d546414d ("net: phy: marvell: mv88e6390 temperature sensor reading"), Linux reports the temperature of Topaz hwmon as constant -75°C. This is because switches from the Topaz family (88E6141 / 88E6341) have the address of the temperature sensor register different from Peridot. This address is instead compatible with 88E1510 PHYs, as was used for Topaz before the above mentioned commit. Create a new mapping table between switch family and PHY ID for families which don't have a model number. And define PHY IDs for Topaz and Peridot families. Create a new PHY ID and a new PHY driver for Topaz's internal PHY. The only difference from Peridot's PHY driver is the HWMON probing method. Prior this change Topaz's internal PHY is detected by kernel as: PHY [...] driver [Marvell 88E6390] (irq=63) And afterwards as: PHY [...] driver [Marvell 88E6341 Family] (irq=63) Signed-off-by: Pali Rohár BugLink: https://github.com/globalscaletechnologies/linux/issues/1 Fixes: fee2d546414d ("net: phy: marvell: mv88e6390 temperature sensor reading") Reviewed-by: Marek Behún Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/marvell_phy.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 52b1610eae68..c544b70dfbd2 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -28,11 +28,12 @@ /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 -/* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do +/* These Ethernet switch families contain embedded PHYs, but they do * not have a model ID. So the switch driver traps reads to the ID2 * register and returns the switch family ID */ -#define MARVELL_PHY_ID_88E6390 0x01410f90 +#define MARVELL_PHY_ID_88E6341_FAMILY 0x01410f41 +#define MARVELL_PHY_ID_88E6390_FAMILY 0x01410f90 #define MARVELL_PHY_FAMILY_ID(id) ((id) >> 4) -- cgit v1.2.3 From 1f3208b2d6975f31b9c7c6bf174b84fe9c97492f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 Mar 2021 11:04:34 -0400 Subject: NFS: Add a cache validity flag argument to nfs_revalidate_inode() Add an argument to nfs_revalidate_inode() to allow callers to specify which attributes they need to check for validity. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index eadaabd18dc7..624ffd47a9d4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -386,7 +386,7 @@ extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct user_namespace *, struct inode *, int); extern int nfs_open(struct inode *, struct file *); extern int nfs_attribute_cache_expired(struct inode *inode); -extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); +extern int nfs_revalidate_inode(struct inode *inode, unsigned long flags); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_clear_invalid_mapping(struct address_space *mapping); extern bool nfs_mapping_need_revalidate_inode(struct inode *inode); -- cgit v1.2.3 From fabf2b341502e894001d70f91309dd6f3785e2dc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 Mar 2021 13:14:42 -0400 Subject: NFS: Separate tracking of file nlinks cache validity from the mode/uid/gid Rename can cause us to revalidate the access cache, so lets track the nlinks separately from the mode/uid/gid. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 624ffd47a9d4..41165b988dfb 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -246,11 +246,13 @@ struct nfs4_copy_state { BIT(13) /* Deferred cache invalidation */ #define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */ #define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ +#define NFS_INO_INVALID_NLINK BIT(16) /* cached nlinks is invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ | NFS_INO_INVALID_MTIME \ | NFS_INO_INVALID_SIZE \ + | NFS_INO_INVALID_NLINK \ | NFS_INO_INVALID_OTHER) /* inode metadata is invalid */ /* -- cgit v1.2.3 From 720869eb19f3161980d6d4631d3df7e8c5355993 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 13 Apr 2021 09:41:16 -0400 Subject: NFS: Separate tracking of file mode cache validity from the uid/gid chown()/chgrp() and chmod() are separate operations, and in addition, there are mode operations that are performed automatically by the server. So let's track mode validity separately from the file ownership validity. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 41165b988dfb..ffba254d2098 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -247,12 +247,14 @@ struct nfs4_copy_state { #define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */ #define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ #define NFS_INO_INVALID_NLINK BIT(16) /* cached nlinks is invalid */ +#define NFS_INO_INVALID_MODE BIT(17) /* cached mode is invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ | NFS_INO_INVALID_MTIME \ | NFS_INO_INVALID_SIZE \ | NFS_INO_INVALID_NLINK \ + | NFS_INO_INVALID_MODE \ | NFS_INO_INVALID_OTHER) /* inode metadata is invalid */ /* -- cgit v1.2.3 From 7f08a3359a3c1e39c2a118fbbe583d8c8db14ace Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 26 Mar 2021 09:50:19 -0400 Subject: NFSv4: Add support for the NFSv4.2 "change_attr_type" attribute The change_attr_type allows the server to provide a description of how the change attribute will behave. This again will allow the client to optimise its behaviour w.r.t. attribute revalidation. Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 9 +++++++++ include/linux/nfs_fs_sb.h | 3 +++ include/linux/nfs_xdr.h | 2 ++ 3 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5b4c67c91f56..15004c469807 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -452,6 +452,7 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) +#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) #define FATTR4_WORD2_MODE_UMASK (1UL << 17) #define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) @@ -709,6 +710,14 @@ struct nl4_server { } u; }; +enum nfs4_change_attr_type { + NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0, + NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1, + NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2, + NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3, + NFS4_CHANGE_TYPE_IS_UNDEFINED = 4, +}; + /* * Options for setxattr. These match the flags for setxattr(2). */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6f76b32a0238..fbcdfd9f7a7f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -180,6 +180,9 @@ struct nfs_server { #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ #define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */ + enum nfs4_change_attr_type + change_attr_type;/* Description of change attribute */ + struct nfs_fsid fsid; __u64 maxfilesize; /* maximum file size */ struct timespec64 time_delta; /* smallest time granularity */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cc29dee508f7..717ecc87c9e7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -152,6 +152,8 @@ struct nfs_fsinfo { __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ __u32 clone_blksize; /* granularity of a CLONE operation */ + enum nfs4_change_attr_type + change_attr_type; /* Info about change attr */ __u32 xattr_support; /* User xattrs supported */ }; -- cgit v1.2.3 From 655cdafdec1105d0552aa19ffb5ffef7aead1548 Mon Sep 17 00:00:00 2001 From: Zhang Yunkai Date: Tue, 13 Apr 2021 10:52:56 +0000 Subject: lightnvm: remove duplicate include in lightnvm.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'linux/blkdev.h' and 'uapi/linux/lightnvm.h' included in 'lightnvm.h' is duplicated.It is also included in the 5th and 7th line. Signed-off-by: Zhang Yunkai Signed-off-by: Matias Bjørling Link: https://lore.kernel.org/r/20210413105257.159260-4-matias.bjorling@wdc.com Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 1db223710b28..0908abda9c1b 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -112,10 +112,8 @@ struct nvm_dev_ops { #ifdef CONFIG_NVM -#include #include #include -#include enum { /* HW Responsibilities */ -- cgit v1.2.3 From 63f9c44bca5e10fb1fd86aee7e38039ed98f95cc Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 11 Apr 2021 15:29:18 +0300 Subject: net/mlx5: Add MEMIC operations related bits Add the MEMIC operations bits and structures to the mlx5_ifc file. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 432290b58a0b..47241ebfcf7d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -133,6 +133,7 @@ enum { MLX5_CMD_OP_PAGE_FAULT_RESUME = 0x204, MLX5_CMD_OP_ALLOC_MEMIC = 0x205, MLX5_CMD_OP_DEALLOC_MEMIC = 0x206, + MLX5_CMD_OP_MODIFY_MEMIC = 0x207, MLX5_CMD_OP_CREATE_EQ = 0x301, MLX5_CMD_OP_DESTROY_EQ = 0x302, MLX5_CMD_OP_QUERY_EQ = 0x303, @@ -1017,7 +1018,11 @@ struct mlx5_ifc_device_mem_cap_bits { u8 header_modify_sw_icm_start_address[0x40]; - u8 reserved_at_180[0x680]; + u8 reserved_at_180[0x80]; + + u8 memic_operations[0x20]; + + u8 reserved_at_220[0x5e0]; }; struct mlx5_ifc_device_event_cap_bits { @@ -10417,6 +10422,41 @@ struct mlx5_ifc_destroy_vport_lag_in_bits { u8 reserved_at_40[0x40]; }; +enum { + MLX5_MODIFY_MEMIC_OP_MOD_ALLOC, + MLX5_MODIFY_MEMIC_OP_MOD_DEALLOC, +}; + +struct mlx5_ifc_modify_memic_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x18]; + u8 memic_operation_type[0x8]; + + u8 memic_start_addr[0x40]; + + u8 reserved_at_c0[0x140]; +}; + +struct mlx5_ifc_modify_memic_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + u8 memic_operation_addr[0x40]; + + u8 reserved_at_c0[0x140]; +}; + struct mlx5_ifc_alloc_memic_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; -- cgit v1.2.3 From 83216e3988cd196183542937c9bd58b279f946af Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 12 Apr 2021 19:47:17 +0200 Subject: of: net: pass the dst buffer to of_get_mac_address() of_get_mac_address() returns a "const void*" pointer to a MAC address. Lately, support to fetch the MAC address by an NVMEM provider was added. But this will only work with platform devices. It will not work with PCI devices (e.g. of an integrated root complex) and esp. not with DSA ports. There is an of_* variant of the nvmem binding which works without devices. The returned data of a nvmem_cell_read() has to be freed after use. On the other hand the return of_get_mac_address() points to some static data without a lifetime. The trick for now, was to allocate a device resource managed buffer which is then returned. This will only work if we have an actual device. Change it, so that the caller of of_get_mac_address() has to supply a buffer where the MAC address is written to. Unfortunately, this will touch all drivers which use the of_get_mac_address(). Usually the code looks like: const char *addr; addr = of_get_mac_address(np); if (!IS_ERR(addr)) ether_addr_copy(ndev->dev_addr, addr); This can then be simply rewritten as: of_get_mac_address(np, ndev->dev_addr); Sometimes is_valid_ether_addr() is used to test the MAC address. of_get_mac_address() already makes sure, it just returns a valid MAC address. Thus we can just test its return code. But we have to be careful if there are still other sources for the MAC address before the of_get_mac_address(). In this case we have to keep the is_valid_ether_addr() call. The following coccinelle patch was used to convert common cases to the new style. Afterwards, I've manually gone over the drivers and fixed the return code variable: either used a new one or if one was already available use that. Mansour Moufid, thanks for that coccinelle patch! @a@ identifier x; expression y, z; @@ - x = of_get_mac_address(y); + x = of_get_mac_address(y, z); <... - ether_addr_copy(z, x); ...> @@ identifier a.x; @@ - if (<+... x ...+>) {} @@ identifier a.x; @@ if (<+... x ...+>) { ... } - else {} @@ identifier a.x; expression e; @@ - if (<+... x ...+>@e) - {} - else + if (!(e)) {...} @@ expression x, y, z; @@ - x = of_get_mac_address(y, z); + of_get_mac_address(y, z); ... when != x All drivers, except drivers/net/ethernet/aeroflex/greth.c, were compile-time tested. Suggested-by: Andrew Lunn Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/of_net.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_net.h b/include/linux/of_net.h index 71bbfcf3adcd..daef3b0d9270 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -13,7 +13,7 @@ struct net_device; extern int of_get_phy_mode(struct device_node *np, phy_interface_t *interface); -extern const void *of_get_mac_address(struct device_node *np); +extern int of_get_mac_address(struct device_node *np, u8 *mac); extern struct net_device *of_find_net_device_by_node(struct device_node *np); #else static inline int of_get_phy_mode(struct device_node *np, @@ -22,9 +22,9 @@ static inline int of_get_phy_mode(struct device_node *np, return -ENODEV; } -static inline const void *of_get_mac_address(struct device_node *np) +static inline int of_get_mac_address(struct device_node *np, u8 *mac) { - return ERR_PTR(-ENODEV); + return -ENODEV; } static inline struct net_device *of_find_net_device_by_node(struct device_node *np) -- cgit v1.2.3 From 441e8c66b23e027c00ccebd70df9fd933918eefe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 13 Apr 2021 11:16:06 +0200 Subject: bpf: Return target info when a tracing bpf_link is queried MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is currently no way to discover the target of a tracing program attachment after the fact. Add this information to bpf_link_info and return it when querying the bpf_link fd. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210413091607.58945-1-toke@redhat.com --- include/linux/bpf_verifier.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 51c2ffa3d901..6023a1367853 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -487,6 +487,15 @@ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, return ((u64)btf_obj_id(btf) << 32) | 0x80000000 | btf_id; } +/* unpack the IDs from the key as constructed above */ +static inline void bpf_trampoline_unpack_key(u64 key, u32 *obj_id, u32 *btf_id) +{ + if (obj_id) + *obj_id = key >> 32; + if (btf_id) + *btf_id = key & 0x7FFFFFFF; +} + int bpf_check_attach_target(struct bpf_verifier_log *log, const struct bpf_prog *prog, const struct bpf_prog *tgt_prog, -- cgit v1.2.3 From 316d0d92fbc9b926bda8ce7ccc109de0dccb4d92 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 5 Apr 2021 14:40:44 +0300 Subject: mfd: bd718x7: simplify by cleaning unnecessary device data Most ROHM PMIC sub-devices only use the regmap pointer from parent device. They can obtain this by dev_get_regamap so in most cases the MFD device does not need to allocate and populate the driver data. Simplify drivers by removing this. The BD70528 still needs the access to watchdog mutex so keep rohm_regmap_dev in use on BD70528 RTC and WDG drivers for now. Signed-off-by: Matti Vaittinen Signed-off-by: Lee Jones --- include/linux/mfd/rohm-bd718x7.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rohm-bd718x7.h b/include/linux/mfd/rohm-bd718x7.h index bee2474a8f9f..df2918198d37 100644 --- a/include/linux/mfd/rohm-bd718x7.h +++ b/include/linux/mfd/rohm-bd718x7.h @@ -310,17 +310,4 @@ enum { BD718XX_PWRBTN_LONG_PRESS_15S }; -struct bd718xx { - /* - * Please keep this as the first member here as some - * drivers (clk) supporting more than one chip may only know this - * generic struct 'struct rohm_regmap_dev' and assume it is - * the first chunk of parent device's private data. - */ - struct rohm_regmap_dev chip; - - int chip_irq; - struct regmap_irq_chip_data *irq_data; -}; - #endif /* __LINUX_MFD_BD718XX_H__ */ -- cgit v1.2.3 From 488b205e57181a56f3503cb97240d32798d3d3bd Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 5 Apr 2021 14:42:13 +0300 Subject: mfd: Add ROHM BD71815 ID Add chip ID for ROHM BD71815 and PMIC so that drivers can identify this IC. Signed-off-by: Matti Vaittinen Signed-off-by: Lee Jones --- include/linux/mfd/rohm-generic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index 66f673c35303..e107b4769101 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -11,6 +11,7 @@ enum rohm_chip_type { ROHM_CHIP_TYPE_BD71837 = 0, ROHM_CHIP_TYPE_BD71847, ROHM_CHIP_TYPE_BD70528, + ROHM_CHIP_TYPE_BD71815, ROHM_CHIP_TYPE_BD71828, ROHM_CHIP_TYPE_BD9571, ROHM_CHIP_TYPE_BD9574, -- cgit v1.2.3 From 06b2f5f5ecf3e3a3b74b42057b654a5dba7e63bb Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 5 Apr 2021 14:42:34 +0300 Subject: mfd: Sort ROHM chip ID list for better readability Sort the ID list so it is easier to see which ICs are present. Signed-off-by: Matti Vaittinen Suggested-by: Lee Jones Signed-off-by: Lee Jones --- include/linux/mfd/rohm-generic.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index e107b4769101..9e2880e06950 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -8,13 +8,13 @@ #include enum rohm_chip_type { - ROHM_CHIP_TYPE_BD71837 = 0, - ROHM_CHIP_TYPE_BD71847, + ROHM_CHIP_TYPE_BD9571, + ROHM_CHIP_TYPE_BD9574, ROHM_CHIP_TYPE_BD70528, ROHM_CHIP_TYPE_BD71815, ROHM_CHIP_TYPE_BD71828, - ROHM_CHIP_TYPE_BD9571, - ROHM_CHIP_TYPE_BD9574, + ROHM_CHIP_TYPE_BD71837, + ROHM_CHIP_TYPE_BD71847, ROHM_CHIP_TYPE_AMOUNT }; -- cgit v1.2.3 From 4dcdcfd5abb34d3139669fcd830b756d45678c47 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 5 Apr 2021 14:42:51 +0300 Subject: mfd: Support for ROHM BD71815 PMIC core Add core support for ROHM BD71815 Power Management IC. The IC integrates regulators, a battery charger with a coulomb counter, a real-time clock (RTC), clock gate and general-purpose outputs (GPO). Signed-off-by: Matti Vaittinen Signed-off-by: Lee Jones --- include/linux/mfd/rohm-bd71815.h | 562 +++++++++++++++++++++++++++++++++++++++ include/linux/mfd/rohm-bd71828.h | 3 + 2 files changed, 565 insertions(+) create mode 100644 include/linux/mfd/rohm-bd71815.h (limited to 'include/linux') diff --git a/include/linux/mfd/rohm-bd71815.h b/include/linux/mfd/rohm-bd71815.h new file mode 100644 index 000000000000..ec6d9612bebe --- /dev/null +++ b/include/linux/mfd/rohm-bd71815.h @@ -0,0 +1,562 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2021 ROHM Semiconductors. + * + * Author: Matti Vaittinen + * + * Copyright 2014 Embest Technology Co. Ltd. Inc. + * + * Author: yanglsh@embest-tech.com + */ + +#ifndef _MFD_BD71815_H +#define _MFD_BD71815_H + +#include + +enum { + BD71815_BUCK1 = 0, + BD71815_BUCK2, + BD71815_BUCK3, + BD71815_BUCK4, + BD71815_BUCK5, + /* General Purpose */ + BD71815_LDO1, + BD71815_LDO2, + BD71815_LDO3, + /* LDOs for SD Card and SD Card Interface */ + BD71815_LDO4, + BD71815_LDO5, + /* LDO for DDR Reference Voltage */ + BD71815_LDODVREF, + /* LDO for Low-Power State Retention */ + BD71815_LDOLPSR, + BD71815_WLED, + BD71815_REGULATOR_CNT, +}; + +#define BD71815_SUPPLY_STATE_ENABLED 0x1 + +enum { + BD71815_REG_DEVICE = 0, + BD71815_REG_PWRCTRL, + BD71815_REG_BUCK1_MODE, + BD71815_REG_BUCK2_MODE, + BD71815_REG_BUCK3_MODE, + BD71815_REG_BUCK4_MODE, + BD71815_REG_BUCK5_MODE, + BD71815_REG_BUCK1_VOLT_H, + BD71815_REG_BUCK1_VOLT_L, + BD71815_REG_BUCK2_VOLT_H, + BD71815_REG_BUCK2_VOLT_L, + BD71815_REG_BUCK3_VOLT, + BD71815_REG_BUCK4_VOLT, + BD71815_REG_BUCK5_VOLT, + BD71815_REG_LED_CTRL, + BD71815_REG_LED_DIMM, + BD71815_REG_LDO_MODE1, + BD71815_REG_LDO_MODE2, + BD71815_REG_LDO_MODE3, + BD71815_REG_LDO_MODE4, + BD71815_REG_LDO1_VOLT, + BD71815_REG_LDO2_VOLT, + BD71815_REG_LDO3_VOLT, + BD71815_REG_LDO4_VOLT, + BD71815_REG_LDO5_VOLT_H, + BD71815_REG_LDO5_VOLT_L, + BD71815_REG_BUCK_PD_DIS, + BD71815_REG_LDO_PD_DIS, + BD71815_REG_GPO, + BD71815_REG_OUT32K, + BD71815_REG_SEC, + BD71815_REG_MIN, + BD71815_REG_HOUR, + BD71815_REG_WEEK, + BD71815_REG_DAY, + BD71815_REG_MONTH, + BD71815_REG_YEAR, + BD71815_REG_ALM0_SEC, + + BD71815_REG_ALM1_SEC = 0x2C, + + BD71815_REG_ALM0_MASK = 0x33, + BD71815_REG_ALM1_MASK, + BD71815_REG_ALM2, + BD71815_REG_TRIM, + BD71815_REG_CONF, + BD71815_REG_SYS_INIT, + BD71815_REG_CHG_STATE, + BD71815_REG_CHG_LAST_STATE, + BD71815_REG_BAT_STAT, + BD71815_REG_DCIN_STAT, + BD71815_REG_VSYS_STAT, + BD71815_REG_CHG_STAT, + BD71815_REG_CHG_WDT_STAT, + BD71815_REG_BAT_TEMP, + BD71815_REG_IGNORE_0, + BD71815_REG_INHIBIT_0, + BD71815_REG_DCIN_CLPS, + BD71815_REG_VSYS_REG, + BD71815_REG_VSYS_MAX, + BD71815_REG_VSYS_MIN, + BD71815_REG_CHG_SET1, + BD71815_REG_CHG_SET2, + BD71815_REG_CHG_WDT_PRE, + BD71815_REG_CHG_WDT_FST, + BD71815_REG_CHG_IPRE, + BD71815_REG_CHG_IFST, + BD71815_REG_CHG_IFST_TERM, + BD71815_REG_CHG_VPRE, + BD71815_REG_CHG_VBAT_1, + BD71815_REG_CHG_VBAT_2, + BD71815_REG_CHG_VBAT_3, + BD71815_REG_CHG_LED_1, + BD71815_REG_VF_TH, + BD71815_REG_BAT_SET_1, + BD71815_REG_BAT_SET_2, + BD71815_REG_BAT_SET_3, + BD71815_REG_ALM_VBAT_TH_U, + BD71815_REG_ALM_VBAT_TH_L, + BD71815_REG_ALM_DCIN_TH, + BD71815_REG_ALM_VSYS_TH, + BD71815_REG_VM_IBAT_U, + BD71815_REG_VM_IBAT_L, + BD71815_REG_VM_VBAT_U, + BD71815_REG_VM_VBAT_L, + BD71815_REG_VM_BTMP, + BD71815_REG_VM_VTH, + BD71815_REG_VM_DCIN_U, + BD71815_REG_VM_DCIN_L, + BD71815_REG_VM_VSYS, + BD71815_REG_VM_VF, + BD71815_REG_VM_OCI_PRE_U, + BD71815_REG_VM_OCI_PRE_L, + BD71815_REG_VM_OCV_PRE_U, + BD71815_REG_VM_OCV_PRE_L, + BD71815_REG_VM_OCI_PST_U, + BD71815_REG_VM_OCI_PST_L, + BD71815_REG_VM_OCV_PST_U, + BD71815_REG_VM_OCV_PST_L, + BD71815_REG_VM_SA_VBAT_U, + BD71815_REG_VM_SA_VBAT_L, + BD71815_REG_VM_SA_IBAT_U, + BD71815_REG_VM_SA_IBAT_L, + BD71815_REG_CC_CTRL, + BD71815_REG_CC_BATCAP1_TH_U, + BD71815_REG_CC_BATCAP1_TH_L, + BD71815_REG_CC_BATCAP2_TH_U, + BD71815_REG_CC_BATCAP2_TH_L, + BD71815_REG_CC_BATCAP3_TH_U, + BD71815_REG_CC_BATCAP3_TH_L, + BD71815_REG_CC_STAT, + BD71815_REG_CC_CCNTD_3, + BD71815_REG_CC_CCNTD_2, + BD71815_REG_CC_CCNTD_1, + BD71815_REG_CC_CCNTD_0, + BD71815_REG_CC_CURCD_U, + BD71815_REG_CC_CURCD_L, + BD71815_REG_VM_OCUR_THR_1, + BD71815_REG_VM_OCUR_DUR_1, + BD71815_REG_VM_OCUR_THR_2, + BD71815_REG_VM_OCUR_DUR_2, + BD71815_REG_VM_OCUR_THR_3, + BD71815_REG_VM_OCUR_DUR_3, + BD71815_REG_VM_OCUR_MON, + BD71815_REG_VM_BTMP_OV_THR, + BD71815_REG_VM_BTMP_OV_DUR, + BD71815_REG_VM_BTMP_LO_THR, + BD71815_REG_VM_BTMP_LO_DUR, + BD71815_REG_VM_BTMP_MON, + BD71815_REG_INT_EN_01, + + BD71815_REG_INT_EN_11 = 0x95, + BD71815_REG_INT_EN_12, + BD71815_REG_INT_STAT, + BD71815_REG_INT_STAT_01, + BD71815_REG_INT_STAT_02, + BD71815_REG_INT_STAT_03, + BD71815_REG_INT_STAT_04, + BD71815_REG_INT_STAT_05, + BD71815_REG_INT_STAT_06, + BD71815_REG_INT_STAT_07, + BD71815_REG_INT_STAT_08, + BD71815_REG_INT_STAT_09, + BD71815_REG_INT_STAT_10, + BD71815_REG_INT_STAT_11, + BD71815_REG_INT_STAT_12, + BD71815_REG_INT_UPDATE, + + BD71815_REG_VM_VSYS_U = 0xC0, + BD71815_REG_VM_VSYS_L, + BD71815_REG_VM_SA_VSYS_U, + BD71815_REG_VM_SA_VSYS_L, + + BD71815_REG_VM_SA_IBAT_MIN_U = 0xD0, + BD71815_REG_VM_SA_IBAT_MIN_L, + BD71815_REG_VM_SA_IBAT_MAX_U, + BD71815_REG_VM_SA_IBAT_MAX_L, + BD71815_REG_VM_SA_VBAT_MIN_U, + BD71815_REG_VM_SA_VBAT_MIN_L, + BD71815_REG_VM_SA_VBAT_MAX_U, + BD71815_REG_VM_SA_VBAT_MAX_L, + BD71815_REG_VM_SA_VSYS_MIN_U, + BD71815_REG_VM_SA_VSYS_MIN_L, + BD71815_REG_VM_SA_VSYS_MAX_U, + BD71815_REG_VM_SA_VSYS_MAX_L, + BD71815_REG_VM_SA_MINMAX_CLR, + + BD71815_REG_REX_CCNTD_3 = 0xE0, + BD71815_REG_REX_CCNTD_2, + BD71815_REG_REX_CCNTD_1, + BD71815_REG_REX_CCNTD_0, + BD71815_REG_REX_SA_VBAT_U, + BD71815_REG_REX_SA_VBAT_L, + BD71815_REG_REX_CTRL_1, + BD71815_REG_REX_CTRL_2, + BD71815_REG_FULL_CCNTD_3, + BD71815_REG_FULL_CCNTD_2, + BD71815_REG_FULL_CCNTD_1, + BD71815_REG_FULL_CCNTD_0, + BD71815_REG_FULL_CTRL, + + BD71815_REG_CCNTD_CHG_3 = 0xF0, + BD71815_REG_CCNTD_CHG_2, + + BD71815_REG_TEST_MODE = 0xFE, + BD71815_MAX_REGISTER, +}; + +/* BD71815_REG_BUCK1_MODE bits */ +#define BD71815_BUCK_RAMPRATE_MASK 0xC0 +#define BD71815_BUCK_RAMPRATE_10P00MV 0x0 +#define BD71815_BUCK_RAMPRATE_5P00MV 0x01 +#define BD71815_BUCK_RAMPRATE_2P50MV 0x02 +#define BD71815_BUCK_RAMPRATE_1P25MV 0x03 + +#define BD71815_BUCK_PWM_FIXED BIT(4) +#define BD71815_BUCK_SNVS_ON BIT(3) +#define BD71815_BUCK_RUN_ON BIT(2) +#define BD71815_BUCK_LPSR_ON BIT(1) +#define BD71815_BUCK_SUSP_ON BIT(0) + +/* BD71815_REG_BUCK1_VOLT_H bits */ +#define BD71815_BUCK_DVSSEL BIT(7) +#define BD71815_BUCK_STBY_DVS BIT(6) +#define BD71815_VOLT_MASK 0x3F +#define BD71815_BUCK1_H_DEFAULT 0x14 +#define BD71815_BUCK1_L_DEFAULT 0x14 + +/* BD71815_REG_BUCK2_VOLT_H bits */ +#define BD71815_BUCK2_H_DEFAULT 0x14 +#define BD71815_BUCK2_L_DEFAULT 0x14 + +/* WLED output */ +/* current register mask */ +#define LED_DIMM_MASK 0x3f +/* LED enable bits at LED_CTRL reg */ +#define LED_CHGDONE_EN BIT(4) +#define LED_RUN_ON BIT(2) +#define LED_LPSR_ON BIT(1) +#define LED_SUSP_ON BIT(0) + +/* BD71815_REG_LDO1_CTRL bits */ +#define LDO1_EN BIT(0) +#define LDO2_EN BIT(1) +#define LDO3_EN BIT(2) +#define DVREF_EN BIT(3) +#define VOSNVS_SW_EN BIT(4) + +/* LDO_MODE1_register */ +#define LDO1_SNVS_ON BIT(7) +#define LDO1_RUN_ON BIT(6) +#define LDO1_LPSR_ON BIT(5) +#define LDO1_SUSP_ON BIT(4) +/* set => register control, unset => GPIO control */ +#define LDO4_MODE_MASK BIT(3) +#define LDO4_MODE_I2C BIT(3) +#define LDO4_MODE_GPIO 0 +/* set => register control, unset => start when DCIN connected */ +#define LDO3_MODE_MASK BIT(2) +#define LDO3_MODE_I2C BIT(2) +#define LDO3_MODE_DCIN 0 + +/* LDO_MODE2 register */ +#define LDO3_SNVS_ON BIT(7) +#define LDO3_RUN_ON BIT(6) +#define LDO3_LPSR_ON BIT(5) +#define LDO3_SUSP_ON BIT(4) +#define LDO2_SNVS_ON BIT(3) +#define LDO2_RUN_ON BIT(2) +#define LDO2_LPSR_ON BIT(1) +#define LDO2_SUSP_ON BIT(0) + + +/* LDO_MODE3 register */ +#define LDO5_SNVS_ON BIT(7) +#define LDO5_RUN_ON BIT(6) +#define LDO5_LPSR_ON BIT(5) +#define LDO5_SUSP_ON BIT(4) +#define LDO4_SNVS_ON BIT(3) +#define LDO4_RUN_ON BIT(2) +#define LDO4_LPSR_ON BIT(1) +#define LDO4_SUSP_ON BIT(0) + +/* LDO_MODE4 register */ +#define DVREF_SNVS_ON BIT(7) +#define DVREF_RUN_ON BIT(6) +#define DVREF_LPSR_ON BIT(5) +#define DVREF_SUSP_ON BIT(4) +#define LDO_LPSR_SNVS_ON BIT(3) +#define LDO_LPSR_RUN_ON BIT(2) +#define LDO_LPSR_LPSR_ON BIT(1) +#define LDO_LPSR_SUSP_ON BIT(0) + +/* BD71815_REG_OUT32K bits */ +#define OUT32K_EN BIT(0) +#define OUT32K_MODE BIT(1) +#define OUT32K_MODE_CMOS BIT(1) +#define OUT32K_MODE_OPEN_DRAIN 0 + +/* BD71815_REG_BAT_STAT bits */ +#define BAT_DET BIT(5) +#define BAT_DET_OFFSET 5 +#define BAT_DET_DONE BIT(4) +#define VBAT_OV BIT(3) +#define DBAT_DET BIT(0) + +/* BD71815_REG_VBUS_STAT bits */ +#define VBUS_DET BIT(0) + +#define BD71815_REG_RTC_START BD71815_REG_SEC +#define BD71815_REG_RTC_ALM_START BD71815_REG_ALM0_SEC + +/* BD71815_REG_ALM0_MASK bits */ +#define A0_ONESEC BIT(7) + +/* BD71815_REG_INT_EN_00 bits */ +#define ALMALE BIT(0) + +/* BD71815_REG_INT_STAT_03 bits */ +#define DCIN_MON_DET BIT(1) +#define DCIN_MON_RES BIT(0) +#define POWERON_LONG BIT(2) +#define POWERON_MID BIT(3) +#define POWERON_SHORT BIT(4) +#define POWERON_PRESS BIT(5) + +/* BD71805_REG_INT_STAT_08 bits */ +#define VBAT_MON_DET BIT(1) +#define VBAT_MON_RES BIT(0) + +/* BD71805_REG_INT_STAT_11 bits */ +#define INT_STAT_11_VF_DET BIT(7) +#define INT_STAT_11_VF_RES BIT(6) +#define INT_STAT_11_VF125_DET BIT(5) +#define INT_STAT_11_VF125_RES BIT(4) +#define INT_STAT_11_OVTMP_DET BIT(3) +#define INT_STAT_11_OVTMP_RES BIT(2) +#define INT_STAT_11_LOTMP_DET BIT(1) +#define INT_STAT_11_LOTMP_RES BIT(0) + +#define VBAT_MON_DET BIT(1) +#define VBAT_MON_RES BIT(0) + +/* BD71815_REG_PWRCTRL bits */ +#define RESTARTEN BIT(0) + +/* BD71815_REG_GPO bits */ +#define READY_FORCE_LOW BIT(2) +#define BD71815_GPIO_DRIVE_MASK BIT(4) +#define BD71815_GPIO_OPEN_DRAIN 0 +#define BD71815_GPIO_CMOS BIT(4) + +/* BD71815 interrupt masks */ +enum { + BD71815_INT_EN_01_BUCKAST_MASK = 0x0F, + BD71815_INT_EN_02_DCINAST_MASK = 0x3E, + BD71815_INT_EN_03_DCINAST_MASK = 0x3F, + BD71815_INT_EN_04_VSYSAST_MASK = 0xCF, + BD71815_INT_EN_05_CHGAST_MASK = 0xFC, + BD71815_INT_EN_06_BATAST_MASK = 0xF3, + BD71815_INT_EN_07_BMONAST_MASK = 0xFE, + BD71815_INT_EN_08_BMONAST_MASK = 0x03, + BD71815_INT_EN_09_BMONAST_MASK = 0x07, + BD71815_INT_EN_10_BMONAST_MASK = 0x3F, + BD71815_INT_EN_11_TMPAST_MASK = 0xFF, + BD71815_INT_EN_12_ALMAST_MASK = 0x07, +}; +/* BD71815 interrupt irqs */ +enum { + /* BUCK reg interrupts */ + BD71815_INT_BUCK1_OCP, + BD71815_INT_BUCK2_OCP, + BD71815_INT_BUCK3_OCP, + BD71815_INT_BUCK4_OCP, + BD71815_INT_BUCK5_OCP, + BD71815_INT_LED_OVP, + BD71815_INT_LED_OCP, + BD71815_INT_LED_SCP, + /* DCIN1 interrupts */ + BD71815_INT_DCIN_RMV, + BD71815_INT_CLPS_OUT, + BD71815_INT_CLPS_IN, + BD71815_INT_DCIN_OVP_RES, + BD71815_INT_DCIN_OVP_DET, + /* DCIN2 interrupts */ + BD71815_INT_DCIN_MON_RES, + BD71815_INT_DCIN_MON_DET, + BD71815_INT_WDOG, + /* Vsys INT_STAT_04 */ + BD71815_INT_VSYS_UV_RES, + BD71815_INT_VSYS_UV_DET, + BD71815_INT_VSYS_LOW_RES, + BD71815_INT_VSYS_LOW_DET, + BD71815_INT_VSYS_MON_RES, + BD71815_INT_VSYS_MON_DET, + /* Charger INT_STAT_05 */ + BD71815_INT_CHG_WDG_TEMP, + BD71815_INT_CHG_WDG_TIME, + BD71815_INT_CHG_RECHARGE_RES, + BD71815_INT_CHG_RECHARGE_DET, + BD71815_INT_CHG_RANGED_TEMP_TRANSITION, + BD71815_INT_CHG_STATE_TRANSITION, + /* Battery INT_STAT_06 */ + BD71815_INT_BAT_TEMP_NORMAL, + BD71815_INT_BAT_TEMP_ERANGE, + BD71815_INT_BAT_REMOVED, + BD71815_INT_BAT_DETECTED, + BD71815_INT_THERM_REMOVED, + BD71815_INT_THERM_DETECTED, + /* Battery Mon 1 INT_STAT_07 */ + BD71815_INT_BAT_DEAD, + BD71815_INT_BAT_SHORTC_RES, + BD71815_INT_BAT_SHORTC_DET, + BD71815_INT_BAT_LOW_VOLT_RES, + BD71815_INT_BAT_LOW_VOLT_DET, + BD71815_INT_BAT_OVER_VOLT_RES, + BD71815_INT_BAT_OVER_VOLT_DET, + /* Battery Mon 2 INT_STAT_08 */ + BD71815_INT_BAT_MON_RES, + BD71815_INT_BAT_MON_DET, + /* Battery Mon 3 (Coulomb counter) INT_STAT_09 */ + BD71815_INT_BAT_CC_MON1, + BD71815_INT_BAT_CC_MON2, + BD71815_INT_BAT_CC_MON3, + /* Battery Mon 4 INT_STAT_10 */ + BD71815_INT_BAT_OVER_CURR_1_RES, + BD71815_INT_BAT_OVER_CURR_1_DET, + BD71815_INT_BAT_OVER_CURR_2_RES, + BD71815_INT_BAT_OVER_CURR_2_DET, + BD71815_INT_BAT_OVER_CURR_3_RES, + BD71815_INT_BAT_OVER_CURR_3_DET, + /* Temperature INT_STAT_11 */ + BD71815_INT_TEMP_BAT_LOW_RES, + BD71815_INT_TEMP_BAT_LOW_DET, + BD71815_INT_TEMP_BAT_HI_RES, + BD71815_INT_TEMP_BAT_HI_DET, + BD71815_INT_TEMP_CHIP_OVER_125_RES, + BD71815_INT_TEMP_CHIP_OVER_125_DET, + BD71815_INT_TEMP_CHIP_OVER_VF_RES, + BD71815_INT_TEMP_CHIP_OVER_VF_DET, + /* RTC Alarm INT_STAT_12 */ + BD71815_INT_RTC0, + BD71815_INT_RTC1, + BD71815_INT_RTC2, +}; + +#define BD71815_INT_BUCK1_OCP_MASK BIT(0) +#define BD71815_INT_BUCK2_OCP_MASK BIT(1) +#define BD71815_INT_BUCK3_OCP_MASK BIT(2) +#define BD71815_INT_BUCK4_OCP_MASK BIT(3) +#define BD71815_INT_BUCK5_OCP_MASK BIT(4) +#define BD71815_INT_LED_OVP_MASK BIT(5) +#define BD71815_INT_LED_OCP_MASK BIT(6) +#define BD71815_INT_LED_SCP_MASK BIT(7) + +#define BD71815_INT_DCIN_RMV_MASK BIT(1) +#define BD71815_INT_CLPS_OUT_MASK BIT(2) +#define BD71815_INT_CLPS_IN_MASK BIT(3) +#define BD71815_INT_DCIN_OVP_RES_MASK BIT(4) +#define BD71815_INT_DCIN_OVP_DET_MASK BIT(5) + +#define BD71815_INT_DCIN_MON_RES_MASK BIT(0) +#define BD71815_INT_DCIN_MON_DET_MASK BIT(1) +#define BD71815_INT_WDOG_MASK BIT(6) + +#define BD71815_INT_VSYS_UV_RES_MASK BIT(0) +#define BD71815_INT_VSYS_UV_DET_MASK BIT(1) +#define BD71815_INT_VSYS_LOW_RES_MASK BIT(2) +#define BD71815_INT_VSYS_LOW_DET_MASK BIT(3) +#define BD71815_INT_VSYS_MON_RES_MASK BIT(6) +#define BD71815_INT_VSYS_MON_DET_MASK BIT(7) + +#define BD71815_INT_CHG_WDG_TEMP_MASK BIT(2) +#define BD71815_INT_CHG_WDG_TIME_MASK BIT(3) +#define BD71815_INT_CHG_RECHARGE_RES_MASK BIT(4) +#define BD71815_INT_CHG_RECHARGE_DET_MASK BIT(5) +#define BD71815_INT_CHG_RANGED_TEMP_TRANSITION_MASK BIT(6) +#define BD71815_INT_CHG_STATE_TRANSITION_MASK BIT(7) + +#define BD71815_INT_BAT_TEMP_NORMAL_MASK BIT(0) +#define BD71815_INT_BAT_TEMP_ERANGE_MASK BIT(1) +#define BD71815_INT_BAT_REMOVED_MASK BIT(4) +#define BD71815_INT_BAT_DETECTED_MASK BIT(5) +#define BD71815_INT_THERM_REMOVED_MASK BIT(6) +#define BD71815_INT_THERM_DETECTED_MASK BIT(7) + +#define BD71815_INT_BAT_DEAD_MASK BIT(1) +#define BD71815_INT_BAT_SHORTC_RES_MASK BIT(2) +#define BD71815_INT_BAT_SHORTC_DET_MASK BIT(3) +#define BD71815_INT_BAT_LOW_VOLT_RES_MASK BIT(4) +#define BD71815_INT_BAT_LOW_VOLT_DET_MASK BIT(5) +#define BD71815_INT_BAT_OVER_VOLT_RES_MASK BIT(6) +#define BD71815_INT_BAT_OVER_VOLT_DET_MASK BIT(7) + +#define BD71815_INT_BAT_MON_RES_MASK BIT(0) +#define BD71815_INT_BAT_MON_DET_MASK BIT(1) + +#define BD71815_INT_BAT_CC_MON1_MASK BIT(0) +#define BD71815_INT_BAT_CC_MON2_MASK BIT(1) +#define BD71815_INT_BAT_CC_MON3_MASK BIT(2) + +#define BD71815_INT_BAT_OVER_CURR_1_RES_MASK BIT(0) +#define BD71815_INT_BAT_OVER_CURR_1_DET_MASK BIT(1) +#define BD71815_INT_BAT_OVER_CURR_2_RES_MASK BIT(2) +#define BD71815_INT_BAT_OVER_CURR_2_DET_MASK BIT(3) +#define BD71815_INT_BAT_OVER_CURR_3_RES_MASK BIT(4) +#define BD71815_INT_BAT_OVER_CURR_3_DET_MASK BIT(5) + +#define BD71815_INT_TEMP_BAT_LOW_RES_MASK BIT(0) +#define BD71815_INT_TEMP_BAT_LOW_DET_MASK BIT(1) +#define BD71815_INT_TEMP_BAT_HI_RES_MASK BIT(2) +#define BD71815_INT_TEMP_BAT_HI_DET_MASK BIT(3) +#define BD71815_INT_TEMP_CHIP_OVER_125_RES_MASK BIT(4) +#define BD71815_INT_TEMP_CHIP_OVER_125_DET_MASK BIT(5) +#define BD71815_INT_TEMP_CHIP_OVER_VF_RES_MASK BIT(6) +#define BD71815_INT_TEMP_CHIP_OVER_VF_DET_MASK BIT(7) + +#define BD71815_INT_RTC0_MASK BIT(0) +#define BD71815_INT_RTC1_MASK BIT(1) +#define BD71815_INT_RTC2_MASK BIT(2) + +/* BD71815_REG_CC_CTRL bits */ +#define CCNTRST 0x80 +#define CCNTENB 0x40 +#define CCCALIB 0x20 + +/* BD71815_REG_CC_CURCD */ +#define CURDIR_Discharging 0x8000 + +/* BD71815_REG_VM_SA_IBAT */ +#define IBAT_SA_DIR_Discharging 0x8000 + +/* BD71815_REG_REX_CTRL_1 bits */ +#define REX_CLR BIT(4) + +/* BD71815_REG_REX_CTRL_1 bits */ +#define REX_PMU_STATE_MASK BIT(2) + +/* BD71815_REG_LED_CTRL bits */ +#define CHGDONE_LED_EN BIT(4) + +#endif /* __LINUX_MFD_BD71815_H */ diff --git a/include/linux/mfd/rohm-bd71828.h b/include/linux/mfd/rohm-bd71828.h index 017a4c01cb31..c7ab69c87ee8 100644 --- a/include/linux/mfd/rohm-bd71828.h +++ b/include/linux/mfd/rohm-bd71828.h @@ -151,6 +151,9 @@ enum { #define BD71828_REG_GPIO_CTRL3 0x49 #define BD71828_REG_IO_STAT 0xed +/* clk */ +#define BD71828_REG_OUT32K 0x4b + /* RTC */ #define BD71828_REG_RTC_SEC 0x4c #define BD71828_REG_RTC_MINUTE 0x4d -- cgit v1.2.3 From 80a71170646df80914a7290a197aca1e6116a49d Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 5 Apr 2021 14:43:44 +0300 Subject: regulator: rohm-regulator: Support SNVS HW state. The ROHM BD71815 supports setting voltage levels/regulator status for HW-states "RUN", "SUSPEND", "LPSR" and "SNVS". Add DT parsing helper also for SNVS state. Signed-off-by: Matti Vaittinen Acked-by: Mark Brown Signed-off-by: Lee Jones --- include/linux/mfd/rohm-generic.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index 9e2880e06950..a9144284cf6d 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -27,7 +27,8 @@ struct rohm_regmap_dev { #define ROHM_DVS_LEVEL_IDLE BIT(1) #define ROHM_DVS_LEVEL_SUSPEND BIT(2) #define ROHM_DVS_LEVEL_LPSR BIT(3) -#define ROHM_DVS_LEVEL_VALID_AMOUNT 4 +#define ROHM_DVS_LEVEL_SNVS BIT(4) +#define ROHM_DVS_LEVEL_VALID_AMOUNT 5 #define ROHM_DVS_LEVEL_UNKNOWN 0 /** @@ -66,6 +67,9 @@ struct rohm_dvs_config { unsigned int lpsr_reg; unsigned int lpsr_mask; unsigned int lpsr_on_mask; + unsigned int snvs_reg; + unsigned int snvs_mask; + unsigned int snvs_on_mask; }; #if IS_ENABLED(CONFIG_REGULATOR_ROHM) -- cgit v1.2.3 From b07067627cd5f1f6dc60c224b47c728f7f4b7b45 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 27 Jan 2021 11:06:13 -0800 Subject: lib: Add ASN.1 encoder We have a need in the TPM2 trusted keys to return the ASN.1 form of the TPM key blob so it can be operated on by tools outside of the kernel. The specific tools are the openssl_tpm2_engine, openconnect and the Intel tpm2-tss-engine. To do that, we have to be able to read and write the same binary key format the tools use. The current ASN.1 decoder does fine for reading, but we need pieces of an ASN.1 encoder to write the key blob in binary compatible form. For backwards compatibility, the trusted key reader code will still accept the two TPM2B quantities that it uses today, but the writer will only output the ASN.1 form. The current implementation only encodes the ASN.1 bits we actually need. Signed-off-by: James Bottomley Reviewed-by: David Howells Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/asn1_encoder.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/linux/asn1_encoder.h (limited to 'include/linux') diff --git a/include/linux/asn1_encoder.h b/include/linux/asn1_encoder.h new file mode 100644 index 000000000000..08cd0c2ad34f --- /dev/null +++ b/include/linux/asn1_encoder.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _LINUX_ASN1_ENCODER_H +#define _LINUX_ASN1_ENCODER_H + +#include +#include +#include +#include + +#define asn1_oid_len(oid) (sizeof(oid)/sizeof(u32)) +unsigned char * +asn1_encode_integer(unsigned char *data, const unsigned char *end_data, + s64 integer); +unsigned char * +asn1_encode_oid(unsigned char *data, const unsigned char *end_data, + u32 oid[], int oid_len); +unsigned char * +asn1_encode_tag(unsigned char *data, const unsigned char *end_data, + u32 tag, const unsigned char *string, int len); +unsigned char * +asn1_encode_octet_string(unsigned char *data, + const unsigned char *end_data, + const unsigned char *string, u32 len); +unsigned char * +asn1_encode_sequence(unsigned char *data, const unsigned char *end_data, + const unsigned char *seq, int len); +unsigned char * +asn1_encode_boolean(unsigned char *data, const unsigned char *end_data, + bool val); + +#endif -- cgit v1.2.3 From 1c6476e9741e30be57e0b370d4405214f055607c Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 27 Jan 2021 11:06:14 -0800 Subject: oid_registry: Add TCG defined OIDS for TPM keys The TCG has defined an OID prefix "2.23.133.10.1" for the various TPM key uses. We've defined three of the available numbers: 2.23.133.10.1.3 TPM Loadable key. This is an asymmetric key (Usually RSA2048 or Elliptic Curve) which can be imported by a TPM2_Load() operation. 2.23.133.10.1.4 TPM Importable Key. This is an asymmetric key (Usually RSA2048 or Elliptic Curve) which can be imported by a TPM2_Import() operation. Both loadable and importable keys are specific to a given TPM, the difference is that a loadable key is wrapped with the symmetric secret, so must have been created by the TPM itself. An importable key is wrapped with a DH shared secret, and may be created without access to the TPM provided you know the public part of the parent key. 2.23.133.10.1.5 TPM Sealed Data. This is a set of data (up to 128 bytes) which is sealed by the TPM. It usually represents a symmetric key and must be unsealed before use. The ASN.1 binary key form starts of with this OID as the first element of a sequence, giving the binary form a unique recognizable identity marker regardless of encoding. Signed-off-by: James Bottomley Reviewed-by: David Howells Acked-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/oid_registry.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 4462ed2c18cd..d06988d1565e 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -113,6 +113,11 @@ enum OID { OID_SM2_with_SM3, /* 1.2.156.10197.1.501 */ OID_sm3WithRSAEncryption, /* 1.2.156.10197.1.504 */ + /* TCG defined OIDS for TPM based keys */ + OID_TPMLoadableKey, /* 2.23.133.10.1.3 */ + OID_TPMImportableKey, /* 2.23.133.10.1.4 */ + OID_TPMSealedData, /* 2.23.133.10.1.5 */ + OID__NR }; -- cgit v1.2.3 From e5fb5d2c5a03e229ded1f45aa2a42f2c288689c7 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 27 Jan 2021 11:06:17 -0800 Subject: security: keys: trusted: Make sealed key properly interoperable The current implementation appends a migratable flag to the end of a key, meaning the format isn't exactly interoperable because the using party needs to know to strip this extra byte. However, all other consumers of TPM sealed blobs expect the unseal to return exactly the key. Since TPM2 keys have a key property flag that corresponds to migratable, use that flag instead and make the actual key the only sealed quantity. This is secure because the key properties are bound to a hash in the private part, so if they're altered the key won't load. Backwards compatibility is implemented by detecting whether we're loading a new format key or not and correctly setting migratable from the last byte of old format keys. Signed-off-by: James Bottomley Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 543aa3b1dedc..aa11fe323c56 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -305,6 +305,8 @@ struct tpm_buf { }; enum tpm2_object_attributes { + TPM2_OA_FIXED_TPM = BIT(1), + TPM2_OA_FIXED_PARENT = BIT(4), TPM2_OA_USER_WITH_AUTH = BIT(6), }; -- cgit v1.2.3 From ce62b114bbad9346641d16853c528ba01513e1b0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Mar 2018 15:01:18 -0500 Subject: NFS: Split attribute support out from the server capabilities There are lots of attributes, and they are crowding out the bit space. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fbcdfd9f7a7f..d28d7a62864f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -191,6 +191,8 @@ struct nfs_server { dev_t s_dev; /* superblock dev numbers */ struct nfs_auth_info auth_info; /* parsed auth flavors */ + __u64 fattr_valid; /* Valid attributes */ + #ifdef CONFIG_NFS_FSCACHE struct nfs_fscache_key *fscache_key; /* unique key for superblock */ struct fscache_cookie *fscache; /* superblock cookie */ @@ -267,16 +269,7 @@ struct nfs_server { #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) -/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */ #define NFS_CAP_LGOPEN (1U << 5) -#define NFS_CAP_FILEID (1U << 6) -#define NFS_CAP_MODE (1U << 7) -#define NFS_CAP_NLINK (1U << 8) -#define NFS_CAP_OWNER (1U << 9) -#define NFS_CAP_OWNER_GROUP (1U << 10) -#define NFS_CAP_ATIME (1U << 11) -#define NFS_CAP_CTIME (1U << 12) -#define NFS_CAP_MTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) #define NFS_CAP_STATEID_NFSV41 (1U << 16) -- cgit v1.2.3 From 123086843372bc93d26f52edfb71dbf951cd2f17 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 13 Apr 2021 19:39:58 -0700 Subject: usb: typec: tcpm: Honour pSnkStdby requirement during negotiation >From PD Spec: The Sink Shall transition to Sink Standby before a positive or negative voltage transition of VBUS. During Sink Standby the Sink Shall reduce its power draw to pSnkStdby. This allows the Source to manage the voltage transition as well as supply sufficient operating current to the Sink to maintain PD operation during the transition. The Sink Shall complete this transition to Sink Standby within tSnkStdby after evaluating the Accept Message from the Source. The transition when returning to Sink operation from Sink Standby Shall be completed within tSnkNewPower. The pSnkStdby requirement Shall only apply if the Sink power draw is higher than this level. The above requirement needs to be met to prevent hard resets from port partner. Without the patch: (5V/3A during SNK_DISCOVERY all the way through explicit contract) [ 95.711984] CC1: 0 -> 0, CC2: 0 -> 5 [state TOGGLING, polarity 0, connected] [ 95.712007] state change TOGGLING -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [ 95.712017] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS] [ 95.837190] VBUS on [ 95.882075] state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED [delayed 170 ms] [ 95.882082] state change SNK_DEBOUNCED -> SNK_ATTACHED [rev3 NONE_AMS] [ 95.882086] polarity 1 [ 95.883151] set_auto_vbus_discharge_threshold mode:0 pps_active:n vbus:5000 ret:0 [ 95.883441] enable vbus discharge ret:0 [ 95.883445] Requesting mux state 1, usb-role 2, orientation 2 [ 95.883776] state change SNK_ATTACHED -> SNK_STARTUP [rev3 NONE_AMS] [ 95.883879] pending state change SNK_STARTUP -> SNK_DISCOVERY @ 500 ms [rev3 NONE_AMS] [ 96.038960] VBUS on [ 96.383939] state change SNK_STARTUP -> SNK_DISCOVERY [delayed 500 ms] [ 96.383946] Setting voltage/current limit 5000 mV 3000 mA [ 96.383961] vbus=0 charge:=1 [ 96.386044] state change SNK_DISCOVERY -> SNK_WAIT_CAPABILITIES [rev3 NONE_AMS] [ 96.386309] pending state change SNK_WAIT_CAPABILITIES -> HARD_RESET_SEND @ 450 ms [rev3 NONE_AMS] [ 96.394404] PD RX, header: 0x2161 [1] [ 96.394408] PDO 0: type 0, 5000 mV, 3000 mA [E] [ 96.394410] PDO 1: type 0, 9000 mV, 2000 mA [] [ 96.394412] state change SNK_WAIT_CAPABILITIES -> SNK_NEGOTIATE_CAPABILITIES [rev2 POWER_NEGOTIATION] [ 96.394416] Setting usb_comm capable false [ 96.395083] cc=0 cc1=0 cc2=5 vbus=0 vconn=sink polarity=1 [ 96.395089] Requesting PDO 1: 9000 mV, 2000 mA [ 96.395093] PD TX, header: 0x1042 [ 96.397404] PD TX complete, status: 0 [ 96.397424] pending state change SNK_NEGOTIATE_CAPABILITIES -> HARD_RESET_SEND @ 60 ms [rev2 POWER_NEGOTIATION] [ 96.400826] PD RX, header: 0x363 [1] [ 96.400829] state change SNK_NEGOTIATE_CAPABILITIES -> SNK_TRANSITION_SINK [rev2 POWER_NEGOTIATION] [ 96.400832] pending state change SNK_TRANSITION_SINK -> HARD_RESET_SEND @ 500 ms [rev2 POWER_NEGOTIATION] [ 96.577315] PD RX, header: 0x566 [1] [ 96.577321] Setting voltage/current limit 9000 mV 2000 mA [ 96.578363] set_auto_vbus_discharge_threshold mode:3 pps_active:n vbus:9000 ret:0 [ 96.578370] state change SNK_TRANSITION_SINK -> SNK_READY [rev2 POWER_NEGOTIATION] With the patch: [ 168.398573] CC1: 0 -> 0, CC2: 0 -> 5 [state TOGGLING, polarity 0, connected] [ 168.398605] state change TOGGLING -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [ 168.398619] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS] [ 168.522348] VBUS on [ 168.568676] state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED [delayed 170 ms] [ 168.568684] state change SNK_DEBOUNCED -> SNK_ATTACHED [rev3 NONE_AMS] [ 168.568688] polarity 1 [ 168.569867] set_auto_vbus_discharge_threshold mode:0 pps_active:n vbus:5000 ret:0 [ 168.570158] enable vbus discharge ret:0 [ 168.570161] Requesting mux state 1, usb-role 2, orientation 2 [ 168.570504] state change SNK_ATTACHED -> SNK_STARTUP [rev3 NONE_AMS] [ 168.570634] pending state change SNK_STARTUP -> SNK_DISCOVERY @ 500 ms [rev3 NONE_AMS] [ 169.070689] state change SNK_STARTUP -> SNK_DISCOVERY [delayed 500 ms] [ 169.070695] Setting voltage/current limit 5000 mV 3000 mA [ 169.070702] vbus=0 charge:=1 [ 169.072719] state change SNK_DISCOVERY -> SNK_WAIT_CAPABILITIES [rev3 NONE_AMS] [ 169.073145] pending state change SNK_WAIT_CAPABILITIES -> HARD_RESET_SEND @ 450 ms [rev3 NONE_AMS] [ 169.077162] PD RX, header: 0x2161 [1] [ 169.077172] PDO 0: type 0, 5000 mV, 3000 mA [E] [ 169.077178] PDO 1: type 0, 9000 mV, 2000 mA [] [ 169.077183] state change SNK_WAIT_CAPABILITIES -> SNK_NEGOTIATE_CAPABILITIES [rev2 POWER_NEGOTIATION] [ 169.077191] Setting usb_comm capable false [ 169.077753] cc=0 cc1=0 cc2=5 vbus=0 vconn=sink polarity=1 [ 169.077759] Requesting PDO 1: 9000 mV, 2000 mA [ 169.077762] PD TX, header: 0x1042 [ 169.079990] PD TX complete, status: 0 [ 169.080013] pending state change SNK_NEGOTIATE_CAPABILITIES -> HARD_RESET_SEND @ 60 ms [rev2 POWER_NEGOTIATION] [ 169.083183] VBUS on [ 169.084195] PD RX, header: 0x363 [1] [ 169.084200] state change SNK_NEGOTIATE_CAPABILITIES -> SNK_TRANSITION_SINK [rev2 POWER_NEGOTIATION] [ 169.084206] Setting standby current 5000 mV @ 500 mA [ 169.084209] Setting voltage/current limit 5000 mV 500 mA [ 169.084220] pending state change SNK_TRANSITION_SINK -> HARD_RESET_SEND @ 500 ms [rev2 POWER_NEGOTIATION] [ 169.260222] PD RX, header: 0x566 [1] [ 169.260227] Setting voltage/current limit 9000 mV 2000 mA [ 169.261315] set_auto_vbus_discharge_threshold mode:3 pps_active:n vbus:9000 ret:0 [ 169.261321] state change SNK_TRANSITION_SINK -> SNK_READY [rev2 POWER_NEGOTIATION] [ 169.261570] AMS POWER_NEGOTIATION finished Fixes: f0690a25a140b ("staging: typec: USB Type-C Port Manager (tcpm)") Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20210414024000.4175263-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index 70d681918d01..bf00259493e0 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -493,4 +493,6 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_N_CAPS_COUNT (PD_T_NO_RESPONSE / PD_T_SEND_SOURCE_CAP) #define PD_N_HARD_RESET_COUNT 2 +#define PD_P_SNK_STDBY_MW 2500 /* 2500 mW */ + #endif /* __LINUX_USB_PD_H */ -- cgit v1.2.3 From 1c7c1488898e1b3fd15d38f373a8c947a2d9b1e3 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 10 Feb 2021 15:07:36 +0800 Subject: mfd: dbx500-prcmu: Use true and false for bool variable Fix the following coccicheck warning: ./include/linux/mfd/db8500-prcmu.h:723:8-9: WARNING: return of 0/1 in function 'db8500_prcmu_is_ac_wake_requested' with return type bool. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Lee Jones --- include/linux/mfd/db8500-prcmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/db8500-prcmu.h b/include/linux/mfd/db8500-prcmu.h index 4b63d3ecdcff..a62de3d155ed 100644 --- a/include/linux/mfd/db8500-prcmu.h +++ b/include/linux/mfd/db8500-prcmu.h @@ -720,7 +720,7 @@ static inline int db8500_prcmu_load_a9wdog(u8 id, u32 val) static inline bool db8500_prcmu_is_ac_wake_requested(void) { - return 0; + return false; } static inline int db8500_prcmu_set_arm_opp(u8 opp) -- cgit v1.2.3 From 911490a5ca2a3289ac734ca7c127c7dbc72466cc Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 19 Feb 2021 23:39:09 +0100 Subject: mfd: lp87565: Remove unused define 'LP87565_NUM_BUCK' This define appears incorrect, but it is completely unused so it can be removed. Signed-off-by: Luca Ceresoli Signed-off-by: Lee Jones --- include/linux/mfd/lp87565.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/lp87565.h b/include/linux/mfd/lp87565.h index d44ddfb6bb63..5640e6088fe6 100644 --- a/include/linux/mfd/lp87565.h +++ b/include/linux/mfd/lp87565.h @@ -237,9 +237,6 @@ enum lp87565_device_type { #define LP87565_GOIO2_OUT BIT(1) #define LP87565_GOIO1_OUT BIT(0) -/* Number of step-down converters available */ -#define LP87565_NUM_BUCK 6 - enum LP87565_regulator_id { /* BUCK's */ LP87565_BUCK_0, -- cgit v1.2.3 From 16f961544bfd7170f75d805d7585e09023671dbc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 7 Mar 2021 21:23:25 +0100 Subject: mfd: Remove support for AB3100 The ST-Ericsson U300 platform has been removed, so this driver is no longer needed. Signed-off-by: Arnd Bergmann Reviewed-by: Linus Walleij Signed-off-by: Lee Jones --- include/linux/mfd/ab3100.h | 128 --------------------------------------------- 1 file changed, 128 deletions(-) delete mode 100644 include/linux/mfd/ab3100.h (limited to 'include/linux') diff --git a/include/linux/mfd/ab3100.h b/include/linux/mfd/ab3100.h deleted file mode 100644 index a881d8495186..000000000000 --- a/include/linux/mfd/ab3100.h +++ /dev/null @@ -1,128 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2007-2009 ST-Ericsson AB - * AB3100 core access functions - * Author: Linus Walleij - */ - -#include - -struct device; - -#ifndef MFD_AB3100_H -#define MFD_AB3100_H - - -#define AB3100_P1A 0xc0 -#define AB3100_P1B 0xc1 -#define AB3100_P1C 0xc2 -#define AB3100_P1D 0xc3 -#define AB3100_P1E 0xc4 -#define AB3100_P1F 0xc5 -#define AB3100_P1G 0xc6 -#define AB3100_R2A 0xc7 -#define AB3100_R2B 0xc8 - -/* - * AB3100, EVENTA1, A2 and A3 event register flags - * these are catenated into a single 32-bit flag in the code - * for event notification broadcasts. - */ -#define AB3100_EVENTA1_ONSWA (0x01<<16) -#define AB3100_EVENTA1_ONSWB (0x02<<16) -#define AB3100_EVENTA1_ONSWC (0x04<<16) -#define AB3100_EVENTA1_DCIO (0x08<<16) -#define AB3100_EVENTA1_OVER_TEMP (0x10<<16) -#define AB3100_EVENTA1_SIM_OFF (0x20<<16) -#define AB3100_EVENTA1_VBUS (0x40<<16) -#define AB3100_EVENTA1_VSET_USB (0x80<<16) - -#define AB3100_EVENTA2_READY_TX (0x01<<8) -#define AB3100_EVENTA2_READY_RX (0x02<<8) -#define AB3100_EVENTA2_OVERRUN_ERROR (0x04<<8) -#define AB3100_EVENTA2_FRAMING_ERROR (0x08<<8) -#define AB3100_EVENTA2_CHARG_OVERCURRENT (0x10<<8) -#define AB3100_EVENTA2_MIDR (0x20<<8) -#define AB3100_EVENTA2_BATTERY_REM (0x40<<8) -#define AB3100_EVENTA2_ALARM (0x80<<8) - -#define AB3100_EVENTA3_ADC_TRIG5 (0x01) -#define AB3100_EVENTA3_ADC_TRIG4 (0x02) -#define AB3100_EVENTA3_ADC_TRIG3 (0x04) -#define AB3100_EVENTA3_ADC_TRIG2 (0x08) -#define AB3100_EVENTA3_ADC_TRIGVBAT (0x10) -#define AB3100_EVENTA3_ADC_TRIGVTX (0x20) -#define AB3100_EVENTA3_ADC_TRIG1 (0x40) -#define AB3100_EVENTA3_ADC_TRIG0 (0x80) - -/* AB3100, STR register flags */ -#define AB3100_STR_ONSWA (0x01) -#define AB3100_STR_ONSWB (0x02) -#define AB3100_STR_ONSWC (0x04) -#define AB3100_STR_DCIO (0x08) -#define AB3100_STR_BOOT_MODE (0x10) -#define AB3100_STR_SIM_OFF (0x20) -#define AB3100_STR_BATT_REMOVAL (0x40) -#define AB3100_STR_VBUS (0x80) - -/* - * AB3100 contains 8 regulators, one external regulator controller - * and a buck converter, further the LDO E and buck converter can - * have separate settings if they are in sleep mode, this is - * modeled as a separate regulator. - */ -#define AB3100_NUM_REGULATORS 10 - -/** - * struct ab3100 - * @access_mutex: lock out concurrent accesses to the AB3100 registers - * @dev: pointer to the containing device - * @i2c_client: I2C client for this chip - * @testreg_client: secondary client for test registers - * @chip_name: name of this chip variant - * @chip_id: 8 bit chip ID for this chip variant - * @event_subscribers: event subscribers are listed here - * @startup_events: a copy of the first reading of the event registers - * @startup_events_read: whether the first events have been read - * - * This struct is PRIVATE and devices using it should NOT - * access ANY fields. It is used as a token for calling the - * AB3100 functions. - */ -struct ab3100 { - struct mutex access_mutex; - struct device *dev; - struct i2c_client *i2c_client; - struct i2c_client *testreg_client; - char chip_name[32]; - u8 chip_id; - struct blocking_notifier_head event_subscribers; - u8 startup_events[3]; - bool startup_events_read; -}; - -/** - * struct ab3100_platform_data - * Data supplied to initialize board connections to the AB3100 - * @reg_constraints: regulator constraints for target board - * the order of these constraints are: LDO A, C, D, E, - * F, G, H, K, EXT and BUCK. - * @reg_initvals: initial values for the regulator registers - * plus two sleep settings for LDO E and the BUCK converter. - * exactly AB3100_NUM_REGULATORS+2 values must be sent in. - * Order: LDO A, C, E, E sleep, F, G, H, K, EXT, BUCK, - * BUCK sleep, LDO D. (LDO D need to be initialized last.) - * @external_voltage: voltage level of the external regulator. - */ -struct ab3100_platform_data { - struct regulator_init_data reg_constraints[AB3100_NUM_REGULATORS]; - u8 reg_initvals[AB3100_NUM_REGULATORS+2]; - int external_voltage; -}; - -int ab3100_event_register(struct ab3100 *ab3100, - struct notifier_block *nb); -int ab3100_event_unregister(struct ab3100 *ab3100, - struct notifier_block *nb); - -#endif /* MFD_AB3100_H */ -- cgit v1.2.3 From d9b326b2c3673f939941806146aee38e5c635fd0 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 10 Mar 2021 23:55:45 +0800 Subject: mfd: intel-m10-bmc: Fix the register access range This patch fixes the max register address of MAX 10 BMC. The range 0x20000000 ~ 0x200000fc are for control registers of the QSPI flash controller, which are not accessible to host. Signed-off-by: Xu Yilun Reviewed-by: Tom Rix Signed-off-by: Lee Jones --- include/linux/mfd/intel-m10-bmc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index 74d4e193966a..9b54ca13eac3 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -11,7 +11,7 @@ #define M10BMC_LEGACY_SYS_BASE 0x300400 #define M10BMC_SYS_BASE 0x300800 -#define M10BMC_MEM_END 0x200000fc +#define M10BMC_MEM_END 0x1fffffff /* Register offset of system registers */ #define NIOS2_FW_VERSION 0x0 -- cgit v1.2.3 From 5893f4d1f43036664010e3ae1d3f7a98b2165a5d Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 10 Mar 2021 23:55:46 +0800 Subject: mfd: intel-m10-bmc: Simplify the legacy version reg definition The version register is the only one in the legacy I/O space to be accessed, so it is not necessary to define the legacy base & version register offset. A direct definition of the legacy version register address would be fine. Signed-off-by: Xu Yilun Reviewed-by: Tom Rix Signed-off-by: Lee Jones --- include/linux/mfd/intel-m10-bmc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index 9b54ca13eac3..4f1071febb9e 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -9,7 +9,7 @@ #include -#define M10BMC_LEGACY_SYS_BASE 0x300400 +#define M10BMC_LEGACY_BUILD_VER 0x300468 #define M10BMC_SYS_BASE 0x300800 #define M10BMC_MEM_END 0x1fffffff -- cgit v1.2.3 From 8169f74ca6f318f4187536050d2f5408fce9c264 Mon Sep 17 00:00:00 2001 From: Matthew Gerlach Date: Wed, 10 Mar 2021 23:55:47 +0800 Subject: mfd: intel-m10-bmc: Add access table configuration to the regmap This patch adds access tables to the MAX 10 BMC regmap. This prevents the host from accessing the unwanted I/O space. It also filters out the invalid outputs when reading the regmap debugfs interface. Signed-off-by: Matthew Gerlach Signed-off-by: Xu Yilun Reviewed-by: Tom Rix Signed-off-by: Lee Jones --- include/linux/mfd/intel-m10-bmc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index 4f1071febb9e..c4eb38c13eda 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -11,7 +11,10 @@ #define M10BMC_LEGACY_BUILD_VER 0x300468 #define M10BMC_SYS_BASE 0x300800 -#define M10BMC_MEM_END 0x1fffffff +#define M10BMC_SYS_END 0x300fff +#define M10BMC_FLASH_BASE 0x10000000 +#define M10BMC_FLASH_END 0x1fffffff +#define M10BMC_MEM_END M10BMC_FLASH_END /* Register offset of system registers */ #define NIOS2_FW_VERSION 0x0 -- cgit v1.2.3 From 586478bfc9f7e16504d6f64cf18bcbdf6fd0cbc9 Mon Sep 17 00:00:00 2001 From: Hubert Streidl Date: Tue, 16 Mar 2021 17:22:37 +0100 Subject: mfd: da9063: Support SMBus and I2C mode By default the PMIC DA9063 2-wire interface is SMBus compliant. This means the PMIC will automatically reset the interface when the clock signal ceases for more than the SMBus timeout of 35 ms. If the I2C driver / device is not capable of creating atomic I2C transactions, a context change can cause a ceasing of the clock signal. This can happen if for example a real-time thread is scheduled. Then the DA9063 in SMBus mode will reset the 2-wire interface. Subsequently a write message could end up in the wrong register. This could cause unpredictable system behavior. The DA9063 PMIC also supports an I2C compliant mode for the 2-wire interface. This mode does not reset the interface when the clock signal ceases. Thus the problem depicted above does not occur. This patch tests for the bus functionality "I2C_FUNC_I2C". It can reasonably be assumed that the bus cannot obey SMBus timings if this functionality is set. SMBus commands most probably are emulated in this case which is prone to the latency issue described above. This patch enables the I2C bus mode if I2C_FUNC_I2C is set or otherwise keeps the default SMBus mode. Signed-off-by: Hubert Streidl Signed-off-by: Mark Jonas Reviewed-by: Wolfram Sang Signed-off-by: Lee Jones --- include/linux/mfd/da9063/registers.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/da9063/registers.h b/include/linux/mfd/da9063/registers.h index 1dbabf1b3cb8..6e0f66a2e727 100644 --- a/include/linux/mfd/da9063/registers.h +++ b/include/linux/mfd/da9063/registers.h @@ -1037,6 +1037,9 @@ #define DA9063_NONKEY_PIN_AUTODOWN 0x02 #define DA9063_NONKEY_PIN_AUTOFLPRT 0x03 +/* DA9063_REG_CONFIG_J (addr=0x10F) */ +#define DA9063_TWOWIRE_TO 0x40 + /* DA9063_REG_MON_REG_5 (addr=0x116) */ #define DA9063_MON_A8_IDX_MASK 0x07 #define DA9063_MON_A8_IDX_NONE 0x00 -- cgit v1.2.3 From 42e59982917a25ad254b74e6e8decee5e684763d Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 1 Mar 2021 16:42:19 +0200 Subject: mfd: core: Add support for software nodes The old device property API is going to be removed and replaced with the newer software node API. This prepares MFD subsystem for the transition. Signed-off-by: Heikki Krogerus Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones --- include/linux/mfd/core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 2009c4b936d9..9ec599167fe6 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -50,6 +50,7 @@ #define MFD_DEP_LEVEL_HIGH 1 struct irq_domain; +struct software_node; struct property_entry; /* Matches ACPI PNP id, either _HID or _CID, or ACPI _ADR */ @@ -78,6 +79,9 @@ struct mfd_cell { void *platform_data; size_t pdata_size; + /* Software node for the device. */ + const struct software_node *swnode; + /* device properties passed to the sub devices drivers */ const struct property_entry *properties; -- cgit v1.2.3 From b4a66acc0997cff7cb9a4c3992e97808700aa1ff Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 1 Mar 2021 16:42:22 +0200 Subject: mfd: core: Remove support for dangling device properties From now on only accepting complete software nodes. Signed-off-by: Heikki Krogerus Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones --- include/linux/mfd/core.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 9ec599167fe6..0bc7cba798a3 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -51,7 +51,6 @@ struct irq_domain; struct software_node; -struct property_entry; /* Matches ACPI PNP id, either _HID or _CID, or ACPI _ADR */ struct mfd_cell_acpi_match { @@ -82,9 +81,6 @@ struct mfd_cell { /* Software node for the device. */ const struct software_node *swnode; - /* device properties passed to the sub devices drivers */ - const struct property_entry *properties; - /* * Device Tree compatible string * See: Documentation/devicetree/usage-model.rst Chapter 2.2 for details -- cgit v1.2.3 From 4502647e60cb4afd74f74d648bc2990954c1b73a Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sun, 14 Mar 2021 12:02:36 +0100 Subject: mfd: rn5t618: Do not cache various USB related registers These register get reset to their OTP defaults after USB plugging. And while at it, also add a missing register for detecting the charger type. Signed-off-by: Andreas Kemnade Signed-off-by: Lee Jones --- include/linux/mfd/rn5t618.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rn5t618.h b/include/linux/mfd/rn5t618.h index fba0df13d9a8..8aa0bda1af4f 100644 --- a/include/linux/mfd/rn5t618.h +++ b/include/linux/mfd/rn5t618.h @@ -188,6 +188,7 @@ #define RN5T618_CHGOSCSCORESET3 0xd7 #define RN5T618_CHGOSCFREQSET1 0xd8 #define RN5T618_CHGOSCFREQSET2 0xd9 +#define RN5T618_GCHGDET 0xda #define RN5T618_CONTROL 0xe0 #define RN5T618_SOC 0xe1 #define RN5T618_RE_CAP_H 0xe2 -- cgit v1.2.3 From d1157530d476ffce4485182eea5b492065362a09 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Mon, 15 Mar 2021 20:18:32 +0100 Subject: mfd: ntxec: Support for EC in Tolino Shine 2 HD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the version of the EC in the Tolino Shine 2 HD to the supported versions. It seems not to have an RTC and does not ack data written to it. The vendor kernel happily ignores write errors, using I2C via userspace i2c-set also shows the error. So add a quirk to ignore that error. PWM can be successfully configured despite of that error. Signed-off-by: Andreas Kemnade Reviewed-by: Jonathan Neuschäfer Signed-off-by: Lee Jones --- include/linux/mfd/ntxec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/ntxec.h b/include/linux/mfd/ntxec.h index 361204d125f1..26ab3b8eb612 100644 --- a/include/linux/mfd/ntxec.h +++ b/include/linux/mfd/ntxec.h @@ -33,5 +33,6 @@ static inline __be16 ntxec_reg8(u8 value) /* Known firmware versions */ #define NTXEC_VERSION_KOBO_AURA 0xd726 /* found in Kobo Aura */ +#define NTXEC_VERSION_TOLINO_SHINE2 0xf110 /* found in Tolino Shine 2 HD */ #endif -- cgit v1.2.3 From 0b79c53e8a1c5c26c5a364c8f041ca6890a29f08 Mon Sep 17 00:00:00 2001 From: Timon Baetz Date: Wed, 27 Jan 2021 07:32:42 +0000 Subject: mfd: max8997: Replace 8998 with 8997 The max8997 header is using "max8998" in some identifiers. Fix it by replacing 8998 with 8997 in enum and macro. Signed-off-by: Timon Baetz Signed-off-by: Lee Jones --- include/linux/mfd/max8997.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h index e955e2f0a2cc..6c98edcf4b0b 100644 --- a/include/linux/mfd/max8997.h +++ b/include/linux/mfd/max8997.h @@ -14,13 +14,13 @@ * others and b) it can be enabled simply by using MAX17042 driver. */ -#ifndef __LINUX_MFD_MAX8998_H -#define __LINUX_MFD_MAX8998_H +#ifndef __LINUX_MFD_MAX8997_H +#define __LINUX_MFD_MAX8997_H #include /* MAX8997/8966 regulator IDs */ -enum max8998_regulators { +enum max8997_regulators { MAX8997_LDO1 = 0, MAX8997_LDO2, MAX8997_LDO3, @@ -207,4 +207,4 @@ struct max8997_platform_data { struct max8997_led_platform_data *led_pdata; }; -#endif /* __LINUX_MFD_MAX8998_H */ +#endif /* __LINUX_MFD_MAX8997_H */ -- cgit v1.2.3 From 5a517b5bf687028149d55ba50b393c288a054601 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 31 Mar 2021 18:48:51 +0300 Subject: i2c: designware: Get rid of legacy platform data Platform data is a legacy interface to supply device properties to the driver. In this case we don't have anymore in-kernel users for it. Just remove it for good. Signed-off-by: Andy Shevchenko Acked-by: Wolfram Sang Acked-by: Jarkko Nikula Signed-off-by: Lee Jones --- include/linux/platform_data/i2c-designware.h | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 include/linux/platform_data/i2c-designware.h (limited to 'include/linux') diff --git a/include/linux/platform_data/i2c-designware.h b/include/linux/platform_data/i2c-designware.h deleted file mode 100644 index 014c4a5a7e13..000000000000 --- a/include/linux/platform_data/i2c-designware.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright(c) 2014 Intel Corporation. - */ - -#ifndef I2C_DESIGNWARE_H -#define I2C_DESIGNWARE_H - -struct dw_i2c_platform_data { - unsigned int i2c_scl_freq; -}; - -#endif -- cgit v1.2.3 From ed25b4f00b61e109b29dc443dd1333b9b0da9bb4 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 1 Apr 2021 22:18:40 +0800 Subject: mfd: twl: Remove unused inline function twl4030charger_usb_en() There is no caller in tree, so can remove it. Signed-off-by: YueHaibing Signed-off-by: Lee Jones --- include/linux/mfd/twl.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/twl.h b/include/linux/mfd/twl.h index 089e8942223a..8871cc5188a0 100644 --- a/include/linux/mfd/twl.h +++ b/include/linux/mfd/twl.h @@ -781,8 +781,6 @@ int twl4030_sih_setup(struct device *dev, int module, int irq_base); #define TWL4030_VAUX3_DEV_GRP 0x1F #define TWL4030_VAUX3_DEDICATED 0x22 -static inline int twl4030charger_usb_en(int enable) { return 0; } - /*----------------------------------------------------------------------*/ /* Linux-specific regulator identifiers ... for now, we only support -- cgit v1.2.3 From f9386c91574fe6da9f4fca9a47734816b0db0019 Mon Sep 17 00:00:00 2001 From: Russ Weight Date: Mon, 12 Apr 2021 12:53:28 -0700 Subject: mfd: intel-m10-bmc: Add support for MAX10 BMC Secure Updates Add macros and definitions required by the MAX10 BMC Secure Update driver. Signed-off-by: Russ Weight Signed-off-by: Lee Jones --- include/linux/mfd/intel-m10-bmc.h | 85 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index c4eb38c13eda..f0044b14136e 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -16,6 +16,9 @@ #define M10BMC_FLASH_END 0x1fffffff #define M10BMC_MEM_END M10BMC_FLASH_END +#define M10BMC_STAGING_BASE 0x18000000 +#define M10BMC_STAGING_SIZE 0x3800000 + /* Register offset of system registers */ #define NIOS2_FW_VERSION 0x0 #define M10BMC_MAC_LOW 0x10 @@ -33,6 +36,88 @@ #define M10BMC_VER_PCB_INFO_MSK GENMASK(31, 24) #define M10BMC_VER_LEGACY_INVALID 0xffffffff +/* Secure update doorbell register, in system register region */ +#define M10BMC_DOORBELL 0x400 + +/* Authorization Result register, in system register region */ +#define M10BMC_AUTH_RESULT 0x404 + +/* Doorbell register fields */ +#define DRBL_RSU_REQUEST BIT(0) +#define DRBL_RSU_PROGRESS GENMASK(7, 4) +#define DRBL_HOST_STATUS GENMASK(11, 8) +#define DRBL_RSU_STATUS GENMASK(23, 16) +#define DRBL_PKVL_EEPROM_LOAD_SEC BIT(24) +#define DRBL_PKVL1_POLL_EN BIT(25) +#define DRBL_PKVL2_POLL_EN BIT(26) +#define DRBL_CONFIG_SEL BIT(28) +#define DRBL_REBOOT_REQ BIT(29) +#define DRBL_REBOOT_DISABLED BIT(30) + +/* Progress states */ +#define RSU_PROG_IDLE 0x0 +#define RSU_PROG_PREPARE 0x1 +#define RSU_PROG_READY 0x3 +#define RSU_PROG_AUTHENTICATING 0x4 +#define RSU_PROG_COPYING 0x5 +#define RSU_PROG_UPDATE_CANCEL 0x6 +#define RSU_PROG_PROGRAM_KEY_HASH 0x7 +#define RSU_PROG_RSU_DONE 0x8 +#define RSU_PROG_PKVL_PROM_DONE 0x9 + +/* Device and error states */ +#define RSU_STAT_NORMAL 0x0 +#define RSU_STAT_TIMEOUT 0x1 +#define RSU_STAT_AUTH_FAIL 0x2 +#define RSU_STAT_COPY_FAIL 0x3 +#define RSU_STAT_FATAL 0x4 +#define RSU_STAT_PKVL_REJECT 0x5 +#define RSU_STAT_NON_INC 0x6 +#define RSU_STAT_ERASE_FAIL 0x7 +#define RSU_STAT_WEAROUT 0x8 +#define RSU_STAT_NIOS_OK 0x80 +#define RSU_STAT_USER_OK 0x81 +#define RSU_STAT_FACTORY_OK 0x82 +#define RSU_STAT_USER_FAIL 0x83 +#define RSU_STAT_FACTORY_FAIL 0x84 +#define RSU_STAT_NIOS_FLASH_ERR 0x85 +#define RSU_STAT_FPGA_FLASH_ERR 0x86 + +#define HOST_STATUS_IDLE 0x0 +#define HOST_STATUS_WRITE_DONE 0x1 +#define HOST_STATUS_ABORT_RSU 0x2 + +#define rsu_prog(doorbell) FIELD_GET(DRBL_RSU_PROGRESS, doorbell) +#define rsu_stat(doorbell) FIELD_GET(DRBL_RSU_STATUS, doorbell) + +/* interval 100ms and timeout 5s */ +#define NIOS_HANDSHAKE_INTERVAL_US (100 * 1000) +#define NIOS_HANDSHAKE_TIMEOUT_US (5 * 1000 * 1000) + +/* RSU PREP Timeout (2 minutes) to erase flash staging area */ +#define RSU_PREP_INTERVAL_MS 100 +#define RSU_PREP_TIMEOUT_MS (2 * 60 * 1000) + +/* RSU Complete Timeout (40 minutes) for full flash update */ +#define RSU_COMPLETE_INTERVAL_MS 1000 +#define RSU_COMPLETE_TIMEOUT_MS (40 * 60 * 1000) + +/* Addresses for security related data in FLASH */ +#define BMC_REH_ADDR 0x17ffc004 +#define BMC_PROG_ADDR 0x17ffc000 +#define BMC_PROG_MAGIC 0x5746 + +#define SR_REH_ADDR 0x17ffd004 +#define SR_PROG_ADDR 0x17ffd000 +#define SR_PROG_MAGIC 0x5253 + +#define PR_REH_ADDR 0x17ffe004 +#define PR_PROG_ADDR 0x17ffe000 +#define PR_PROG_MAGIC 0x5250 + +/* Address of 4KB inverted bit vector containing staging area FLASH count */ +#define STAGING_FLASH_COUNT 0x17ffb000 + /** * struct intel_m10bmc - Intel MAX 10 BMC parent driver data structure * @dev: this device -- cgit v1.2.3 From 4bad58ebc8bc4f20d89cff95417c9b4674769709 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 23 Mar 2021 22:05:39 +0100 Subject: signal: Allow tasks to cache one sigqueue struct The idea for this originates from the real time tree to make signal delivery for realtime applications more efficient. In quite some of these application scenarios a control tasks signals workers to start their computations. There is usually only one signal per worker on flight. This works nicely as long as the kmem cache allocations do not hit the slow path and cause latencies. To cure this an optimistic caching was introduced (limited to RT tasks) which allows a task to cache a single sigqueue in a pointer in task_struct instead of handing it back to the kmem cache after consuming a signal. When the next signal is sent to the task then the cached sigqueue is used instead of allocating a new one. This solved the problem for this set of application scenarios nicely. The task cache is not preallocated so the first signal sent to a task goes always to the cache allocator. The cached sigqueue stays around until the task exits and is freed when task::sighand is dropped. After posting this solution for mainline the discussion came up whether this would be useful in general and should not be limited to realtime tasks: https://lore.kernel.org/r/m11rcu7nbr.fsf@fess.ebiederm.org One concern leading to the original limitation was to avoid a large amount of pointlessly cached sigqueues in alive tasks. The other concern was vs. RLIMIT_SIGPENDING as these cached sigqueues are not accounted for. The accounting problem is real, but on the other hand slightly academic. After gathering some statistics it turned out that after boot of a regular distro install there are less than 10 sigqueues cached in ~1500 tasks. In case of a 'mass fork and fire signal to child' scenario the extra 80 bytes of memory per task are well in the noise of the overall memory consumption of the fork bomb. If this should be limited then this would need an extra counter in struct user, more atomic instructions and a seperate rlimit. Yet another tunable which is mostly unused. The caching is actually used. After boot and a full kernel compile on a 64CPU machine with make -j128 the number of 'allocations' looks like this: From slab: 23996 From task cache: 52223 I.e. it reduces the number of slab cache operations by ~68%. A typical pattern there is: <...>-58490 __sigqueue_alloc: for 58488 from slab ffff8881132df460 <...>-58488 __sigqueue_free: cache ffff8881132df460 <...>-58488 __sigqueue_alloc: for 1149 from cache ffff8881103dc550 bash-1149 exit_task_sighand: free ffff8881132df460 bash-1149 __sigqueue_free: cache ffff8881103dc550 The interesting sequence is that the exiting task 58488 grabs the sigqueue from bash's task cache to signal exit and bash sticks it back into it's own cache. Lather, rinse and repeat. The caching is probably not noticable for the general use case, but the benefit for latency sensitive applications is clear. While kmem caches are usually just serving from the fast path the slab merging (default) can depending on the usage pattern of the merged slabs cause occasional slow path allocations. The time spared per cached entry is a few micro seconds per signal which is not relevant for e.g. a kernel build, but for signal heavy workloads it's measurable. As there is no real downside of this caching mechanism making it unconditionally available is preferred over more conditional code or new magic tunables. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Link: https://lkml.kernel.org/r/87sg4lbmxo.fsf@nanos.tec.linutronix.de --- include/linux/sched.h | 1 + include/linux/signal.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 05572e2140ad..f5ca798acb3a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -984,6 +984,7 @@ struct task_struct { /* Signal handlers: */ struct signal_struct *signal; struct sighand_struct __rcu *sighand; + struct sigqueue *sigqueue_cache; sigset_t blocked; sigset_t real_blocked; /* Restored if set_restore_sigmask() was used: */ diff --git a/include/linux/signal.h b/include/linux/signal.h index 205526c4003a..c3cbea266136 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -265,6 +265,7 @@ static inline void init_sigpending(struct sigpending *sig) } extern void flush_sigqueue(struct sigpending *queue); +extern void exit_task_sigqueue_cache(struct task_struct *tsk); /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) -- cgit v1.2.3 From 6308a5f06be08f3ea1f1a895a9ef54c7b65c4c35 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 2 Mar 2021 21:27:47 +0200 Subject: net/mlx5: E-Switch, Make vport number u16 Vport number is 16-bit field in hardware. Make it u16. Move location of vport in the structure so that it reduces a hole in the structure. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 429a710c5a99..9cf1da2883c6 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -152,8 +152,7 @@ mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) }; static inline u32 -mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, - int vport_num) +mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num) { return 0; }; -- cgit v1.2.3 From f4da56529da602010979e8497d1f02eaf5df8883 Mon Sep 17 00:00:00 2001 From: Tan Tee Min Date: Wed, 14 Apr 2021 08:16:17 +0800 Subject: net: stmmac: Add support for external trigger timestamping The Synopsis MAC controller supports auxiliary snapshot feature that allows user to store a snapshot of the system time based on an external event. This patch add supports to the above mentioned feature. Users will be able to triggered capturing the time snapshot from user-space using application such as testptp or any other applications that uses the PTP_EXTTS_REQUEST ioctl request. Cc: Richard Cochran Signed-off-by: Tan Tee Min Co-developed-by: Wong Vee Khee Signed-off-by: Wong Vee Khee Acked-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index e338ef7abc00..97edb31d6310 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -238,6 +238,8 @@ struct plat_stmmacenet_data { struct pci_dev *pdev; bool has_crossts; int int_snapshot_num; + int ext_snapshot_num; + bool ext_snapshot_en; bool multi_msi_en; int msi_mac_vec; int msi_wol_vec; -- cgit v1.2.3 From 98602c010ceba82f2c2384122dbd07bc965fd367 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Apr 2021 14:51:22 +0200 Subject: tty: create internal tty.h file There are a number of functions and #defines in include/linux/tty.h that do not belong there as they are private to the tty core code. Create an initial drivers/tty/tty.h file and copy the odd "tty logging" macros into it to seed the file with some initial things that we know nothing outside of the tty core should be calling. Cc: Tetsuo Handa Cc: Jiri Slaby Link: https://lore.kernel.org/r/20210408125134.3016837-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index a228c0ee484f..c8af52451587 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -772,16 +772,4 @@ static inline void proc_tty_register_driver(struct tty_driver *d) {} static inline void proc_tty_unregister_driver(struct tty_driver *d) {} #endif -#define tty_msg(fn, tty, f, ...) \ - fn("%s %s: " f, tty_driver_name(tty), tty_name(tty), ##__VA_ARGS__) - -#define tty_debug(tty, f, ...) tty_msg(pr_debug, tty, f, ##__VA_ARGS__) -#define tty_info(tty, f, ...) tty_msg(pr_info, tty, f, ##__VA_ARGS__) -#define tty_notice(tty, f, ...) tty_msg(pr_notice, tty, f, ##__VA_ARGS__) -#define tty_warn(tty, f, ...) tty_msg(pr_warn, tty, f, ##__VA_ARGS__) -#define tty_err(tty, f, ...) tty_msg(pr_err, tty, f, ##__VA_ARGS__) - -#define tty_info_ratelimited(tty, f, ...) \ - tty_msg(pr_info_ratelimited, tty, f, ##__VA_ARGS__) - #endif -- cgit v1.2.3 From da5d669e00d2c437b3f508d60add417fc74f4bb6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Apr 2021 14:51:29 +0200 Subject: tty: audit: move some local functions out of tty.h The functions tty_audit_add_data() and tty_audit_tiocsti() are local to the tty core code, and do not need to be in a "kernel-wide" header file so move them to drivers/tty/tty.h Cc: Jiri Slaby Link: https://lore.kernel.org/r/20210408125134.3016837-9-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index c8af52451587..516c63c1bfe2 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -717,20 +717,10 @@ static inline void n_tty_init(void) { } /* tty_audit.c */ #ifdef CONFIG_AUDIT -extern void tty_audit_add_data(struct tty_struct *tty, const void *data, - size_t size); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); -extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern int tty_audit_push(void); #else -static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, - size_t size) -{ -} -static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) -{ -} static inline void tty_audit_exit(void) { } -- cgit v1.2.3 From 6c80c0b94b94192d9a34b400f8237703c6475f4d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Apr 2021 14:51:30 +0200 Subject: tty: move some internal tty lock enums and functions out of tty.h Move the TTY_LOCK_* enums and tty_ldisc lock functions out of the global tty.h into the local header file to clean things up. Cc: Jiri Slaby Link: https://lore.kernel.org/r/20210408125134.3016837-10-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 516c63c1bfe2..031e7e4b5c97 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -16,30 +16,6 @@ #include -/* - * Lock subclasses for tty locks - * - * TTY_LOCK_NORMAL is for normal ttys and master ptys. - * TTY_LOCK_SLAVE is for slave ptys only. - * - * Lock subclasses are necessary for handling nested locking with pty pairs. - * tty locks which use nested locking: - * - * legacy_mutex - Nested tty locks are necessary for releasing pty pairs. - * The stable lock order is master pty first, then slave pty. - * termios_rwsem - The stable lock order is tty_buffer lock->termios_rwsem. - * Subclassing this lock enables the slave pty to hold its - * termios_rwsem when claiming the master tty_buffer lock. - * tty_buffer lock - slave ptys can claim nested buffer lock when handling - * signal chars. The stable lock order is slave pty, then - * master. - */ - -enum { - TTY_LOCK_NORMAL = 0, - TTY_LOCK_SLAVE, -}; - /* * (Note: the *_driver.minor_start values 1, 64, 128, 192 are * hardcoded at present.) @@ -419,8 +395,6 @@ extern struct tty_struct *tty_kopen_exclusive(dev_t device); extern struct tty_struct *tty_kopen_shared(dev_t device); extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); -extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); -extern void tty_ldisc_unlock(struct tty_struct *tty); extern ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); extern struct file *tty_release_redirect(struct tty_struct *tty); #else -- cgit v1.2.3 From dd9f611442578ca8392e90adbca5815d79946b5e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Apr 2021 14:51:31 +0200 Subject: tty: make tty_release_redirect() static No one calls this outside of the tty_io.c file, so mark this static and do not export the symbol anymore. Cc: Jiri Slaby Link: https://lore.kernel.org/r/20210408125134.3016837-11-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 031e7e4b5c97..23e5028ab52d 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -396,7 +396,6 @@ extern struct tty_struct *tty_kopen_shared(dev_t device); extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); extern ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); -extern struct file *tty_release_redirect(struct tty_struct *tty); #else static inline void tty_kref_put(struct tty_struct *tty) { } -- cgit v1.2.3 From 9f72cab1596327e1011ab4599c07b165e0fb45db Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Apr 2021 14:51:32 +0200 Subject: tty: move some tty-only functions to drivers/tty/tty.h The flow change and restricted_tty_write() logic is internal to the tty core only, so move it out of the include/linux/tty.h file. Cc: Jiri Slaby Link: https://lore.kernel.org/r/20210408125134.3016837-12-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 23e5028ab52d..cf65618e7bd9 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -349,21 +349,6 @@ struct tty_file_private { #define TTY_LDISC_CHANGING 20 /* Change pending - non-block IO */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ -/* Values for tty->flow_change */ -#define TTY_THROTTLE_SAFE 1 -#define TTY_UNTHROTTLE_SAFE 2 - -static inline void __tty_set_flow_change(struct tty_struct *tty, int val) -{ - tty->flow_change = val; -} - -static inline void tty_set_flow_change(struct tty_struct *tty, int val) -{ - tty->flow_change = val; - smp_mb(); -} - static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) { return file->f_flags & O_NONBLOCK || @@ -395,7 +380,6 @@ extern struct tty_struct *tty_kopen_exclusive(dev_t device); extern struct tty_struct *tty_kopen_shared(dev_t device); extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); -extern ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); #else static inline void tty_kref_put(struct tty_struct *tty) { } -- cgit v1.2.3 From 5ffa6e344a1c92a27c242f500fc74e6eb361a4bc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Apr 2021 14:51:34 +0200 Subject: tty: clean include/linux/tty.h up There are a lot of tty-core-only functions that are listed in include/linux/tty.h. Move them to drivers/tty/tty.h so that no one else can accidentally call them or think that they are public functions. Cc: Jiri Slaby Link: https://lore.kernel.org/r/20210408125134.3016837-14-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index cf65618e7bd9..e5d6b1f28823 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -432,11 +432,7 @@ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) extern const char *tty_driver_name(const struct tty_struct *tty); extern void tty_wait_until_sent(struct tty_struct *tty, long timeout); -extern int __tty_check_change(struct tty_struct *tty, int sig); -extern int tty_check_change(struct tty_struct *tty); -extern void __stop_tty(struct tty_struct *tty); extern void stop_tty(struct tty_struct *tty); -extern void __start_tty(struct tty_struct *tty); extern void start_tty(struct tty_struct *tty); extern int tty_register_driver(struct tty_driver *driver); extern void tty_unregister_driver(struct tty_driver *driver); @@ -463,23 +459,11 @@ extern int tty_get_icount(struct tty_struct *tty, extern int is_current_pgrp_orphaned(void); extern void tty_hangup(struct tty_struct *tty); extern void tty_vhangup(struct tty_struct *tty); -extern void tty_vhangup_session(struct tty_struct *tty); extern int tty_hung_up_p(struct file *filp); extern void do_SAK(struct tty_struct *tty); extern void __do_SAK(struct tty_struct *tty); -extern void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty); -extern int tty_signal_session_leader(struct tty_struct *tty, int exit_session); -extern void session_clear_tty(struct pid *session); extern void no_tty(void); -extern void tty_buffer_free_all(struct tty_port *port); -extern void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld); -extern void tty_buffer_init(struct tty_port *port); -extern void tty_buffer_set_lock_subclass(struct tty_port *port); -extern bool tty_buffer_restart_work(struct tty_port *port); -extern bool tty_buffer_cancel_work(struct tty_port *port); -extern void tty_buffer_flush_work(struct tty_port *port); extern speed_t tty_termios_baud_rate(struct ktermios *termios); -extern speed_t tty_termios_input_baud_rate(struct ktermios *termios); extern void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud, speed_t obaud); extern void tty_encode_baud_rate(struct tty_struct *tty, @@ -507,27 +491,16 @@ extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); -extern void tty_ldisc_hangup(struct tty_struct *tty, bool reset); -extern int tty_ldisc_reinit(struct tty_struct *tty, int disc); extern const struct seq_operations tty_ldiscs_seq_ops; extern void tty_wakeup(struct tty_struct *tty); extern void tty_ldisc_flush(struct tty_struct *tty); -extern long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg); extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); -extern long tty_jobctrl_ioctl(struct tty_struct *tty, struct tty_struct *real_tty, - struct file *file, unsigned int cmd, unsigned long arg); extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg); -extern void tty_default_fops(struct file_operations *fops); -extern struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx); -extern int tty_alloc_file(struct file *file); -extern void tty_add_file(struct tty_struct *tty, struct file *file); -extern void tty_free_file(struct file *file); extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); extern void tty_release_struct(struct tty_struct *tty, int idx); -extern int tty_release(struct inode *inode, struct file *filp); extern void tty_init_termios(struct tty_struct *tty); extern void tty_save_termios(struct tty_struct *tty); extern int tty_standard_install(struct tty_driver *driver, @@ -535,8 +508,6 @@ extern int tty_standard_install(struct tty_driver *driver, extern struct mutex tty_mutex; -#define tty_is_writelocked(tty) (mutex_is_locked(&tty->atomic_write_lock)) - extern void tty_port_init(struct tty_port *port); extern void tty_port_link_device(struct tty_port *port, struct tty_driver *driver, unsigned index); @@ -656,13 +627,8 @@ static inline int tty_port_users(struct tty_port *port) extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int disc); extern int tty_set_ldisc(struct tty_struct *tty, int disc); -extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); -extern void tty_ldisc_release(struct tty_struct *tty); -extern int __must_check tty_ldisc_init(struct tty_struct *tty); -extern void tty_ldisc_deinit(struct tty_struct *tty); extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, char *f, int count); -extern void tty_sysctl_init(void); /* n_tty.c */ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); -- cgit v1.2.3 From d3debfcc4e3f65f1370ad4ca2ab61e7f0ff683cd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Apr 2021 17:41:14 +0100 Subject: bug: Provide dummy version of bug_get_file_line() when !GENERIC_BUG Provide the missing dummy bug_get_file_line() implementation when GENENERIC_BUG isn't selected. Reported-by: kernel test robot Fixes: 26dbc7e299c7 ("bug: Factor out a getter for a bug's file line") Cc: Andrew Scull Signed-off-by: Marc Zyngier --- include/linux/bug.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index e3841bee4c8d..348acf2558f3 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -61,6 +61,13 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, return BUG_TRAP_TYPE_BUG; } +struct bug_entry; +static inline void bug_get_file_line(struct bug_entry *bug, const char **file, + unsigned int *line) +{ + *file = NULL; + *line = 0; +} static inline void generic_bug_clear_once(void) {} -- cgit v1.2.3 From c5797f8a64158f724238d13fa5a4b351b03fe42d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 15 Apr 2021 15:53:13 -0700 Subject: ethtool: move ethtool_stats_init We'll need it for FEC stats as well. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9f6f323af59a..069100b252bd 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -244,6 +244,12 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, #define ETHTOOL_STAT_NOT_SET (~0ULL) +static inline void ethtool_stats_init(u64 *stats, unsigned int n) +{ + while (n--) + stats[n] = ETHTOOL_STAT_NOT_SET; +} + /** * struct ethtool_pause_stats - statistics for IEEE 802.3x pause frames * @tx_pause_frames: transmitted pause frame count. Reported to user space -- cgit v1.2.3 From be85dbfeb37c8c4d4344da2ee594d78034b82489 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 15 Apr 2021 15:53:15 -0700 Subject: ethtool: add FEC statistics Similarly to pause statistics add stats for FEC. The IEEE standard mandates two sets of counters: - 30.5.1.1.17 aFECCorrectedBlocks - 30.5.1.1.18 aFECUncorrectableBlocks where block is a block of bits FEC operates on. Each of these counters is defined per lane (PCS instance). Multiple vendors provide number of corrected _bits_ rather than/as well as blocks. This set adds the 2 standard-based block counters and a extra one for corrected bits. Counters are exposed to user space via netlink in new attributes. Each attribute carries an array of u64s, first element is the total count, and the following ones are a per-lane break down. Much like with pause stats the operation will not fail when driver does not implement the get_fec_stats callback (nor can the driver fail the operation by returning an error). If stats can't be reported the relevant attributes will be empty. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 069100b252bd..112a85b57f1f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -269,6 +269,39 @@ struct ethtool_pause_stats { u64 rx_pause_frames; }; +#define ETHTOOL_MAX_LANES 8 + +/** + * struct ethtool_fec_stats - statistics for IEEE 802.3 FEC + * @corrected_blocks: number of received blocks corrected by FEC + * Reported to user space as %ETHTOOL_A_FEC_STAT_CORRECTED. + * + * Equivalent to `30.5.1.1.17 aFECCorrectedBlocks` from the standard. + * + * @uncorrectable_blocks: number of received blocks FEC was not able to correct + * Reported to user space as %ETHTOOL_A_FEC_STAT_UNCORR. + * + * Equivalent to `30.5.1.1.18 aFECUncorrectableBlocks` from the standard. + * + * @corrected_bits: number of bits corrected by FEC + * Similar to @corrected_blocks but counts individual bit changes, + * not entire FEC data blocks. This is a non-standard statistic. + * Reported to user space as %ETHTOOL_A_FEC_STAT_CORR_BITS. + * + * @lane: per-lane/PCS-instance counts as defined by the standard + * @total: error counts for the entire port, for drivers incapable of reporting + * per-lane stats + * + * Drivers should fill in either only total or per-lane statistics, core + * will take care of adding lane values up to produce the total. + */ +struct ethtool_fec_stats { + struct ethtool_fec_stat { + u64 total; + u64 lanes[ETHTOOL_MAX_LANES]; + } corrected_blocks, uncorrectable_blocks, corrected_bits; +}; + #define ETH_MODULE_EEPROM_PAGE_LEN 128 #define ETH_MODULE_MAX_I2C_ADDRESS 0x7f @@ -439,6 +472,11 @@ struct ethtool_module_eeprom { * ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter), * any change to them will be overwritten by kernel. Returns a negative * error code or zero. + * @get_fec_stats: Report FEC statistics. + * Core will sum up per-lane stats to get the total. + * Drivers must not zero statistics which they don't report. The stats + * structure is initialized to ETHTOOL_STAT_NOT_SET indicating driver does + * not report statistics. * @get_fecparam: Get the network device Forward Error Correction parameters. * @set_fecparam: Set the network device Forward Error Correction parameters. * @get_ethtool_phy_stats: Return extended statistics about the PHY device. @@ -544,6 +582,8 @@ struct ethtool_ops { struct ethtool_link_ksettings *); int (*set_link_ksettings)(struct net_device *, const struct ethtool_link_ksettings *); + void (*get_fec_stats)(struct net_device *dev, + struct ethtool_fec_stats *fec_stats); int (*get_fecparam)(struct net_device *, struct ethtool_fecparam *); int (*set_fecparam)(struct net_device *, -- cgit v1.2.3 From 80e5d1ff5d5f1ed5167a69b7c2fe86071b615f6b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 15 Apr 2021 19:46:50 -0400 Subject: useful constants: struct qstr for ".." Signed-off-by: Al Viro --- include/linux/dcache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4ecde5d8250c..9e23d33bb6f1 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -59,6 +59,7 @@ struct qstr { extern const struct qstr empty_name; extern const struct qstr slash_name; +extern const struct qstr dotdot_name; struct dentry_stat_t { long nr_dentry; -- cgit v1.2.3 From 42eb0d54c08a0331d6d295420f602237968d792b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 25 Mar 2021 09:22:09 +0100 Subject: fs: split receive_fd_replace from __receive_fd receive_fd_replace shares almost no code with the general case, so split it out. Also remove the "Bump the sock usage counts" comment from both copies, as that is now what __receive_sock actually does. [AV: ... and make the only user of receive_fd_replace() choose between it and receive_fd() according to what userland had passed to it in flags] Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/file.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index 225982792fa2..2de2e4613d7b 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -92,23 +92,20 @@ extern void put_unused_fd(unsigned int fd); extern void fd_install(unsigned int fd, struct file *file); -extern int __receive_fd(int fd, struct file *file, int __user *ufd, +extern int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags); static inline int receive_fd_user(struct file *file, int __user *ufd, unsigned int o_flags) { if (ufd == NULL) return -EFAULT; - return __receive_fd(-1, file, ufd, o_flags); + return __receive_fd(file, ufd, o_flags); } static inline int receive_fd(struct file *file, unsigned int o_flags) { - return __receive_fd(-1, file, NULL, o_flags); -} -static inline int receive_fd_replace(int fd, struct file *file, unsigned int o_flags) -{ - return __receive_fd(fd, file, NULL, o_flags); + return __receive_fd(file, NULL, o_flags); } +int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags); extern void flush_delayed_fput(void); extern void __fput_sync(struct file *); -- cgit v1.2.3 From 8d663f34f8afcf5fc6a84c3cc4fa28cc84d58e39 Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Thu, 15 Apr 2021 11:39:20 +0800 Subject: blk-mq: bypass IO scheduler's limit_depth for passthrough request Commit 01e99aeca39796003 ("blk-mq: insert passthrough request into hctx->dispatch directly") gives high priority to passthrough requests and bypass underlying IO scheduler. But as we allocate tag for such request it still runs io-scheduler's callback limit_depth, while we really want is to give full sbitmap-depth capabity to such request for acquiring available tag. blktrace shows PC requests(dmraid -s -c -i) hit bfq's limit_depth: 8,0 2 0 0.000000000 39952 1,0 m N bfq [bfq_limit_depth] wr_busy 0 sync 0 depth 8 8,0 2 1 0.000008134 39952 D R 4 [dmraid] 8,0 2 2 0.000021538 24 C R [0] 8,0 2 0 0.000035442 39952 1,0 m N bfq [bfq_limit_depth] wr_busy 0 sync 0 depth 8 8,0 2 3 0.000038813 39952 D R 24 [dmraid] 8,0 2 4 0.000044356 24 C R [0] This patch introduce a new wrapper to make code not that ugly. Signed-off-by: Lin Feng Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210415033920.213963-1-linf@wangsu.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 30d2090583ad..f2e77ba97550 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -274,6 +274,12 @@ static inline bool bio_is_passthrough(struct bio *bio) return blk_op_is_scsi(op) || blk_op_is_private(op); } +static inline bool blk_op_is_passthrough(unsigned int op) +{ + return (blk_op_is_scsi(op & REQ_OP_MASK) || + blk_op_is_private(op & REQ_OP_MASK)); +} + static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; -- cgit v1.2.3 From ef54c1a476aef7eef26fe13ea10dc090952c00f8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 8 Apr 2021 12:35:56 +0200 Subject: perf: Rework perf_event_exit_event() Make perf_event_exit_event() more robust, such that we can use it from other contexts. Specifically the up and coming remove_on_exec. For this to work we need to address a few issues. Remove_on_exec will not destroy the entire context, so we cannot rely on TASK_TOMBSTONE to disable event_function_call() and we thus have to use perf_remove_from_context(). When using perf_remove_from_context(), there's two races to consider. The first is against close(), where we can have concurrent tear-down of the event. The second is against child_list iteration, which should not find a half baked event. To address this, teach perf_remove_from_context() to special case !ctx->is_active and about DETACH_CHILD. [ elver@google.com: fix racing parent/child exit in sync_child_event(). ] Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210408103605.1676875-2-elver@google.com --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3f7f89ea5e51..3d478abf411c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -607,6 +607,7 @@ struct swevent_hlist { #define PERF_ATTACH_TASK_DATA 0x08 #define PERF_ATTACH_ITRACE 0x10 #define PERF_ATTACH_SCHED_CB 0x20 +#define PERF_ATTACH_CHILD 0x40 struct perf_cgroup; struct perf_buffer; -- cgit v1.2.3 From 2b26f0aa004995f49f7b6f4100dd0e4c39a9ed5f Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 8 Apr 2021 12:35:58 +0200 Subject: perf: Support only inheriting events if cloned with CLONE_THREAD Adds bit perf_event_attr::inherit_thread, to restricting inheriting events only if the child was cloned with CLONE_THREAD. This option supports the case where an event is supposed to be process-wide only (including subthreads), but should not propagate beyond the current process's shared environment. Suggested-by: Peter Zijlstra Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/lkml/YBvj6eJR%2FDY2TsEB@hirez.programming.kicks-ass.net/ --- include/linux/perf_event.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3d478abf411c..1660039199b2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -958,7 +958,7 @@ extern void __perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task); extern void __perf_event_task_sched_out(struct task_struct *prev, struct task_struct *next); -extern int perf_event_init_task(struct task_struct *child); +extern int perf_event_init_task(struct task_struct *child, u64 clone_flags); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); extern void perf_event_delayed_put(struct task_struct *task); @@ -1449,7 +1449,8 @@ perf_event_task_sched_in(struct task_struct *prev, static inline void perf_event_task_sched_out(struct task_struct *prev, struct task_struct *next) { } -static inline int perf_event_init_task(struct task_struct *child) { return 0; } +static inline int perf_event_init_task(struct task_struct *child, + u64 clone_flags) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_delayed_put(struct task_struct *task) { } -- cgit v1.2.3 From fb6cc127e0b6e629252cdd0f77d5a1f49db95b92 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 8 Apr 2021 12:36:00 +0200 Subject: signal: Introduce TRAP_PERF si_code and si_perf to siginfo Introduces the TRAP_PERF si_code, and associated siginfo_t field si_perf. These will be used by the perf event subsystem to send signals (if requested) to the task where an event occurred. Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Acked-by: Geert Uytterhoeven # m68k Acked-by: Arnd Bergmann # asm-generic Link: https://lkml.kernel.org/r/20210408103605.1676875-6-elver@google.com --- include/linux/compat.h | 2 ++ include/linux/signal.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 6e65be753603..c8821d966812 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -236,6 +236,8 @@ typedef struct compat_siginfo { char _dummy_pkey[__COMPAT_ADDR_BND_PKEY_PAD]; u32 _pkey; } _addr_pkey; + /* used when si_code=TRAP_PERF */ + compat_u64 _perf; }; } _sigfault; diff --git a/include/linux/signal.h b/include/linux/signal.h index 205526c4003a..1e98548d7cf6 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -43,6 +43,7 @@ enum siginfo_layout { SIL_FAULT_MCEERR, SIL_FAULT_BNDERR, SIL_FAULT_PKUERR, + SIL_PERF_EVENT, SIL_CHLD, SIL_RT, SIL_SYS, -- cgit v1.2.3 From 97ba62b278674293762c3d91f724f1bb922f04e0 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 8 Apr 2021 12:36:01 +0200 Subject: perf: Add support for SIGTRAP on perf events Adds bit perf_event_attr::sigtrap, which can be set to cause events to send SIGTRAP (with si_code TRAP_PERF) to the task where the event occurred. The primary motivation is to support synchronous signals on perf events in the task where an event (such as breakpoints) triggered. To distinguish perf events based on the event type, the type is set in si_errno. For events that are associated with an address, si_addr is copied from perf_sample_data. The new field perf_event_attr::sig_data is copied to si_perf, which allows user space to disambiguate which event (of the same type) triggered the signal. For example, user space could encode the relevant information it cares about in sig_data. We note that the choice of an opaque u64 provides the simplest and most flexible option. Alternatives where a reference to some user space data is passed back suffer from the problem that modification of referenced data (be it the event fd, or the perf_event_attr) can race with the signal being delivered (of course, the same caveat applies if user space decides to store a pointer in sig_data, but the ABI explicitly avoids prescribing such a design). Suggested-by: Peter Zijlstra Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Acked-by: Dmitry Vyukov Link: https://lore.kernel.org/lkml/YBv3rAT566k+6zjg@hirez.programming.kicks-ass.net/ --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1660039199b2..7d7280aa4e22 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -735,6 +735,7 @@ struct perf_event { int pending_wakeup; int pending_kill; int pending_disable; + unsigned long pending_addr; /* SIGTRAP */ struct irq_work pending; atomic_t event_limit; -- cgit v1.2.3 From b02a4fd8148f655095d9e3d6eddd8f0042bcc27c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Jan 2021 16:46:49 +0100 Subject: cpumask: Make cpu_{online,possible,present,active}() inline Prepare for addition of another mask. Primarily a code movement to avoid having to create more #ifdef, but while there, convert everything with an argument to an inline function. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210310150109.045447765@infradead.org --- include/linux/cpumask.h | 97 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 66 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 383684e30f12..a58433668bb2 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -98,37 +98,6 @@ extern struct cpumask __cpu_active_mask; extern atomic_t __num_online_cpus; -#if NR_CPUS > 1 -/** - * num_online_cpus() - Read the number of online CPUs - * - * Despite the fact that __num_online_cpus is of type atomic_t, this - * interface gives only a momentary snapshot and is not protected against - * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held - * region. - */ -static inline unsigned int num_online_cpus(void) -{ - return atomic_read(&__num_online_cpus); -} -#define num_possible_cpus() cpumask_weight(cpu_possible_mask) -#define num_present_cpus() cpumask_weight(cpu_present_mask) -#define num_active_cpus() cpumask_weight(cpu_active_mask) -#define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask) -#define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask) -#define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask) -#define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask) -#else -#define num_online_cpus() 1U -#define num_possible_cpus() 1U -#define num_present_cpus() 1U -#define num_active_cpus() 1U -#define cpu_online(cpu) ((cpu) == 0) -#define cpu_possible(cpu) ((cpu) == 0) -#define cpu_present(cpu) ((cpu) == 0) -#define cpu_active(cpu) ((cpu) == 0) -#endif - extern cpumask_t cpus_booted_once_mask; static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) @@ -894,6 +863,72 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) return to_cpumask(p); } +#if NR_CPUS > 1 +/** + * num_online_cpus() - Read the number of online CPUs + * + * Despite the fact that __num_online_cpus is of type atomic_t, this + * interface gives only a momentary snapshot and is not protected against + * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held + * region. + */ +static inline unsigned int num_online_cpus(void) +{ + return atomic_read(&__num_online_cpus); +} +#define num_possible_cpus() cpumask_weight(cpu_possible_mask) +#define num_present_cpus() cpumask_weight(cpu_present_mask) +#define num_active_cpus() cpumask_weight(cpu_active_mask) + +static inline bool cpu_online(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_online_mask); +} + +static inline bool cpu_possible(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_possible_mask); +} + +static inline bool cpu_present(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_present_mask); +} + +static inline bool cpu_active(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_active_mask); +} + +#else + +#define num_online_cpus() 1U +#define num_possible_cpus() 1U +#define num_present_cpus() 1U +#define num_active_cpus() 1U + +static inline bool cpu_online(unsigned int cpu) +{ + return cpu == 0; +} + +static inline bool cpu_possible(unsigned int cpu) +{ + return cpu == 0; +} + +static inline bool cpu_present(unsigned int cpu) +{ + return cpu == 0; +} + +static inline bool cpu_active(unsigned int cpu) +{ + return cpu == 0; +} + +#endif /* NR_CPUS > 1 */ + #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) #if NR_CPUS <= BITS_PER_LONG -- cgit v1.2.3 From e40f74c535b8a0ecf3ef0388b51a34cdadb34fb5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Jan 2021 18:43:45 +0100 Subject: cpumask: Introduce DYING mask Introduce a cpumask that indicates (for each CPU) what direction the CPU hotplug is currently going. Notably, it tracks rollbacks. Eg. when an up fails and we do a roll-back down, it will accurately reflect the direction. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210310150109.151441252@infradead.org --- include/linux/cpumask.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index a58433668bb2..e6b948a6000d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -91,10 +91,12 @@ extern struct cpumask __cpu_possible_mask; extern struct cpumask __cpu_online_mask; extern struct cpumask __cpu_present_mask; extern struct cpumask __cpu_active_mask; +extern struct cpumask __cpu_dying_mask; #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask) #define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask) #define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) #define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) +#define cpu_dying_mask ((const struct cpumask *)&__cpu_dying_mask) extern atomic_t __num_online_cpus; @@ -826,6 +828,14 @@ set_cpu_active(unsigned int cpu, bool active) cpumask_clear_cpu(cpu, &__cpu_active_mask); } +static inline void +set_cpu_dying(unsigned int cpu, bool dying) +{ + if (dying) + cpumask_set_cpu(cpu, &__cpu_dying_mask); + else + cpumask_clear_cpu(cpu, &__cpu_dying_mask); +} /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * @@ -900,6 +910,11 @@ static inline bool cpu_active(unsigned int cpu) return cpumask_test_cpu(cpu, cpu_active_mask); } +static inline bool cpu_dying(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_dying_mask); +} + #else #define num_online_cpus() 1U @@ -927,6 +942,11 @@ static inline bool cpu_active(unsigned int cpu) return cpu == 0; } +static inline bool cpu_dying(unsigned int cpu) +{ + return false; +} + #endif /* NR_CPUS > 1 */ #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) -- cgit v1.2.3 From 8a99b6833c884fa0e7919030d93fecedc69fc625 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Mar 2021 11:43:21 +0100 Subject: sched: Move SCHED_DEBUG sysctl to debugfs Stop polluting sysctl with undocumented knobs that really are debug only, move them all to /debug/sched/ along with the existing /debug/sched_* files that already exist. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Greg Kroah-Hartman Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210412102001.287610138@infradead.org --- include/linux/sched/sysctl.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 3c31ba88aca5..0a3f34638cf5 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -26,10 +26,11 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, enum { sysctl_hung_task_timeout_secs = 0 }; #endif +extern unsigned int sysctl_sched_child_runs_first; + extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; -extern unsigned int sysctl_sched_child_runs_first; enum sched_tunable_scaling { SCHED_TUNABLESCALING_NONE, @@ -37,7 +38,7 @@ enum sched_tunable_scaling { SCHED_TUNABLESCALING_LINEAR, SCHED_TUNABLESCALING_END, }; -extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; +extern unsigned int sysctl_sched_tunable_scaling; extern unsigned int sysctl_numa_balancing_scan_delay; extern unsigned int sysctl_numa_balancing_scan_period_min; @@ -47,9 +48,6 @@ extern unsigned int sysctl_numa_balancing_scan_size; #ifdef CONFIG_SCHED_DEBUG extern __read_mostly unsigned int sysctl_sched_migration_cost; extern __read_mostly unsigned int sysctl_sched_nr_migrate; - -int sched_proc_update_handler(struct ctl_table *table, int write, - void *buffer, size_t *length, loff_t *ppos); #endif /* -- cgit v1.2.3 From 9af0440ec86ebdab075e1b3d231f81fe7decb575 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Mar 2021 10:53:55 +0100 Subject: debugfs: Implement debugfs_create_str() Implement debugfs_create_str() to easily display names and such in debugfs. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Greg Kroah-Hartman Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210412102001.415407080@infradead.org --- include/linux/debugfs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index d6c4cc9ecc77..1fdb4343af9c 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -128,6 +128,8 @@ void debugfs_create_atomic_t(const char *name, umode_t mode, struct dentry *parent, atomic_t *value); struct dentry *debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, bool *value); +void debugfs_create_str(const char *name, umode_t mode, + struct dentry *parent, char **value); struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, @@ -156,6 +158,9 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf, ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos); +ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos); + #else #include @@ -297,6 +302,11 @@ static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode, return ERR_PTR(-ENODEV); } +static inline void debugfs_create_str(const char *name, umode_t mode, + struct dentry *parent, + char **value) +{ } + static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob) @@ -348,6 +358,13 @@ static inline ssize_t debugfs_write_file_bool(struct file *file, return -ENODEV; } +static inline ssize_t debugfs_read_file_str(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + return -ENODEV; +} + #endif /** -- cgit v1.2.3 From c0aec6680b6c82fe893a546e322e1130cd5cf21e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 1 Apr 2021 14:56:25 +0100 Subject: iommu: Statically set module owner It happens that the 3 drivers which first supported being modular are also ones which play games with their pgsize_bitmap, so have non-const iommu_ops where dynamically setting the owner manages to work out OK. However, it's less than ideal to force that upon all drivers which want to be modular - like the new sprd-iommu driver which now has a potential bug in that regard - so let's just statically set the module owner and let ops remain const wherever possible. Reviewed-by: Christoph Hellwig Acked-by: Will Deacon Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/31423b99ff609c3d4b291c701a7a7a810d9ce8dc.1617285386.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9223a8266b08..ce904cf4e774 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -361,19 +361,12 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link); void iommu_device_unlink(struct iommu_device *iommu, struct device *link); int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain); -static inline void __iommu_device_set_ops(struct iommu_device *iommu, +static inline void iommu_device_set_ops(struct iommu_device *iommu, const struct iommu_ops *ops) { iommu->ops = ops; } -#define iommu_device_set_ops(iommu, ops) \ -do { \ - struct iommu_ops *__ops = (struct iommu_ops *)(ops); \ - __ops->owner = THIS_MODULE; \ - __iommu_device_set_ops(iommu, __ops); \ -} while (0) - static inline void iommu_device_set_fwnode(struct iommu_device *iommu, struct fwnode_handle *fwnode) { -- cgit v1.2.3 From 2d471b20c55e13c98d1dba413bf2de618e89cdac Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 1 Apr 2021 14:56:26 +0100 Subject: iommu: Streamline registration interface Rather than have separate opaque setter functions that are easy to overlook and lead to repetitive boilerplate in drivers, let's pass the relevant initialisation parameters directly to iommu_device_register(). Acked-by: Will Deacon Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/ab001b87c533b6f4db71eb90db6f888953986c36.1617285386.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ce904cf4e774..32d448050bf7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -350,7 +350,9 @@ struct dev_iommu { void *priv; }; -int iommu_device_register(struct iommu_device *iommu); +int iommu_device_register(struct iommu_device *iommu, + const struct iommu_ops *ops, + struct device *hwdev); void iommu_device_unregister(struct iommu_device *iommu); int iommu_device_sysfs_add(struct iommu_device *iommu, struct device *parent, @@ -361,18 +363,6 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link); void iommu_device_unlink(struct iommu_device *iommu, struct device *link); int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain); -static inline void iommu_device_set_ops(struct iommu_device *iommu, - const struct iommu_ops *ops) -{ - iommu->ops = ops; -} - -static inline void iommu_device_set_fwnode(struct iommu_device *iommu, - struct fwnode_handle *fwnode) -{ - iommu->fwnode = fwnode; -} - static inline struct iommu_device *dev_to_iommu_device(struct device *dev) { return (struct iommu_device *)dev_get_drvdata(dev); @@ -858,21 +848,13 @@ static inline int iommu_set_pgtable_quirks(struct iommu_domain *domain, return 0; } -static inline int iommu_device_register(struct iommu_device *iommu) +static inline int iommu_device_register(struct iommu_device *iommu, + const struct iommu_ops *ops, + struct device *hwdev) { return -ENODEV; } -static inline void iommu_device_set_ops(struct iommu_device *iommu, - const struct iommu_ops *ops) -{ -} - -static inline void iommu_device_set_fwnode(struct iommu_device *iommu, - struct fwnode_handle *fwnode) -{ -} - static inline struct iommu_device *dev_to_iommu_device(struct device *dev) { return NULL; -- cgit v1.2.3 From 7f100744749e4fe547dece3bb6557fae5f0a7252 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Fri, 16 Apr 2021 19:15:37 +0530 Subject: PCI: tegra: Add Tegra194 MCFG quirks for ECAM errata The PCIe controller in Tegra194 SoC is not ECAM-compliant. With the current hardware design, ECAM can be enabled only for one controller (the C5 controller) with bus numbers starting from 160 instead of 0. A different approach is taken to avoid this abnormal way of enabling ECAM for just one controller but to enable configuration space access for all the other controllers. In this approach, ops are added through MCFG quirk mechanism which access the configuration spaces by dynamically programming iATU (internal AddressTranslation Unit) to generate respective configuration accesses just like the way it is done in DesignWare core sub-system. This issue is specific to Tegra194 and it would be fixed in the future generations of Tegra SoCs. Link: https://lore.kernel.org/r/20210416134537.19474-1-vidyas@nvidia.com Signed-off-by: Vidya Sagar Signed-off-by: Bjorn Helgaas --- include/linux/pci-ecam.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 65d3d83015c3..fbdadd4d8377 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -85,6 +85,7 @@ extern const struct pci_ecam_ops pci_thunder_ecam_ops; /* Cavium ThunderX 1.x */ extern const struct pci_ecam_ops xgene_v1_pcie_ecam_ops; /* APM X-Gene PCIe v1 */ extern const struct pci_ecam_ops xgene_v2_pcie_ecam_ops; /* APM X-Gene PCIe v2.x */ extern const struct pci_ecam_ops al_pcie_ops; /* Amazon Annapurna Labs PCIe */ +extern const struct pci_ecam_ops tegra194_pcie_ops; /* Tegra194 PCIe */ #endif #if IS_ENABLED(CONFIG_PCI_HOST_COMMON) -- cgit v1.2.3 From 7c8056bb366b1b2dc8e4a3cc0b876e15a8ebca2c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 10 Feb 2021 17:33:25 +0900 Subject: perf core: Factor out __perf_sw_event_sched In some cases, we need to check more than whether the software event is enabled. So split the condition check and the actual event handling. This is a preparation for the next change. Suggested-by: Peter Zijlstra Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210210083327.22726-1-namhyung@kernel.org --- include/linux/perf_event.h | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7d7280aa4e22..92d51a7beaea 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1178,30 +1178,24 @@ DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]); * which is guaranteed by us not actually scheduling inside other swevents * because those disable preemption. */ -static __always_inline void -perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) +static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { - if (static_key_false(&perf_swevent_enabled[event_id])) { - struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]); + struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]); - perf_fetch_caller_regs(regs); - ___perf_sw_event(event_id, nr, regs, addr); - } + perf_fetch_caller_regs(regs); + ___perf_sw_event(event_id, nr, regs, addr); } extern struct static_key_false perf_sched_events; -static __always_inline bool -perf_sw_migrate_enabled(void) +static __always_inline bool __perf_sw_enabled(int swevt) { - if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS])) - return true; - return false; + return static_key_false(&perf_swevent_enabled[swevt]); } static inline void perf_event_task_migrate(struct task_struct *task) { - if (perf_sw_migrate_enabled()) + if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS)) task->sched_migrated = 1; } @@ -1211,11 +1205,9 @@ static inline void perf_event_task_sched_in(struct task_struct *prev, if (static_branch_unlikely(&perf_sched_events)) __perf_event_task_sched_in(prev, task); - if (perf_sw_migrate_enabled() && task->sched_migrated) { - struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]); - - perf_fetch_caller_regs(regs); - ___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0); + if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) && + task->sched_migrated) { + __perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); task->sched_migrated = 0; } } @@ -1223,7 +1215,8 @@ static inline void perf_event_task_sched_in(struct task_struct *prev, static inline void perf_event_task_sched_out(struct task_struct *prev, struct task_struct *next) { - perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0); + if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES)) + __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0); if (static_branch_unlikely(&perf_sched_events)) __perf_event_task_sched_out(prev, next); @@ -1480,8 +1473,6 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh) static inline void perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } static inline void -perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { } -static inline void perf_bp_event(struct perf_event *event, void *data) { } static inline int perf_register_guest_info_callbacks -- cgit v1.2.3 From d0d1dd628527c77db2391ce0293c1ed344b2365f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 10 Feb 2021 17:33:26 +0900 Subject: perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event This patch adds a new software event to count context switches involving cgroup switches. So it's counted only if cgroups of previous and next tasks are different. Note that it only checks the cgroups in the perf_event subsystem. For cgroup v2, it shouldn't matter anyway. One can argue that we can do this by using existing sched_switch event with eBPF. But some systems might not have eBPF for some reason so I'd like to add this as a simple way. Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210210083327.22726-2-namhyung@kernel.org --- include/linux/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 92d51a7beaea..8989b2b7268d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1218,6 +1218,13 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES)) __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0); +#ifdef CONFIG_CGROUP_PERF + if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) && + perf_cgroup_from_task(prev, NULL) != + perf_cgroup_from_task(next, NULL)) + __perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0); +#endif + if (static_branch_unlikely(&perf_sched_events)) __perf_event_task_sched_out(prev, next); } -- cgit v1.2.3 From 1df1fc8c62f7527d953c7f3869930067bf5b3f29 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Sat, 3 Apr 2021 09:09:31 +0300 Subject: mtd: core: Constify buf in mtd_write_user_prot_reg() The write buffer comes from user and should be const. Constify write buffer in mtd core and across all _write_user_prot_reg() users. cfi_cmdset_{0001, 0002} and onenand_base will pay the cost of an explicit cast to discard the const qualifier since the beginning, since they are using an otp_op_t function prototype that is used for both reads and writes. mtd_dataflash and SPI NOR will benefit of the const buffer because they are using different paths for writes and reads. Signed-off-by: Tudor Ambarus Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210403060931.7119-1-tudor.ambarus@microchip.com --- include/linux/mtd/mtd.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 4aac200ca8b5..a89955f3cbc8 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -334,7 +334,8 @@ struct mtd_info { int (*_read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); int (*_write_user_prot_reg) (struct mtd_info *mtd, loff_t to, - size_t len, size_t *retlen, u_char *buf); + size_t len, size_t *retlen, + const u_char *buf); int (*_lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len); int (*_erase_user_prot_reg) (struct mtd_info *mtd, loff_t from, @@ -518,7 +519,7 @@ int mtd_get_user_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to, size_t len, - size_t *retlen, u_char *buf); + size_t *retlen, const u_char *buf); int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len); int mtd_erase_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len); -- cgit v1.2.3 From 36830159acbeb9896d7684b5f52db7b22efa197f Mon Sep 17 00:00:00 2001 From: Moshe Tal Date: Mon, 15 Feb 2021 16:13:02 +0200 Subject: net/mlx5: Add register layout to support extended link state Add needed structure layouts and defines for pddr register (Port Diagnostics Database Register) and the troublshooting page. This will be used to get extended link state from the monitor opcode bits. Signed-off-by: Moshe Tal Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 50 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2da953ad02ed..4e531c2aab52 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -127,6 +127,7 @@ enum { MLX5_REG_PELC = 0x500e, MLX5_REG_PVLC = 0x500f, MLX5_REG_PCMR = 0x5041, + MLX5_REG_PDDR = 0x5031, MLX5_REG_PMLP = 0x5002, MLX5_REG_PPLM = 0x5023, MLX5_REG_PCAM = 0x507f, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1599deee0456..f2c51d6833c6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9956,6 +9956,53 @@ struct mlx5_ifc_mirc_reg_bits { u8 reserved_at_20[0x20]; }; +struct mlx5_ifc_pddr_monitor_opcode_bits { + u8 reserved_at_0[0x10]; + u8 monitor_opcode[0x10]; +}; + +union mlx5_ifc_pddr_troubleshooting_page_status_opcode_auto_bits { + struct mlx5_ifc_pddr_monitor_opcode_bits pddr_monitor_opcode; + u8 reserved_at_0[0x20]; +}; + +enum { + /* Monitor opcodes */ + MLX5_PDDR_REG_TRBLSH_GROUP_OPCODE_MONITOR = 0x0, +}; + +struct mlx5_ifc_pddr_troubleshooting_page_bits { + u8 reserved_at_0[0x10]; + u8 group_opcode[0x10]; + + union mlx5_ifc_pddr_troubleshooting_page_status_opcode_auto_bits status_opcode; + + u8 reserved_at_40[0x20]; + + u8 status_message[59][0x20]; +}; + +union mlx5_ifc_pddr_reg_page_data_auto_bits { + struct mlx5_ifc_pddr_troubleshooting_page_bits pddr_troubleshooting_page; + u8 reserved_at_0[0x7c0]; +}; + +enum { + MLX5_PDDR_REG_PAGE_SELECT_TROUBLESHOOTING_INFO_PAGE = 0x1, +}; + +struct mlx5_ifc_pddr_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 reserved_at_12[0xe]; + + u8 reserved_at_20[0x18]; + u8 page_select[0x8]; + + union mlx5_ifc_pddr_reg_page_data_auto_bits page_data; +}; + union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_bufferx_reg_bits bufferx_reg; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; @@ -9970,6 +10017,9 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pamp_reg_bits pamp_reg; struct mlx5_ifc_paos_reg_bits paos_reg; struct mlx5_ifc_pcap_reg_bits pcap_reg; + struct mlx5_ifc_pddr_monitor_opcode_bits pddr_monitor_opcode; + struct mlx5_ifc_pddr_reg_bits pddr_reg; + struct mlx5_ifc_pddr_troubleshooting_page_bits pddr_troubleshooting_page; struct mlx5_ifc_peir_reg_bits peir_reg; struct mlx5_ifc_pelc_reg_bits pelc_reg; struct mlx5_ifc_pfcc_reg_bits pfcc_reg; -- cgit v1.2.3 From 07ff4aed015c564d03fd518d2fb54e5e6948903c Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 3 Mar 2021 11:35:44 +0100 Subject: time/timecounter: Mark 1st argument of timecounter_cyc2time() as const The timecounter is not modified in this function. Mark it as const. Signed-off-by: Marc Kleine-Budde Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210303103544.994855-1-mkl@pengutronix.de --- include/linux/timecounter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index 754b74a2167f..c6540ceea143 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -124,7 +124,7 @@ extern u64 timecounter_read(struct timecounter *tc); * This allows conversion of cycle counter values which were generated * in the past. */ -extern u64 timecounter_cyc2time(struct timecounter *tc, +extern u64 timecounter_cyc2time(const struct timecounter *tc, u64 cycle_tstamp); #endif -- cgit v1.2.3 From 9a44c1cc63887627284ae232a9626a9f1cd066fc Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 16 Apr 2021 10:36:33 +0200 Subject: net: Add a WWAN subsystem This change introduces initial support for a WWAN framework. Given the complexity and heterogeneity of existing WWAN hardwares and interfaces, there is no strict definition of what a WWAN device is and how it should be represented. It's often a collection of multiple devices that perform the global WWAN feature (netdev, tty, chardev, etc). One usual way to expose modem controls and configuration is via high level protocols such as the well known AT command protocol, MBIM or QMI. The USB modems started to expose them as character devices, and user daemons such as ModemManager learnt to use them. This initial version adds the concept of WWAN port, which is a logical pipe to a modem control protocol. The protocols are rawly exposed to user via character device, allowing straigthforward support in existing tools (ModemManager, ofono...). The WWAN core takes care of the generic part, including character device management, and relies on port driver operations to receive/submit protocol data. Since the different devices exposing protocols for a same WWAN hardware do not necessarily know about each others (e.g. two different USB interfaces, PCI/MHI channel devices...) and can be created/removed in different orders, the WWAN core ensures that all WAN ports contributing to the 'whole' WWAN feature are grouped under the same virtual WWAN device, relying on the provided parent device (e.g. mhi controller, USB device). It's a 'trick' I copied from Johannes's earlier WWAN subsystem proposal. This initial version is purposely minimalist, it's essentially moving the generic part of the previously proposed mhi_wwan_ctrl driver inside a common WWAN framework, but the implementation is open and flexible enough to allow extension for further drivers. Signed-off-by: Loic Poulain Signed-off-by: David S. Miller --- include/linux/wwan.h | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 include/linux/wwan.h (limited to 'include/linux') diff --git a/include/linux/wwan.h b/include/linux/wwan.h new file mode 100644 index 000000000000..aa05a253dcf9 --- /dev/null +++ b/include/linux/wwan.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2021, Linaro Ltd */ + +#ifndef __WWAN_H +#define __WWAN_H + +#include +#include +#include + +/** + * enum wwan_port_type - WWAN port types + * @WWAN_PORT_AT: AT commands + * @WWAN_PORT_MBIM: Mobile Broadband Interface Model control + * @WWAN_PORT_QMI: Qcom modem/MSM interface for modem control + * @WWAN_PORT_QCDM: Qcom Modem diagnostic interface + * @WWAN_PORT_FIREHOSE: XML based command protocol + * @WWAN_PORT_MAX: Number of supported port types + */ +enum wwan_port_type { + WWAN_PORT_AT, + WWAN_PORT_MBIM, + WWAN_PORT_QMI, + WWAN_PORT_QCDM, + WWAN_PORT_FIREHOSE, + WWAN_PORT_MAX, +}; + +struct wwan_port; + +/** struct wwan_port_ops - The WWAN port operations + * @start: The routine for starting the WWAN port device. + * @stop: The routine for stopping the WWAN port device. + * @tx: The routine that sends WWAN port protocol data to the device. + * + * The wwan_port_ops structure contains a list of low-level operations + * that control a WWAN port device. All functions are mandatory. + */ +struct wwan_port_ops { + int (*start)(struct wwan_port *port); + void (*stop)(struct wwan_port *port); + int (*tx)(struct wwan_port *port, struct sk_buff *skb); +}; + +/** + * wwan_create_port - Add a new WWAN port + * @parent: Device to use as parent and shared by all WWAN ports + * @type: WWAN port type + * @ops: WWAN port operations + * @drvdata: Pointer to caller driver data + * + * Allocate and register a new WWAN port. The port will be automatically exposed + * to user as a character device and attached to the right virtual WWAN device, + * based on the parent pointer. The parent pointer is the device shared by all + * components of a same WWAN modem (e.g. USB dev, PCI dev, MHI controller...). + * + * drvdata will be placed in the WWAN port device driver data and can be + * retrieved with wwan_port_get_drvdata(). + * + * This function must be balanced with a call to wwan_remove_port(). + * + * Returns a valid pointer to wwan_port on success or PTR_ERR on failure + */ +struct wwan_port *wwan_create_port(struct device *parent, + enum wwan_port_type type, + const struct wwan_port_ops *ops, + void *drvdata); + +/** + * wwan_remove_port - Remove a WWAN port + * @port: WWAN port to remove + * + * Remove a previously created port. + */ +void wwan_remove_port(struct wwan_port *port); + +/** + * wwan_port_rx - Receive data from the WWAN port + * @port: WWAN port for which data is received + * @skb: Pointer to the rx buffer + * + * A port driver calls this function upon data reception (MBIM, AT...). + */ +void wwan_port_rx(struct wwan_port *port, struct sk_buff *skb); + +/** + * wwan_port_txoff - Stop TX on WWAN port + * @port: WWAN port for which TX must be stopped + * + * Used for TX flow control, a port driver calls this function to indicate TX + * is temporary unavailable (e.g. due to ring buffer fullness). + */ +void wwan_port_txoff(struct wwan_port *port); + + +/** + * wwan_port_txon - Restart TX on WWAN port + * @port: WWAN port for which TX must be restarted + * + * Used for TX flow control, a port driver calls this function to indicate TX + * is available again. + */ +void wwan_port_txon(struct wwan_port *port); + +/** + * wwan_port_get_drvdata - Retrieve driver data from a WWAN port + * @port: Related WWAN port + */ +void *wwan_port_get_drvdata(struct wwan_port *port); + +#endif /* __WWAN_H */ -- cgit v1.2.3 From 02c587733c8161355a43e6e110c2e29bd0acff72 Mon Sep 17 00:00:00 2001 From: Walter Wu Date: Fri, 16 Apr 2021 15:46:00 -0700 Subject: kasan: remove redundant config option CONFIG_KASAN_STACK and CONFIG_KASAN_STACK_ENABLE both enable KASAN stack instrumentation, but we should only need one config, so that we remove CONFIG_KASAN_STACK_ENABLE and make CONFIG_KASAN_STACK workable. see [1]. When enable KASAN stack instrumentation, then for gcc we could do no prompt and default value y, and for clang prompt and default value n. This patch fixes the following compilation warning: include/linux/kasan.h:333:30: warning: 'CONFIG_KASAN_STACK' is not defined, evaluates to 0 [-Wundef] [akpm@linux-foundation.org: fix merge snafu] Link: https://bugzilla.kernel.org/show_bug.cgi?id=210221 [1] Link: https://lkml.kernel.org/r/20210226012531.29231-1-walter-zh.wu@mediatek.com Fixes: d9b571c885a8 ("kasan: fix KASAN_STACK dependency for HW_TAGS") Signed-off-by: Walter Wu Suggested-by: Dmitry Vyukov Reviewed-by: Nathan Chancellor Acked-by: Arnd Bergmann Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b91732bd05d7..14f72ec96492 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -330,7 +330,7 @@ static inline bool kasan_check_byte(const void *address) #endif /* CONFIG_KASAN */ -#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK +#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) void kasan_unpoison_task_stack(struct task_struct *task); #else static inline void kasan_unpoison_task_stack(struct task_struct *task) {} -- cgit v1.2.3 From f09ea6fb12723d6726293d68de00b6307368bd76 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 16 Apr 2021 12:27:39 -0700 Subject: ethtool: add a new command for reading standard stats Add an interface for reading standard stats, including stats which don't have a corresponding control interface. Start with IEEE 802.3 PHY stats. There seems to be only one stat to expose there. Define API to not require user space changes when new stats or groups are added. Groups are based on bitset, stats have a string set associated. v1: wrap stats in a nest Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 112a85b57f1f..2d5455eedbf4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -250,6 +250,13 @@ static inline void ethtool_stats_init(u64 *stats, unsigned int n) stats[n] = ETHTOOL_STAT_NOT_SET; } +/* Basic IEEE 802.3 PHY statistics (30.3.2.1.*), not otherwise exposed + * via a more targeted API. + */ +struct ethtool_eth_phy_stats { + u64 SymbolErrorDuringCarrier; +}; + /** * struct ethtool_pause_stats - statistics for IEEE 802.3x pause frames * @tx_pause_frames: transmitted pause frame count. Reported to user space @@ -487,6 +494,7 @@ struct ethtool_module_eeprom { * @get_module_eeprom_by_page: Get a region of plug-in module EEPROM data from * specified page. Returns a negative error code or the amount of bytes * read. + * @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@ -597,6 +605,8 @@ struct ethtool_ops { int (*get_module_eeprom_by_page)(struct net_device *dev, const struct ethtool_module_eeprom *page, struct netlink_ext_ack *extack); + void (*get_eth_phy_stats)(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats); }; int ethtool_check_ops(const struct ethtool_ops *ops); -- cgit v1.2.3 From ca2244547ec7505d1cf61d43f5e76e3ffd99cf77 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 16 Apr 2021 12:27:40 -0700 Subject: ethtool: add interface to read standard MAC stats Most of the MAC statistics are included in struct rtnl_link_stats64, but some fields are aggregated. Besides it's good to expose these clearly hardware stats separately. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 2d5455eedbf4..3c689a13e679 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -250,6 +250,34 @@ static inline void ethtool_stats_init(u64 *stats, unsigned int n) stats[n] = ETHTOOL_STAT_NOT_SET; } +/* Basic IEEE 802.3 MAC statistics (30.3.1.1.*), not otherwise exposed + * via a more targeted API. + */ +struct ethtool_eth_mac_stats { + u64 FramesTransmittedOK; + u64 SingleCollisionFrames; + u64 MultipleCollisionFrames; + u64 FramesReceivedOK; + u64 FrameCheckSequenceErrors; + u64 AlignmentErrors; + u64 OctetsTransmittedOK; + u64 FramesWithDeferredXmissions; + u64 LateCollisions; + u64 FramesAbortedDueToXSColls; + u64 FramesLostDueToIntMACXmitError; + u64 CarrierSenseErrors; + u64 OctetsReceivedOK; + u64 FramesLostDueToIntMACRcvError; + u64 MulticastFramesXmittedOK; + u64 BroadcastFramesXmittedOK; + u64 FramesWithExcessiveDeferral; + u64 MulticastFramesReceivedOK; + u64 BroadcastFramesReceivedOK; + u64 InRangeLengthErrors; + u64 OutOfRangeLengthField; + u64 FrameTooLongErrors; +}; + /* Basic IEEE 802.3 PHY statistics (30.3.2.1.*), not otherwise exposed * via a more targeted API. */ @@ -495,6 +523,7 @@ struct ethtool_module_eeprom { * specified page. Returns a negative error code or the amount of bytes * read. * @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics. + * @get_eth_mac_stats: Query some of the IEEE 802.3 MAC statistics. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@ -607,6 +636,8 @@ struct ethtool_ops { struct netlink_ext_ack *extack); void (*get_eth_phy_stats)(struct net_device *dev, struct ethtool_eth_phy_stats *phy_stats); + void (*get_eth_mac_stats)(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats); }; int ethtool_check_ops(const struct ethtool_ops *ops); -- cgit v1.2.3 From bfad2b979ddcc330c08bb071eb3c3f7b3411a681 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 16 Apr 2021 12:27:41 -0700 Subject: ethtool: add interface to read standard MAC Ctrl stats Number of devices maintains the standard-based MAC control counters for control frames. Add a API for those. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 3c689a13e679..1ca6b836f9fe 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -285,6 +285,15 @@ struct ethtool_eth_phy_stats { u64 SymbolErrorDuringCarrier; }; +/* Basic IEEE 802.3 MAC Ctrl statistics (30.3.3.*), not otherwise exposed + * via a more targeted API. + */ +struct ethtool_eth_ctrl_stats { + u64 MACControlFramesTransmitted; + u64 MACControlFramesReceived; + u64 UnsupportedOpcodesReceived; +}; + /** * struct ethtool_pause_stats - statistics for IEEE 802.3x pause frames * @tx_pause_frames: transmitted pause frame count. Reported to user space @@ -524,6 +533,7 @@ struct ethtool_module_eeprom { * read. * @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics. * @get_eth_mac_stats: Query some of the IEEE 802.3 MAC statistics. + * @get_eth_ctrl_stats: Query some of the IEEE 802.3 MAC Ctrl statistics. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@ -638,6 +648,8 @@ struct ethtool_ops { struct ethtool_eth_phy_stats *phy_stats); void (*get_eth_mac_stats)(struct net_device *dev, struct ethtool_eth_mac_stats *mac_stats); + void (*get_eth_ctrl_stats)(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats); }; int ethtool_check_ops(const struct ethtool_ops *ops); -- cgit v1.2.3 From a8b06e9d40d8b18c41c8ce060e8dc004fa59e708 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 16 Apr 2021 12:27:42 -0700 Subject: ethtool: add interface to read RMON stats Most devices maintain RMON (RFC 2819) stats - particularly the "histogram" of packets received by size. Unlike other RFCs which duplicate IEEE stats, the short/oversized frame counters in RMON don't seem to match IEEE stats 1-to-1 either, so expose those, too. Do not expose basic packet, CRC errors etc - those are already otherwise covered. Because standard defines packet ranges only up to 1518, and everything above that should theoretically be "oversized" - devices often create their own ranges. Going beyond what the RFC defines - expose the "histogram" in the Tx direction (assume for now that the ranges will be the same). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 1ca6b836f9fe..e030f7510cd3 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -346,6 +346,44 @@ struct ethtool_fec_stats { } corrected_blocks, uncorrectable_blocks, corrected_bits; }; +/** + * struct ethtool_rmon_hist_range - byte range for histogram statistics + * @low: low bound of the bucket (inclusive) + * @high: high bound of the bucket (inclusive) + */ +struct ethtool_rmon_hist_range { + u16 low; + u16 high; +}; + +#define ETHTOOL_RMON_HIST_MAX 10 + +/** + * struct ethtool_rmon_stats - selected RMON (RFC 2819) statistics + * @undersize_pkts: Equivalent to `etherStatsUndersizePkts` from the RFC. + * @oversize_pkts: Equivalent to `etherStatsOversizePkts` from the RFC. + * @fragments: Equivalent to `etherStatsFragments` from the RFC. + * @jabbers: Equivalent to `etherStatsJabbers` from the RFC. + * @hist: Packet counter for packet length buckets (e.g. + * `etherStatsPkts128to255Octets` from the RFC). + * @hist_tx: Tx counters in similar form to @hist, not defined in the RFC. + * + * Selection of RMON (RFC 2819) statistics which are not exposed via different + * APIs, primarily the packet-length-based counters. + * Unfortunately different designs choose different buckets beyond + * the 1024B mark (jumbo frame teritory), so the definition of the bucket + * ranges is left to the driver. + */ +struct ethtool_rmon_stats { + u64 undersize_pkts; + u64 oversize_pkts; + u64 fragments; + u64 jabbers; + + u64 hist[ETHTOOL_RMON_HIST_MAX]; + u64 hist_tx[ETHTOOL_RMON_HIST_MAX]; +}; + #define ETH_MODULE_EEPROM_PAGE_LEN 128 #define ETH_MODULE_MAX_I2C_ADDRESS 0x7f @@ -534,6 +572,8 @@ struct ethtool_module_eeprom { * @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics. * @get_eth_mac_stats: Query some of the IEEE 802.3 MAC statistics. * @get_eth_ctrl_stats: Query some of the IEEE 802.3 MAC Ctrl statistics. + * @get_rmon_stats: Query some of the RMON (RFC 2819) statistics. + * Set %ranges to a pointer to zero-terminated array of byte ranges. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@ -650,6 +690,9 @@ struct ethtool_ops { struct ethtool_eth_mac_stats *mac_stats); void (*get_eth_ctrl_stats)(struct net_device *dev, struct ethtool_eth_ctrl_stats *ctrl_stats); + void (*get_rmon_stats)(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges); }; int ethtool_check_ops(const struct ethtool_ops *ops); -- cgit v1.2.3 From 5f7c292b8975c9146063abbb91c0b9cdc1a5e9c5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 25 Mar 2021 19:19:47 -0700 Subject: KVM: Move prototypes for MMU notifier callbacks to generic code Move the prototypes for the MMU notifier callbacks out of arch code and into common code. There is no benefit to having each arch replicate the prototypes since any deviation from the invocation in common code will explode. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210326021957.1424875-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1b65e7204344..e6d77353025c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -218,6 +218,14 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif +#ifdef KVM_ARCH_WANT_MMU_NOTIFIER +int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end, unsigned flags); +int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); +#endif + enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, -- cgit v1.2.3 From 6c9dd6d26216ad9733e57f382e1669c142494aab Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 2 Apr 2021 17:53:09 +0200 Subject: KVM: constify kvm_arch_flush_remote_tlbs_memslot memslots are stored in RCU and there should be no need to change them. Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e6d77353025c..34a974ffc882 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -894,7 +894,7 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot); #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - struct kvm_memory_slot *memslot); + const struct kvm_memory_slot *memslot); #else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, -- cgit v1.2.3 From 3039bcc744980afe87c612122e47a27306483bc2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 1 Apr 2021 17:56:50 -0700 Subject: KVM: Move x86's MMU notifier memslot walkers to generic code Move the hva->gfn lookup for MMU notifiers into common code. Every arch does a similar lookup, and some arch code is all but identical across multiple architectures. In addition to consolidating code, this will allow introducing optimizations that will benefit all architectures without incurring multiple walks of the memslots, e.g. by taking mmu_lock if and only if a relevant range exists in the memslots. The use of __always_inline to avoid indirect call retpolines, as done by x86, may also benefit other architectures. Consolidating the lookups also fixes a wart in x86, where the legacy MMU and TDP MMU each do their own memslot walks. Lastly, future enhancements to the memslot implementation, e.g. to add an interval tree to track host address, will need to touch far less arch specific code. MIPS, PPC, and arm64 will be converted one at a time in future patches. Signed-off-by: Sean Christopherson Message-Id: <20210402005658.3024832-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 34a974ffc882..0228436bc446 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -219,11 +219,25 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif #ifdef KVM_ARCH_WANT_MMU_NOTIFIER +#ifdef KVM_ARCH_WANT_NEW_MMU_NOTIFIER_APIS +struct kvm_gfn_range { + struct kvm_memory_slot *slot; + gfn_t start; + gfn_t end; + pte_t pte; + bool may_block; +}; +bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); +bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); +bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); +bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range); +#else int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, unsigned flags); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); +#endif /* KVM_ARCH_WANT_NEW_MMU_NOTIFIER_APIS */ #endif enum { -- cgit v1.2.3 From b4c5936c47f86295cc76672e8dbeeca8b2379ba6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 1 Apr 2021 17:56:54 -0700 Subject: KVM: Kill off the old hva-based MMU notifier callbacks Yank out the hva-based MMU notifier APIs now that all architectures that use the notifiers have moved to the gfn-based APIs. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210402005658.3024832-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0228436bc446..6b4dd9500d70 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -219,7 +219,6 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif #ifdef KVM_ARCH_WANT_MMU_NOTIFIER -#ifdef KVM_ARCH_WANT_NEW_MMU_NOTIFIER_APIS struct kvm_gfn_range { struct kvm_memory_slot *slot; gfn_t start; @@ -231,13 +230,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range); -#else -int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end, unsigned flags); -int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); -int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -#endif /* KVM_ARCH_WANT_NEW_MMU_NOTIFIER_APIS */ #endif enum { -- cgit v1.2.3 From 1df53d212c803c0e11a2b10d47ec830d3576b972 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Fri, 16 Apr 2021 16:34:47 +0200 Subject: Drivers: hv: vmbus: Introduce and negotiate VMBus protocol version 5.3 Hyper-V has added VMBus protocol version 5.3. Allow Linux guests to negotiate the new version on version of Hyper-V that support it. Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210416143449.16185-2-parri.andrea@gmail.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2c18c8e768ef..3ce36bbb398e 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -234,6 +234,7 @@ static inline u32 hv_get_avail_to_write_percent( * 5 . 0 (Newer Windows 10) * 5 . 1 (Windows 10 RS4) * 5 . 2 (Windows Server 2019, RS5) + * 5 . 3 (Windows Server 2022) */ #define VERSION_WS2008 ((0 << 16) | (13)) @@ -245,6 +246,7 @@ static inline u32 hv_get_avail_to_write_percent( #define VERSION_WIN10_V5 ((5 << 16) | (0)) #define VERSION_WIN10_V5_1 ((5 << 16) | (1)) #define VERSION_WIN10_V5_2 ((5 << 16) | (2)) +#define VERSION_WIN10_V5_3 ((5 << 16) | (3)) /* Make maximum size of pipe payload of 16K */ #define MAX_PIPE_DATA_PAYLOAD (sizeof(u8) * 16384) -- cgit v1.2.3 From 870ced0548c895eb0bf8d982400344e5a9b2df00 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Fri, 16 Apr 2021 16:34:48 +0200 Subject: Drivers: hv: vmbus: Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE Introduce the CHANNELMSG_MODIFYCHANNEL_RESPONSE message type, and code to receive and process such a message. Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210416143449.16185-3-parri.andrea@gmail.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 3ce36bbb398e..9c2373a1cb2d 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -477,6 +477,7 @@ enum vmbus_channel_message_type { CHANNELMSG_TL_CONNECT_REQUEST = 21, CHANNELMSG_MODIFYCHANNEL = 22, CHANNELMSG_TL_CONNECT_RESULT = 23, + CHANNELMSG_MODIFYCHANNEL_RESPONSE = 24, CHANNELMSG_COUNT }; @@ -590,6 +591,13 @@ struct vmbus_channel_open_result { u32 status; } __packed; +/* Modify Channel Result parameters */ +struct vmbus_channel_modifychannel_response { + struct vmbus_channel_message_header header; + u32 child_relid; + u32 status; +} __packed; + /* Close channel parameters; */ struct vmbus_channel_close_channel { struct vmbus_channel_message_header header; @@ -722,6 +730,7 @@ struct vmbus_channel_msginfo { struct vmbus_channel_gpadl_torndown gpadl_torndown; struct vmbus_channel_gpadl_created gpadl_created; struct vmbus_channel_version_response version_response; + struct vmbus_channel_modifychannel_response modify_response; } response; u32 msgsize; @@ -1596,7 +1605,7 @@ extern __u32 vmbus_proto_version; int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id, const guid_t *shv_host_servie_id); -int vmbus_send_modifychannel(u32 child_relid, u32 target_vp); +int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp); void vmbus_set_event(struct vmbus_channel *channel); /* Get the start of the ring buffer. */ -- cgit v1.2.3 From efce5b50bad8b63d07719318c34a664ccdb56b70 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 9 Apr 2021 12:40:26 +0300 Subject: ieee80211: add the values of ranging parameters max LTF total field Add an enum with the values of the ranging parameters max LTF total field, as defined in IEEE802.11az_D2.6, table Table 9-322h23fc. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20210409123755.d2588ebb1974.I9424c8ade13c4c938cb9999d8ce99d0d4c1cc198@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 72ff75fb1971..25fc7bee868a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3861,4 +3861,11 @@ struct ieee80211_neighbor_ap_info { u8 channel; } __packed; +enum ieee80211_range_params_max_total_ltf { + IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_4 = 0, + IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_8, + IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_16, + IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_UNSPECIFIED, +}; + #endif /* LINUX_IEEE80211_H */ -- cgit v1.2.3 From 76cf42213307f0908e010ac4c2bdcb77113202dd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Apr 2021 12:40:17 +0300 Subject: wireless: align some HE capabilities with the spec Some names were changed, align that with the spec as of 802.11ax-D6.1. Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20210409123755.b1e5fbab0d8c.I3eb6076cb0714ec6aec6b8f9dee613ce4a05d825@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 25fc7bee868a..687db25eb85f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2020,7 +2020,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG 0x01 #define IEEE80211_HE_MAC_CAP4_QTP 0x02 #define IEEE80211_HE_MAC_CAP4_BQR 0x04 -#define IEEE80211_HE_MAC_CAP4_SRP_RESP 0x08 +#define IEEE80211_HE_MAC_CAP4_PSR_RESP 0x08 #define IEEE80211_HE_MAC_CAP4_NDP_FB_REP 0x10 #define IEEE80211_HE_MAC_CAP4_OPS 0x20 #define IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU 0x40 @@ -2031,7 +2031,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B40 0x01 #define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B41 0x02 -#define IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECVITE_TRANSMISSION 0x04 +#define IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECTIVE_TRANSMISSION 0x04 #define IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU 0x08 #define IEEE80211_HE_MAC_CAP5_OM_CTRL_UL_MU_DATA_DIS_RX 0x10 #define IEEE80211_HE_MAC_CAP5_HE_DYNAMIC_SM_PS 0x20 @@ -2089,7 +2089,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_MASK 0x18 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_1 0x00 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_2 0x20 -#define IEEE80211_HE_PHY_CAP3_RX_HE_MU_PPDU_FROM_NON_AP_STA 0x40 +#define IEEE80211_HE_PHY_CAP3_RX_PARTIAL_BW_SU_IN_20MHZ_MU 0x40 #define IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER 0x80 #define IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE 0x01 @@ -2136,15 +2136,15 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU 0x01 #define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU 0x02 -#define IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB 0x04 -#define IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB 0x08 +#define IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMING_FB 0x04 +#define IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMING_PARTIAL_BW_FB 0x08 #define IEEE80211_HE_PHY_CAP6_TRIG_CQI_FB 0x10 #define IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE 0x20 #define IEEE80211_HE_PHY_CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO 0x40 #define IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT 0x80 -#define IEEE80211_HE_PHY_CAP7_SRP_BASED_SR 0x01 -#define IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_AR 0x02 +#define IEEE80211_HE_PHY_CAP7_PSR_BASED_SR 0x01 +#define IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_SUPP 0x02 #define IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI 0x04 #define IEEE80211_HE_PHY_CAP7_MAX_NC_1 0x08 #define IEEE80211_HE_PHY_CAP7_MAX_NC_2 0x10 -- cgit v1.2.3 From 1f851b8dfd76a0e91560247802dd25a4754753c7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Apr 2021 12:40:20 +0300 Subject: wireless: align HE capabilities A-MPDU Length Exponent Extension The A-MPDU length exponent extension is defined differently in 802.11ax D6.1, align with that. Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20210409123755.c2a257d3e2df.I3455245d388c52c61dace7e7958dbed7e807cfb6@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 687db25eb85f..c74033aca726 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2006,17 +2006,15 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, * A-MDPU Length Exponent field in the HT capabilities, VHT capabilities and the * same field in the HE capabilities. */ -#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_USE_VHT 0x00 -#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_1 0x08 -#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2 0x10 -#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED 0x18 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_0 0x00 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_1 0x08 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2 0x10 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3 0x18 #define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK 0x18 #define IEEE80211_HE_MAC_CAP3_AMSDU_FRAG 0x20 #define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED 0x40 #define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS 0x80 -#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_SHIFT 3 - #define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG 0x01 #define IEEE80211_HE_MAC_CAP4_QTP 0x02 #define IEEE80211_HE_MAC_CAP4_BQR 0x04 -- cgit v1.2.3 From 2f5164447cdab6419edddde3a214f93a53aa4e60 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Apr 2021 12:40:24 +0300 Subject: wireless: fix spelling of A-MSDU in HE capabilities In the HE capabilities, spell A-MSDU correctly, not "A-MDSU". Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20210409123755.9e6ff1af1181.If6868bc6902ccd9a95c74c78f716c4b41473ef14@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index c74033aca726..2967437f1b11 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2021,7 +2021,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_MAC_CAP4_PSR_RESP 0x08 #define IEEE80211_HE_MAC_CAP4_NDP_FB_REP 0x10 #define IEEE80211_HE_MAC_CAP4_OPS 0x20 -#define IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU 0x40 +#define IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU 0x40 /* Multi TID agg TX is split between byte #4 and #5 * The value is a combination of B39,B40,B41 */ -- cgit v1.2.3 From 8ca6f063b73d3754213d009efb3df486c8fe52d2 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Thu, 1 Apr 2021 16:37:24 -0700 Subject: KVM: x86/mmu: Re-add const qualifier in kvm_tdp_mmu_zap_collapsible_sptes kvm_tdp_mmu_zap_collapsible_sptes unnecessarily removes the const qualifier from its memlsot argument, leading to a compiler warning. Add the const annotation and pass it to subsequent functions. Signed-off-by: Ben Gardon Message-Id: <20210401233736.638171-2-bgardon@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6b4dd9500d70..d17e3ff1138d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1130,7 +1130,7 @@ __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) } static inline unsigned long -__gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) +__gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn) { return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; } -- cgit v1.2.3 From 9591c3a34f7722bd77f42c98d76fd5a5bad465f0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 22 Mar 2021 19:39:43 +0200 Subject: fs: introduce a wrapper uuid_to_fsid() Some filesystem's use a digest of their uuid for f_fsid. Create a simple wrapper for this open coded folding. Filesystems that have a non null uuid but use the block device number for f_fsid may also consider using this helper. [JK: Added missing asm/byteorder.h include] Link: https://lore.kernel.org/r/20210322173944.449469-2-amir73il@gmail.com Acked-by: Damien Le Moal Reviewed-by: Christian Brauner Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/statfs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/statfs.h b/include/linux/statfs.h index 20f695b90aab..02c862686ea3 100644 --- a/include/linux/statfs.h +++ b/include/linux/statfs.h @@ -4,6 +4,7 @@ #include #include +#include struct kstatfs { long f_type; @@ -50,4 +51,11 @@ static inline __kernel_fsid_t u64_to_fsid(u64 v) return (__kernel_fsid_t){.val = {(u32)v, (u32)(v>>32)}}; } +/* Fold 16 bytes uuid to 64 bit fsid */ +static inline __kernel_fsid_t uuid_to_fsid(__u8 *uuid) +{ + return u64_to_fsid(le64_to_cpup((void *)uuid) ^ + le64_to_cpup((void *)(uuid + sizeof(u64)))); +} + #endif -- cgit v1.2.3 From 32c0a6bcaa1f57e80f67bf3ae2b35d00cea8361a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 21 Mar 2021 21:03:11 +0000 Subject: btrfs: add and use readahead_batch_length Implement readahead_batch_length() to determine the number of bytes in the current batch of readahead pages and use it in btrfs. Also use the readahead_pos to get the offset. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/linux/pagemap.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 8c9947fd62f3..4c1ffe1d5dac 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -981,6 +981,15 @@ static inline unsigned int readahead_count(struct readahead_control *rac) return rac->_nr_pages; } +/** + * readahead_batch_length - The number of bytes in the current batch. + * @rac: The readahead request. + */ +static inline loff_t readahead_batch_length(struct readahead_control *rac) +{ + return rac->_batch_count * PAGE_SIZE; +} + static inline unsigned long dir_pages(struct inode *inode) { return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> -- cgit v1.2.3 From 7a35693adcd38664b852ad10e3742782b3e87987 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 7 Apr 2021 14:25:22 +0100 Subject: dm: replace dm_vcalloc() Use kvcalloc or kvmalloc_array instead (depending whether zeroing is useful). Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 5c641f930caf..ff700fb6ce1d 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -574,11 +574,6 @@ struct dm_table *dm_swap_table(struct mapped_device *md, */ void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm); -/* - * A wrapper around vmalloc. - */ -void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); - /*----------------------------------------------------------------- * Macros. *---------------------------------------------------------------*/ -- cgit v1.2.3 From a9c81ccdf52dd73a20178c40bca34cf52991fdea Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 12 Apr 2021 07:30:57 -0700 Subject: perf/x86: Add structures for the attributes of Hybrid PMUs Hybrid PMUs have different events and formats. In theory, Hybrid PMU specific attributes should be maintained in the dedicated struct x86_hybrid_pmu, but it wastes space because the events and formats are similar among Hybrid PMUs. To reduce duplication, all hybrid PMUs will share a group of attributes in the following patch. To distinguish an attribute from different Hybrid PMUs, a PMU aware attribute structure is introduced. A PMU type is required for the attribute structure. The type is internal usage. It is not visible in the sysfs API. Hybrid PMUs may support the same event name, but with different event encoding, e.g., the mem-loads event on an Atom PMU has different event encoding from a Core PMU. It brings issue if two attributes are created for them. Current sysfs_update_group finds an attribute by searching the attr name (aka event name). If two attributes have the same event name, the first attribute will be replaced. To address the issue, only one attribute is created for the event. The event_str is extended and stores event encodings from all Hybrid PMUs. Each event encoding is divided by ";". The order of the event encodings must follow the order of the hybrid PMU index. The event_str is internal usage as well. When a user wants to show the attribute of a Hybrid PMU, only the corresponding part of the string is displayed. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Link: https://lkml.kernel.org/r/1618237865-33448-18-git-send-email-kan.liang@linux.intel.com --- include/linux/perf_event.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8989b2b7268d..61b385154c4a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1549,6 +1549,18 @@ struct perf_pmu_events_ht_attr { const char *event_str_noht; }; +struct perf_pmu_events_hybrid_attr { + struct device_attribute attr; + u64 id; + const char *event_str; + u64 pmu_type; +}; + +struct perf_pmu_format_hybrid_attr { + struct device_attribute attr; + u64 pmu_type; +}; + ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); -- cgit v1.2.3 From 55bcf6ef314ae8ba81bcd74aa760247b635ed47b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 12 Apr 2021 07:31:01 -0700 Subject: perf: Extend PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE Current Hardware events and Hardware cache events have special perf types, PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE. The two types don't pass the PMU type in the user interface. For a hybrid system, the perf subsystem doesn't know which PMU the events belong to. The first capable PMU will always be assigned to the events. The events never get a chance to run on the other capable PMUs. Extend the two types to become PMU aware types. The PMU type ID is stored at attr.config[63:32]. Add a new PMU capability, PERF_PMU_CAP_EXTENDED_HW_TYPE, to indicate a PMU which supports the extended PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE. The PMU type is only required when searching a specific PMU. The PMU specific codes will only be interested in the 'real' config value, which is stored in the low 32 bit of the event->attr.config. Update the event->attr.config in the generic code, so the PMU specific codes don't need to calculate it separately. If a user specifies a PMU type, but the PMU doesn't support the extended type, error out. If an event cannot be initialized in a PMU specified by a user, error out immediately. Perf should not try to open it on other PMUs. The new PMU capability is only set for the X86 hybrid PMUs for now. Other architectures, e.g., ARM, may need it as well. The support on ARM may be implemented later separately. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1618237865-33448-22-git-send-email-kan.liang@linux.intel.com --- include/linux/perf_event.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 61b385154c4a..a763928a0e41 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -260,15 +260,16 @@ struct perf_event; /** * pmu::capabilities flags */ -#define PERF_PMU_CAP_NO_INTERRUPT 0x01 -#define PERF_PMU_CAP_NO_NMI 0x02 -#define PERF_PMU_CAP_AUX_NO_SG 0x04 -#define PERF_PMU_CAP_EXTENDED_REGS 0x08 -#define PERF_PMU_CAP_EXCLUSIVE 0x10 -#define PERF_PMU_CAP_ITRACE 0x20 -#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40 -#define PERF_PMU_CAP_NO_EXCLUDE 0x80 -#define PERF_PMU_CAP_AUX_OUTPUT 0x100 +#define PERF_PMU_CAP_NO_INTERRUPT 0x0001 +#define PERF_PMU_CAP_NO_NMI 0x0002 +#define PERF_PMU_CAP_AUX_NO_SG 0x0004 +#define PERF_PMU_CAP_EXTENDED_REGS 0x0008 +#define PERF_PMU_CAP_EXCLUSIVE 0x0010 +#define PERF_PMU_CAP_ITRACE 0x0020 +#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x0040 +#define PERF_PMU_CAP_NO_EXCLUDE 0x0080 +#define PERF_PMU_CAP_AUX_OUTPUT 0x0100 +#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0200 struct perf_output_handle; -- cgit v1.2.3 From d9c9e4db186ab4d81f84e6f22b225d333b9424e3 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 19 Apr 2021 17:52:38 +0200 Subject: bpf: Factorize bpf_trace_printk and bpf_seq_printf Two helpers (trace_printk and seq_printf) have very similar implementations of format string parsing and a third one is coming (snprintf). To avoid code duplication and make the code easier to maintain, this moves the operations associated with format string parsing (validation and argument sanitization) into one generic function. The implementation of the two existing helpers already drifted quite a bit so unifying them entailed a lot of changes: - bpf_trace_printk always expected fmt[fmt_size] to be the terminating NULL character, this is no longer true, the first 0 is terminating. - bpf_trace_printk now supports %% (which produces the percentage char). - bpf_trace_printk now skips width formating fields. - bpf_trace_printk now supports the X modifier (capital hexadecimal). - bpf_trace_printk now supports %pK, %px, %pB, %pi4, %pI4, %pi6 and %pI6 - argument casting on 32 bit has been simplified into one macro and using an enum instead of obscure int increments. - bpf_seq_printf now uses bpf_trace_copy_string instead of strncpy_from_kernel_nofault and handles the %pks %pus specifiers. - bpf_seq_printf now prints longs correctly on 32 bit architectures. - both were changed to use a global per-cpu tmp buffer instead of one stack buffer for trace_printk and 6 small buffers for seq_printf. - to avoid per-cpu buffer usage conflict, these helpers disable preemption while the per-cpu buffer is in use. - both helpers now support the %ps and %pS specifiers to print symbols. The implementation is also moved from bpf_trace.c to helpers.c because the upcoming bpf_snprintf helper will be made available to all BPF programs and will need it. Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210419155243.1632274-2-revest@chromium.org --- include/linux/bpf.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ff8cd68c01b3..77d1d8c65b81 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2077,4 +2077,24 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); +enum bpf_printf_mod_type { + BPF_PRINTF_INT, + BPF_PRINTF_LONG, + BPF_PRINTF_LONG_LONG, +}; + +/* Workaround for getting va_list handling working with different argument type + * combinations generically for 32 and 64 bit archs. + */ +#define BPF_CAST_FMT_ARG(arg_nb, args, mod) \ + (mod[arg_nb] == BPF_PRINTF_LONG_LONG || \ + (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64) \ + ? (u64)args[arg_nb] \ + : (u32)args[arg_nb]) + +int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, + u64 *final_args, enum bpf_printf_mod_type *mod, + u32 num_args); +void bpf_printf_cleanup(void); + #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From fff13c4bb646ef849fd74ced87eef54340d28c21 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 19 Apr 2021 17:52:39 +0200 Subject: bpf: Add a ARG_PTR_TO_CONST_STR argument type This type provides the guarantee that an argument is going to be a const pointer to somewhere in a read-only map value. It also checks that this pointer is followed by a zero character before the end of the map value. Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210419155243.1632274-3-revest@chromium.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 77d1d8c65b81..c160526fc8bf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -309,6 +309,7 @@ enum bpf_arg_type { ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ + ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ __BPF_ARG_TYPE_MAX, }; -- cgit v1.2.3 From 7b15523a989b63927c2bb08e9b5b0bbc10b58bef Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 19 Apr 2021 17:52:40 +0200 Subject: bpf: Add a bpf_snprintf helper The implementation takes inspiration from the existing bpf_trace_printk helper but there are a few differences: To allow for a large number of format-specifiers, parameters are provided in an array, like in bpf_seq_printf. Because the output string takes two arguments and the array of parameters also takes two arguments, the format string needs to fit in one argument. Thankfully, ARG_PTR_TO_CONST_STR is guaranteed to point to a zero-terminated read-only map so we don't need a format string length arg. Because the format-string is known at verification time, we also do a first pass of format string validation in the verifier logic. This makes debugging easier. Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210419155243.1632274-4-revest@chromium.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c160526fc8bf..f8a45f109e96 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1953,6 +1953,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; +extern const struct bpf_func_proto bpf_snprintf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; -- cgit v1.2.3 From c6400e3fc3fa821a26a58cf867331e0877a4c56b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 17 Apr 2021 14:38:07 +0300 Subject: netlink: simplify nl_set_extack_cookie_u64(), nl_set_extack_cookie_u32() Taking address of a function argument directly works just fine. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/netlink.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 0bcf98098c5a..61b1c7fcc401 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -129,23 +129,19 @@ struct netlink_ext_ack { static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, u64 cookie) { - u64 __cookie = cookie; - if (!extack) return; - memcpy(extack->cookie, &__cookie, sizeof(__cookie)); - extack->cookie_len = sizeof(__cookie); + memcpy(extack->cookie, &cookie, sizeof(cookie)); + extack->cookie_len = sizeof(cookie); } static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack, u32 cookie) { - u32 __cookie = cookie; - if (!extack) return; - memcpy(extack->cookie, &__cookie, sizeof(__cookie)); - extack->cookie_len = sizeof(__cookie); + memcpy(extack->cookie, &cookie, sizeof(cookie)); + extack->cookie_len = sizeof(cookie); } void netlink_kernel_release(struct sock *sk); -- cgit v1.2.3 From da702f34e3cc4b6b87ed2d63c17d65d841fa81c6 Mon Sep 17 00:00:00 2001 From: "Radu Pirea (NXP OSS)" Date: Mon, 19 Apr 2021 19:13:59 +0300 Subject: net: phy: add genphy_c45_pma_suspend/resume Add generic PMA suspend and resume callback functions for C45 PHYs. Signed-off-by: Radu Pirea (NXP OSS) Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 98fb441dd72e..e3d4d583463b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1535,6 +1535,8 @@ int genphy_c45_pma_read_abilities(struct phy_device *phydev); int genphy_c45_read_status(struct phy_device *phydev); int genphy_c45_config_aneg(struct phy_device *phydev); int genphy_c45_loopback(struct phy_device *phydev, bool enable); +int genphy_c45_pma_resume(struct phy_device *phydev); +int genphy_c45_pma_suspend(struct phy_device *phydev); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; -- cgit v1.2.3 From 6980ffa0c5a8e65d53ff803d2cafdba3e2022714 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 21 Jun 2020 21:35:34 +0300 Subject: net/mlx5e: RX, Add checks for calculated Striding RQ attributes Striding RQ attributes below are mutually dependent. An unaware change to one might take the others out of the valid range derived by the HW caps: - The MPWQE size in bytes - The number of strides in a MPWQE - The stride size Add checks to verify they are valid and comply to the HW spec and SW assumptions/requirements. This is not a fix, no particular issue exists today. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 92a029a800a0..578c4ccae91c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -911,8 +911,11 @@ static inline u16 get_cqe_flow_tag(struct mlx5_cqe64 *cqe) return be32_to_cpu(cqe->sop_drop_qpn) & 0xFFF; } -#define MLX5_MPWQE_LOG_NUM_STRIDES_BASE (9) -#define MLX5_MPWQE_LOG_STRIDE_SZ_BASE (6) +#define MLX5_MPWQE_LOG_NUM_STRIDES_EXT_BASE 3 +#define MLX5_MPWQE_LOG_NUM_STRIDES_BASE 9 +#define MLX5_MPWQE_LOG_NUM_STRIDES_MAX 16 +#define MLX5_MPWQE_LOG_STRIDE_SZ_BASE 6 +#define MLX5_MPWQE_LOG_STRIDE_SZ_MAX 13 struct mpwrq_cqe_bc { __be16 filler_consumed_strides; -- cgit v1.2.3 From 704cfecdd03d7b84403ed96ba0009ea07270e74e Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 28 Feb 2021 23:48:27 +0200 Subject: net/mlx5: mlx5_ifc updates for flex parser Added the required definitions for supporting more protocols by flex parsers (GTP-U, Geneve TLV options), and for using the right flex parser that was configured for this protocol. Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f2c51d6833c6..aa6effe1dd6d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -622,7 +622,19 @@ struct mlx5_ifc_fte_match_set_misc3_bits { u8 geneve_tlv_option_0_data[0x20]; - u8 reserved_at_140[0xc0]; + u8 gtpu_teid[0x20]; + + u8 gtpu_msg_type[0x8]; + u8 gtpu_msg_flags[0x8]; + u8 reserved_at_170[0x10]; + + u8 gtpu_dw_2[0x20]; + + u8 gtpu_first_ext_dw_0[0x20]; + + u8 gtpu_dw_0[0x20]; + + u8 reserved_at_1e0[0x20]; }; struct mlx5_ifc_fte_match_set_misc4_bits { @@ -1237,9 +1249,17 @@ enum { enum { MLX5_FLEX_PARSER_GENEVE_ENABLED = 1 << 3, + MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED = 1 << 4, + mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED = 1 << 5, MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED = 1 << 7, MLX5_FLEX_PARSER_ICMP_V4_ENABLED = 1 << 8, MLX5_FLEX_PARSER_ICMP_V6_ENABLED = 1 << 9, + MLX5_FLEX_PARSER_GENEVE_TLV_OPTION_0_ENABLED = 1 << 10, + MLX5_FLEX_PARSER_GTPU_ENABLED = 1 << 11, + MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED = 1 << 16, + MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED = 1 << 17, + MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED = 1 << 18, + MLX5_FLEX_PARSER_GTPU_TEID_ENABLED = 1 << 19, }; enum { @@ -1637,7 +1657,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cqe_compression_timeout[0x10]; u8 cqe_compression_max_num[0x10]; - u8 reserved_at_5e0[0x10]; + u8 reserved_at_5e0[0x8]; + u8 flex_parser_id_gtpu_dw_0[0x4]; + u8 reserved_at_5ec[0x4]; u8 tag_matching[0x1]; u8 rndv_offload_rc[0x1]; u8 rndv_offload_dc[0x1]; @@ -1648,7 +1670,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 affiliate_nic_vport_criteria[0x8]; u8 native_port_num[0x8]; u8 num_vhca_ports[0x8]; - u8 reserved_at_618[0x6]; + u8 flex_parser_id_gtpu_teid[0x4]; + u8 reserved_at_61c[0x2]; u8 sw_owner_id[0x1]; u8 reserved_at_61f[0x1]; @@ -1683,7 +1706,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_6e0[0x10]; u8 sf_base_id[0x10]; - u8 reserved_at_700[0x8]; + u8 flex_parser_id_gtpu_dw_2[0x4]; + u8 flex_parser_id_gtpu_first_ext_dw_0[0x4]; u8 num_total_dynamic_vf_msix[0x18]; u8 reserved_at_720[0x14]; u8 dynamic_msix_table_size[0xc]; -- cgit v1.2.3 From 7304d603a57a1edecfecfbcc26f85edcda4cae81 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 2 Nov 2020 23:57:13 +0200 Subject: net/mlx5: DR, Add support for force-loopback QP When supported by the device, SW steering RoCE RC QP that is used to write/read to/from ICM will be created with force-loopback attribute. Such QP doesn't require GID index upon creation. Signed-off-by: Erez Shitrit Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index aa6effe1dd6d..4d9569c4b96c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -961,7 +961,9 @@ struct mlx5_ifc_roce_cap_bits { u8 roce_apm[0x1]; u8 reserved_at_1[0x3]; u8 sw_r_roce_src_udp_port[0x1]; - u8 reserved_at_5[0x19]; + u8 fl_rc_qp_when_roce_disabled[0x1]; + u8 fl_rc_qp_when_roce_enabled[0x1]; + u8 reserved_at_7[0x17]; u8 qp_ts_format[0x2]; u8 reserved_at_20[0x60]; @@ -2942,7 +2944,8 @@ struct mlx5_ifc_qpc_bits { u8 state[0x4]; u8 lag_tx_port_affinity[0x4]; u8 st[0x8]; - u8 reserved_at_10[0x3]; + u8 reserved_at_10[0x2]; + u8 isolate_vl_tc[0x1]; u8 pm_state[0x2]; u8 reserved_at_15[0x1]; u8 req_e2e_credit_mode[0x2]; -- cgit v1.2.3 From aeacb52a8de7046be5399ba311f49bce96e1b269 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 3 Nov 2020 01:31:53 +0200 Subject: net/mlx5: DR, Add support for isolate_vl_tc QP When using SW steering, rule insertion rate depends on the RDMA RC QP performance used for writing to the ICM. During stress this QP is competing on the HW resources with all the other QPs that are used to send data. To protect SW steering QP's performance in such cases, we set this QP to use isolated VL. The VL number is reserved by FW and is not exposed to the driver. Support for this QP on isolated VL exists only when both force-loopback and isolate_vl_tc capabilities are set. Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4d9569c4b96c..52b7cabcde08 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1319,7 +1319,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_srq_sz[0x8]; u8 log_max_qp_sz[0x8]; u8 event_cap[0x1]; - u8 reserved_at_91[0x7]; + u8 reserved_at_91[0x2]; + u8 isolate_vl_tc_new[0x1]; + u8 reserved_at_94[0x4]; u8 prio_tag_required[0x1]; u8 reserved_at_99[0x2]; u8 log_max_qp[0x5]; -- cgit v1.2.3 From 5d3c4c79384af06e3c8e25b7770b6247496b4417 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 12 Apr 2021 15:20:49 -0700 Subject: KVM: Stop looking for coalesced MMIO zones if the bus is destroyed Abort the walk of coalesced MMIO zones if kvm_io_bus_unregister_dev() fails to allocate memory for the new instance of the bus. If it can't instantiate a new bus, unregister_dev() destroys all devices _except_ the target device. But, it doesn't tell the caller that it obliterated the bus and invoked the destructor for all devices that were on the bus. In the coalesced MMIO case, this can result in a deleted list entry dereference due to attempting to continue iterating on coalesced_zones after future entries (in the walk) have been deleted. Opportunistically add curly braces to the for-loop, which encompasses many lines but sneaks by without braces due to the guts being a single if statement. Fixes: f65886606c2d ("KVM: fix memory leak in kvm_io_bus_unregister_dev()") Cc: stable@vger.kernel.org Reported-by: Hao Sun Signed-off-by: Sean Christopherson Message-Id: <20210412222050.876100-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d17e3ff1138d..82b066db37cb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -192,8 +192,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); -void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev); +int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev); struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr); -- cgit v1.2.3 From 3a05d08f6cc75b74079290c33d6127b2857226fa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 30 Mar 2021 16:11:38 +0100 Subject: PCI/MSI: Drop use of msi_controller from core code As there is no driver using msi_controller, we can now safely remove its use from the PCI probe code. Link: https://lore.kernel.org/r/20210330151145.997953-8-maz@kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Acked-by: Bjorn Helgaas --- include/linux/pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 86c799c97b77..ebf557e59d87 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -540,7 +540,6 @@ struct pci_host_bridge { int (*map_irq)(const struct pci_dev *, u8, u8); void (*release_fn)(struct pci_host_bridge *); void *release_data; - struct msi_controller *msi; unsigned int ignore_reset_delay:1; /* For entire hierarchy */ unsigned int no_ext_tags:1; /* No Extended Tags */ unsigned int native_aer:1; /* OS may use PCIe AER */ @@ -621,7 +620,6 @@ struct pci_bus { struct resource busn_res; /* Bus numbers routed to this bus */ struct pci_ops *ops; /* Configuration access functions */ - struct msi_controller *msi; /* MSI controller */ void *sysdata; /* Hook for sys-specific extension */ struct proc_dir_entry *procdir; /* Directory entry in /proc/bus/pci */ -- cgit v1.2.3 From b227be0d7314d0869d4e28c199ac1fc7075cf06e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 30 Mar 2021 16:11:39 +0100 Subject: PCI/MSI: Kill msi_controller structure msi_controller had a good, long life as the abstraction for a driver providing MSIs to PCI devices. But it has been replaced in all drivers by the more expressive generic MSI framework. Farewell, struct msi_controller. Link: https://lore.kernel.org/r/20210330151145.997953-9-maz@kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Acked-by: Bjorn Helgaas --- include/linux/msi.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index aef35fd1cf11..3f21e77b57b7 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -240,8 +240,7 @@ void pci_msi_unmask_irq(struct irq_data *data); /* * The arch hooks to setup up msi irqs. Default functions are implemented * as weak symbols so that they /can/ be overriden by architecture specific - * code if needed. These hooks must be enabled by the architecture or by - * drivers which depend on them via msi_controller based MSI handling. + * code if needed. These hooks can only be enabled by the architecture. * * If CONFIG_PCI_MSI_ARCH_FALLBACKS is not selected they are replaced by * stubs with warnings. @@ -272,19 +271,6 @@ static inline void arch_teardown_msi_irqs(struct pci_dev *dev) void arch_restore_msi_irqs(struct pci_dev *dev); void default_restore_msi_irqs(struct pci_dev *dev); -struct msi_controller { - struct module *owner; - struct device *dev; - struct device_node *of_node; - struct list_head list; - - int (*setup_irq)(struct msi_controller *chip, struct pci_dev *dev, - struct msi_desc *desc); - int (*setup_irqs)(struct msi_controller *chip, struct pci_dev *dev, - int nvec, int type); - void (*teardown_irq)(struct msi_controller *chip, unsigned int irq); -}; - #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN #include -- cgit v1.2.3 From f8bcf249d9cf292c6ceb3d9f5bd90815090f5286 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 30 Mar 2021 16:11:40 +0100 Subject: PCI/MSI: Kill default_teardown_msi_irqs() It doesn't have any caller left. Link: https://lore.kernel.org/r/20210330151145.997953-10-maz@kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Acked-by: Bjorn Helgaas --- include/linux/msi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 3f21e77b57b7..6aff469e511d 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -250,7 +250,6 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc); void arch_teardown_msi_irq(unsigned int irq); int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void arch_teardown_msi_irqs(struct pci_dev *dev); -void default_teardown_msi_irqs(struct pci_dev *dev); #else static inline int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { -- cgit v1.2.3 From 94e89b145371b68fa0ea294855adebcd03e0522e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 30 Mar 2021 16:11:41 +0100 Subject: PCI/MSI: Let PCI host bridges declare their reliance on MSI domains There is a whole class of host bridges that cannot know whether MSIs will be provided or not, as they rely on other blocks to provide the MSI functionnality, using MSI domains. This is the case for example on systems that use the ARM GIC architecture. Introduce a new attribute ('msi_domain') indicating that implicit dependency, and use this property to set the NO_MSI flag when no MSI domain is found at probe time. Link: https://lore.kernel.org/r/20210330151145.997953-11-maz@kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Acked-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index ebf557e59d87..ede0aef2cfd4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -550,6 +550,7 @@ struct pci_host_bridge { unsigned int native_dpc:1; /* OS may use PCIe DPC */ unsigned int preserve_config:1; /* Preserve FW resource setup */ unsigned int size_windows:1; /* Enable root bus sizing */ + unsigned int msi_domain:1; /* Bridge wants MSI domain */ /* Resource alignment requirements */ resource_size_t (*align_resource)(struct pci_dev *dev, -- cgit v1.2.3 From b0c3d9354de1f87eebc00694d5218b6611265933 Mon Sep 17 00:00:00 2001 From: Matthew Gerlach Date: Fri, 16 Apr 2021 09:57:19 -0700 Subject: spi: altera: separate core code from platform code In preparation of adding support for a new bus type, separate the core spi-altera code from the platform driver code. Signed-off-by: Matthew Gerlach Link: https://lore.kernel.org/r/20210416165720.554144-2-matthew.gerlach@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/altera.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/altera.h b/include/linux/spi/altera.h index 2d42641499a6..2e2a622e56da 100644 --- a/include/linux/spi/altera.h +++ b/include/linux/spi/altera.h @@ -5,10 +5,13 @@ #ifndef __LINUX_SPI_ALTERA_H #define __LINUX_SPI_ALTERA_H +#include #include #include #include +#define ALTERA_SPI_MAX_CS 32 + /** * struct altera_spi_platform_data - Platform data of the Altera SPI driver * @mode_bits: Mode bits of SPI master. @@ -26,4 +29,22 @@ struct altera_spi_platform_data { struct spi_board_info *devices; }; +struct altera_spi { + int irq; + int len; + int count; + int bytes_per_word; + u32 imr; + + /* data buffers */ + const unsigned char *tx; + unsigned char *rx; + + struct regmap *regmap; + u32 regoff; + struct device *dev; +}; + +extern irqreturn_t altera_spi_irq(int irq, void *dev); +extern void altera_spi_init_master(struct spi_master *master); #endif /* __LINUX_SPI_ALTERA_H */ -- cgit v1.2.3 From 67880f1bc342ed4c94e72cad7f8ca76e5121aae3 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Tue, 20 Apr 2021 10:16:11 -0700 Subject: platform/chrome: cros_ec: Add Type C hard reset Update the EC command header to include the new event bit. This bit is included in the latest version of the Chrome EC headers[1]. [1] https://chromium.googlesource.com/chromiumos/platform/ec/+/refs/heads/main/include/ec_commands.h Signed-off-by: Prashant Malani Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20210420171617.3830902-1-pmalani@chromium.org --- include/linux/platform_data/cros_ec_commands.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 5ff8597ceabd..9156078c6fc6 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -5678,6 +5678,7 @@ enum tcpc_cc_polarity { #define PD_STATUS_EVENT_SOP_DISC_DONE BIT(0) #define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE BIT(1) +#define PD_STATUS_EVENT_HARD_RESET BIT(2) struct ec_params_typec_status { uint8_t port; -- cgit v1.2.3 From db2e718a47984b9d71ed890eb2ea36ecf150de18 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Tue, 20 Apr 2021 08:43:34 -0500 Subject: capabilities: require CAP_SETFCAP to map uid 0 cap_setfcap is required to create file capabilities. Since commit 8db6c34f1dbc ("Introduce v3 namespaced file capabilities"), a process running as uid 0 but without cap_setfcap is able to work around this as follows: unshare a new user namespace which maps parent uid 0 into the child namespace. While this task will not have new capabilities against the parent namespace, there is a loophole due to the way namespaced file capabilities are represented as xattrs. File capabilities valid in userns 1 are distinguished from file capabilities valid in userns 2 by the kuid which underlies uid 0. Therefore the restricted root process can unshare a new self-mapping namespace, add a namespaced file capability onto a file, then use that file capability in the parent namespace. To prevent that, do not allow mapping parent uid 0 if the process which opened the uid_map file does not have CAP_SETFCAP, which is the capability for setting file capabilities. As a further wrinkle: a task can unshare its user namespace, then open its uid_map file itself, and map (only) its own uid. In this case we do not have the credential from before unshare, which was potentially more restricted. So, when creating a user namespace, we record whether the creator had CAP_SETFCAP. Then we can use that during map_write(). With this patch: 1. Unprivileged user can still unshare -Ur ubuntu@caps:~$ unshare -Ur root@caps:~# logout 2. Root user can still unshare -Ur ubuntu@caps:~$ sudo bash root@caps:/home/ubuntu# unshare -Ur root@caps:/home/ubuntu# logout 3. Root user without CAP_SETFCAP cannot unshare -Ur: root@caps:/home/ubuntu# /sbin/capsh --drop=cap_setfcap -- root@caps:/home/ubuntu# /sbin/setcap cap_setfcap=p /sbin/setcap unable to set CAP_SETFCAP effective capability: Operation not permitted root@caps:/home/ubuntu# unshare -Ur unshare: write failed /proc/self/uid_map: Operation not permitted Note: an alternative solution would be to allow uid 0 mappings by processes without CAP_SETFCAP, but to prevent such a namespace from writing any file capabilities. This approach can be seen at [1]. Background history: commit 95ebabde382 ("capabilities: Don't allow writing ambiguous v3 file capabilities") tried to fix the issue by preventing v3 fscaps to be written to disk when the root uid would map to the same uid in nested user namespaces. This led to regressions for various workloads. For example, see [2]. Ultimately this is a valid use-case we have to support meaning we had to revert this change in 3b0c2d3eaa83 ("Revert 95ebabde382c ("capabilities: Don't allow writing ambiguous v3 file capabilities")"). Link: https://git.kernel.org/pub/scm/linux/kernel/git/sergeh/linux.git/log/?h=2021-04-15/setfcap-nsfscaps-v4 [1] Link: https://github.com/containers/buildah/issues/3071 [2] Signed-off-by: Serge Hallyn Reviewed-by: Andrew G. Morgan Tested-by: Christian Brauner Reviewed-by: Christian Brauner Tested-by: Giuseppe Scrivano Cc: Eric Biederman Signed-off-by: Linus Torvalds --- include/linux/user_namespace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 64cf8ebdc4ec..f6c5f784be5a 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -63,6 +63,9 @@ struct user_namespace { kgid_t group; struct ns_common ns; unsigned long flags; + /* parent_could_setfcap: true if the creator if this ns had CAP_SETFCAP + * in its effective capability set at the child ns creation time. */ + bool parent_could_setfcap; #ifdef CONFIG_KEYS /* List of joinable keyrings in this namespace. Modification access of -- cgit v1.2.3 From 014068dcb5b17dae110354c4de241833124edba1 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 19 Apr 2021 15:01:02 +0200 Subject: net: phy: genphy_loopback: add link speed configuration In case of loopback, in most cases we need to disable autoneg support and force some speed configuration. Otherwise, depending on currently active auto negotiated link speed, the loopback may or may not work. This patch was tested with following PHYs: TJA1102, KSZ8081, KSZ9031, AT8035, AR9331. Signed-off-by: Oleksij Rempel Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index e3d4d583463b..60d2b26026a2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1410,6 +1410,7 @@ void phy_disconnect(struct phy_device *phydev); void phy_detach(struct phy_device *phydev); void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); +int phy_config_aneg(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); int phy_speed_down(struct phy_device *phydev, bool sync); -- cgit v1.2.3 From a978f7c479ea68d68a6267a37cbd44362bdd9811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 20 Apr 2021 09:54:03 +0200 Subject: net: phy: marvell: add support for Amethyst internal PHY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for Amethyst internal PHY. The only difference from Peridot is HWMON. Signed-off-by: Marek Behún Signed-off-by: David S. Miller --- include/linux/marvell_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index f61d82c53f30..acee44b9db26 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -39,6 +39,7 @@ */ #define MARVELL_PHY_ID_88E6341_FAMILY 0x01410f41 #define MARVELL_PHY_ID_88E6390_FAMILY 0x01410f90 +#define MARVELL_PHY_ID_88E6393_FAMILY 0x002b0b9b #define MARVELL_PHY_FAMILY_ID(id) ((id) >> 4) -- cgit v1.2.3 From d99f2487e1de23a2e902d1a359a85a48bfd21fe7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Apr 2021 07:48:43 -0400 Subject: NFS: The 'fattr_valid' field in struct nfs_server should be unsigned int Fix up a static compiler warning: "fs/nfs/nfs4proc.c:3882 _nfs4_server_capabilities() warn: was expecting a 64 bit value instead of '(1 << 11)'" The fix is to convert the fattr_valid field to match the type of the 'valid' field in struct nfs_fattr. Reported-by: Dan Carpenter Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d28d7a62864f..70057b2e606e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -156,6 +156,7 @@ struct nfs_server { #define NFS_MOUNT_WRITE_EAGER 0x01000000 #define NFS_MOUNT_WRITE_WAIT 0x02000000 + unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ @@ -191,8 +192,6 @@ struct nfs_server { dev_t s_dev; /* superblock dev numbers */ struct nfs_auth_info auth_info; /* parsed auth flavors */ - __u64 fattr_valid; /* Valid attributes */ - #ifdef CONFIG_NFS_FSCACHE struct nfs_fscache_key *fscache_key; /* unique key for superblock */ struct fscache_cookie *fscache; /* superblock cookie */ -- cgit v1.2.3 From c006fac556e401a62054d065da168099ea5a5b10 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Fri, 16 Apr 2021 14:29:36 -0700 Subject: sched: Warn on long periods of pending need_resched CPU scheduler marks need_resched flag to signal a schedule() on a particular CPU. But, schedule() may not happen immediately in cases where the current task is executing in the kernel mode (no preemption state) for extended periods of time. This patch adds a warn_on if need_resched is pending for more than the time specified in sysctl resched_latency_warn_ms. If it goes off, it is likely that there is a missing cond_resched() somewhere. Monitoring is done via the tick and the accuracy is hence limited to jiffy scale. This also means that we won't trigger the warning if the tick is disabled. This feature (LATENCY_WARN) is default disabled. Signed-off-by: Paul Turner Signed-off-by: Josh Don Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210416212936.390566-1-joshdon@google.com --- include/linux/sched/sysctl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 0a3f34638cf5..db2c0f34aaaf 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -48,6 +48,9 @@ extern unsigned int sysctl_numa_balancing_scan_size; #ifdef CONFIG_SCHED_DEBUG extern __read_mostly unsigned int sysctl_sched_migration_cost; extern __read_mostly unsigned int sysctl_sched_nr_migrate; + +extern int sysctl_resched_latency_warn_ms; +extern int sysctl_resched_latency_warn_once; #endif /* -- cgit v1.2.3 From ddd8d94ca31e768c76cf8bfe34ba7b10136b3694 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Apr 2021 11:38:39 +0300 Subject: gpio: omap: Save and restore sysconfig As we are using cpu_pm to save and restore context, we must also save and restore the GPIO sysconfig register. This is needed because we are not calling PM runtime functions at all with cpu_pm. We need to save the sysconfig on idle as it's value can get reconfigured by PM runtime and can be different from the init time value. Device specific flags like "ti,no-idle-on-init" can affect the init value. Fixes: b764a5863fd8 ("gpio: omap: Remove custom PM calls and use cpu_pm instead") Cc: Aaro Koskinen Cc: Adam Ford Cc: Andreas Kemnade Cc: Grygorii Strashko Cc: Peter Ujfalusi Signed-off-by: Tony Lindgren Acked-by: Grygorii Strashko Signed-off-by: Bartosz Golaszewski --- include/linux/platform_data/gpio-omap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h index 8b30b14b47d3..f377817ce75c 100644 --- a/include/linux/platform_data/gpio-omap.h +++ b/include/linux/platform_data/gpio-omap.h @@ -85,6 +85,7 @@ * omap2+ specific GPIO registers */ #define OMAP24XX_GPIO_REVISION 0x0000 +#define OMAP24XX_GPIO_SYSCONFIG 0x0010 #define OMAP24XX_GPIO_IRQSTATUS1 0x0018 #define OMAP24XX_GPIO_IRQSTATUS2 0x0028 #define OMAP24XX_GPIO_IRQENABLE2 0x002c @@ -108,6 +109,7 @@ #define OMAP24XX_GPIO_SETDATAOUT 0x0094 #define OMAP4_GPIO_REVISION 0x0000 +#define OMAP4_GPIO_SYSCONFIG 0x0010 #define OMAP4_GPIO_EOI 0x0020 #define OMAP4_GPIO_IRQSTATUSRAW0 0x0024 #define OMAP4_GPIO_IRQSTATUSRAW1 0x0028 @@ -148,6 +150,7 @@ #ifndef __ASSEMBLER__ struct omap_gpio_reg_offs { u16 revision; + u16 sysconfig; u16 direction; u16 datain; u16 dataout; -- cgit v1.2.3 From c968b89a1d446ec4a1ed3022ebd79d36de5ea1eb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 23:12:44 -0700 Subject: of: linux/of.h: fix kernel-doc warnings Correct kernel-doc notation warnings: ../include/linux/of.h:1211: warning: Function parameter or member 'output' not described in 'of_property_read_string_index' ../include/linux/of.h:1211: warning: Excess function parameter 'out_string' description in 'of_property_read_string_index' ../include/linux/of.h:1477: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Overlay support Signed-off-by: Randy Dunlap Cc: Greg Kroah-Hartman Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20210417061244.2262-1-rdunlap@infradead.org --- include/linux/of.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index ef6b161d1f91..d8db8d3592fd 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1192,7 +1192,7 @@ static inline int of_property_count_strings(const struct device_node *np, * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the string in the list of strings - * @out_string: pointer to null terminated return string, modified only if + * @output: pointer to null terminated return string, modified only if * return value is 0. * * Search for a property in a device tree node and retrieve a null @@ -1473,7 +1473,7 @@ static inline bool of_device_is_system_power_controller(const struct device_node return of_property_read_bool(np, "system-power-controller"); } -/** +/* * Overlay support */ -- cgit v1.2.3 From 54526d1fd59338fd6a381dbd806b7ccbae3aa4aa Mon Sep 17 00:00:00 2001 From: Nathan Tempelman Date: Thu, 8 Apr 2021 22:32:14 +0000 Subject: KVM: x86: Support KVM VMs sharing SEV context Add a capability for userspace to mirror SEV encryption context from one vm to another. On our side, this is intended to support a Migration Helper vCPU, but it can also be used generically to support other in-guest workloads scheduled by the host. The intention is for the primary guest and the mirror to have nearly identical memslots. The primary benefits of this are that: 1) The VMs do not share KVM contexts (think APIC/MSRs/etc), so they can't accidentally clobber each other. 2) The VMs can have different memory-views, which is necessary for post-copy migration (the migration vCPUs on the target need to read and write to pages, when the primary guest would VMEXIT). This does not change the threat model for AMD SEV. Any memory involved is still owned by the primary guest and its initial state is still attested to through the normal SEV_LAUNCH_* flows. If userspace wanted to circumvent SEV, they could achieve the same effect by simply attaching a vCPU to the primary VM. This patch deliberately leaves userspace in charge of the memslots for the mirror, as it already has the power to mess with them in the primary guest. This patch does not support SEV-ES (much less SNP), as it does not handle handing off attested VMSAs to the mirror. For additional context, we need a Migration Helper because SEV PSP migration is far too slow for our live migration on its own. Using an in-guest migrator lets us speed this up significantly. Signed-off-by: Nathan Tempelman Message-Id: <20210408223214.2582277-1-natet@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 82b066db37cb..c306b4c3674e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -654,6 +654,7 @@ void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); +bool file_is_kvm(struct file *file); void kvm_put_kvm_no_destroy(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) -- cgit v1.2.3 From 52acd22faa1af8a0514ccd075a6978ac97986425 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 16 Apr 2021 11:08:10 +0800 Subject: KVM: Boost vCPU candidate in user mode which is delivering interrupt Both lock holder vCPU and IPI receiver that has halted are condidate for boost. However, the PLE handler was originally designed to deal with the lock holder preemption problem. The Intel PLE occurs when the spinlock waiter is in kernel mode. This assumption doesn't hold for IPI receiver, they can be in either kernel or user mode. the vCPU candidate in user mode will not be boosted even if they should respond to IPIs. Some benchmarks like pbzip2, swaptions etc do the TLB shootdown in kernel mode and most of the time they are running in user mode. It can lead to a large number of continuous PLE events because the IPI sender causes PLE events repeatedly until the receiver is scheduled while the receiver is not candidate for a boost. This patch boosts the vCPU candidiate in user mode which is delivery interrupt. We can observe the speed of pbzip2 improves 10% in 96 vCPUs VM in over-subscribe scenario (The host machine is 2 socket, 48 cores, 96 HTs Intel CLX box). There is no performance regression for other benchmarks like Unixbench spawn (most of the time contend read/write lock in kernel mode), ebizzy (most of the time contend read/write sem and TLB shoodtdown in kernel mode). Signed-off-by: Wanpeng Li Message-Id: <1618542490-14756-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c306b4c3674e..8895b95b6a22 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -960,6 +960,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); +bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_post_init_vm(struct kvm *kvm); void kvm_arch_pre_destroy_vm(struct kvm *kvm); -- cgit v1.2.3 From 4cfdd47d6d95aca4fb8d6cfbe73392472d353f82 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 15 Apr 2021 15:53:14 +0000 Subject: KVM: SVM: Add KVM_SEV SEND_START command The command is used to create an outgoing SEV guest encryption context. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Paolo Bonzini Cc: Joerg Roedel Cc: Borislav Petkov Cc: Tom Lendacky Cc: x86@kernel.org Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Steve Rutherford Reviewed-by: Venu Busireddy Signed-off-by: Brijesh Singh Signed-off-by: Ashish Kalra Message-Id: <2f1686d0164e0f1b3d6a41d620408393e0a48376.1618498113.git.ashish.kalra@amd.com> Signed-off-by: Paolo Bonzini --- include/linux/psp-sev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index b801ead1e2bb..73da511b9423 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -326,11 +326,11 @@ struct sev_data_send_start { u64 pdh_cert_address; /* In */ u32 pdh_cert_len; /* In */ u32 reserved1; - u64 plat_cert_address; /* In */ - u32 plat_cert_len; /* In */ + u64 plat_certs_address; /* In */ + u32 plat_certs_len; /* In */ u32 reserved2; - u64 amd_cert_address; /* In */ - u32 amd_cert_len; /* In */ + u64 amd_certs_address; /* In */ + u32 amd_certs_len; /* In */ u32 reserved3; u64 session_address; /* In */ u32 session_len; /* In/Out */ -- cgit v1.2.3 From 5569e2e7a650dfffd4df7635662b2f92162d6501 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Tue, 20 Apr 2021 05:01:20 -0400 Subject: KVM: SVM: Add support for KVM_SEV_SEND_CANCEL command After completion of SEND_START, but before SEND_FINISH, the source VMM can issue the SEND_CANCEL command to stop a migration. This is necessary so that a cancelled migration can restart with a new target later. Reviewed-by: Nathan Tempelman Reviewed-by: Brijesh Singh Signed-off-by: Steve Rutherford Message-Id: <20210412194408.2458827-1-srutherford@google.com> Signed-off-by: Paolo Bonzini --- include/linux/psp-sev.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 73da511b9423..d48a7192e881 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -73,6 +73,7 @@ enum sev_cmd { SEV_CMD_SEND_UPDATE_DATA = 0x041, SEV_CMD_SEND_UPDATE_VMSA = 0x042, SEV_CMD_SEND_FINISH = 0x043, + SEV_CMD_SEND_CANCEL = 0x044, /* Guest migration commands (incoming) */ SEV_CMD_RECEIVE_START = 0x050, @@ -392,6 +393,15 @@ struct sev_data_send_finish { u32 handle; /* In */ } __packed; +/** + * struct sev_data_send_cancel - SEND_CANCEL command parameters + * + * @handle: handle of the VM to process + */ +struct sev_data_send_cancel { + u32 handle; /* In */ +} __packed; + /** * struct sev_data_receive_start - RECEIVE_START command parameters * -- cgit v1.2.3 From 1de15e99a242a66ef4f803fe1ad357f86b3a75f8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 12 Apr 2021 17:07:39 +0300 Subject: pinctrl: Keep enum pin_config_param ordered by name It seems the ordering is by name. Keep it that way. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210412140741.39946-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 6aeb711f7cd1..188db8d84162 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -90,6 +90,7 @@ struct pinctrl_map; * value on the line. Use argument 1 to indicate high level, argument 0 to * indicate low level. (Please see Documentation/driver-api/pinctl.rst, * section "GPIO mode pitfalls" for a discussion around this parameter.) + * @PIN_CONFIG_PERSIST_STATE: retain pin state across sleep or controller reset * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power * supplies, the argument to this parameter (on a custom format) tells * the driver which alternative power source to use. @@ -101,7 +102,6 @@ struct pinctrl_map; * or latch delay (on outputs) this parameter (in a custom format) * specifies the clock skew or latch delay. It typically controls how * many double inverters are put in front of the line. - * @PIN_CONFIG_PERSIST_STATE: retain pin state across sleep or controller reset * @PIN_CONFIG_END: this is the last enumerator for pin configurations, if * you need to pass in custom configurations to the pin controller, use * PIN_CONFIG_END+1 as the base offset. @@ -127,11 +127,11 @@ enum pin_config_param { PIN_CONFIG_LOW_POWER_MODE, PIN_CONFIG_OUTPUT_ENABLE, PIN_CONFIG_OUTPUT, + PIN_CONFIG_PERSIST_STATE, PIN_CONFIG_POWER_SOURCE, PIN_CONFIG_SLEEP_HARDWARE_STATE, PIN_CONFIG_SLEW_RATE, PIN_CONFIG_SKEW_DELAY, - PIN_CONFIG_PERSIST_STATE, PIN_CONFIG_END = 0x7F, PIN_CONFIG_MAX = 0xFF, }; -- cgit v1.2.3 From 31f9a421a1d01538776db37ec9c5419a3a49d650 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 12 Apr 2021 17:07:40 +0300 Subject: pinctrl: Introduce MODE group in enum pin_config_param Better to have a MODE group of settings to keep them together when ordered alphabetically. Hence, rename PIN_CONFIG_LOW_POWER_MODE to PIN_CONFIG_MODE_LOW_POWER. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210412140741.39946-2-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 188db8d84162..189e701832ea 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -76,7 +76,7 @@ struct pinctrl_map; * @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin. * If the argument != 0, schmitt-trigger mode is enabled. If it's 0, * schmitt-trigger mode is disabled. - * @PIN_CONFIG_LOW_POWER_MODE: this will configure the pin for low power + * @PIN_CONFIG_MODE_LOW_POWER: this will configure the pin for low power * operation, if several modes of operation are supported these can be * passed in the argument on a custom form, else just use argument 1 * to indicate low power mode, argument 0 turns low power mode off. @@ -124,7 +124,7 @@ enum pin_config_param { PIN_CONFIG_INPUT_ENABLE, PIN_CONFIG_INPUT_SCHMITT, PIN_CONFIG_INPUT_SCHMITT_ENABLE, - PIN_CONFIG_LOW_POWER_MODE, + PIN_CONFIG_MODE_LOW_POWER, PIN_CONFIG_OUTPUT_ENABLE, PIN_CONFIG_OUTPUT, PIN_CONFIG_PERSIST_STATE, -- cgit v1.2.3 From 09e11caaa4cffac681963688b774e1aa3063b3a9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 12 Apr 2021 17:07:41 +0300 Subject: pinctrl: Add PIN_CONFIG_MODE_PWM to enum pin_config_param It seems that we will have more and more pin controllers that support PWM function on the (selected) pins. Due to it being a part of pin controller IP the idea is to have some code that will switch the mode and attach the corresponding driver, for example, via using it as a library. Meanwhile, put a corresponding item to the pin_config_param enumerator. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210412140741.39946-3-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 189e701832ea..e18ab3d5908f 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -80,6 +80,7 @@ struct pinctrl_map; * operation, if several modes of operation are supported these can be * passed in the argument on a custom form, else just use argument 1 * to indicate low power mode, argument 0 turns low power mode off. + * @PIN_CONFIG_MODE_PWM: this will configure the pin for PWM * @PIN_CONFIG_OUTPUT_ENABLE: this will enable the pin's output mode * without driving a value there. For most platforms this reduces to * enable the output buffers and then let the pin controller current @@ -125,6 +126,7 @@ enum pin_config_param { PIN_CONFIG_INPUT_SCHMITT, PIN_CONFIG_INPUT_SCHMITT_ENABLE, PIN_CONFIG_MODE_LOW_POWER, + PIN_CONFIG_MODE_PWM, PIN_CONFIG_OUTPUT_ENABLE, PIN_CONFIG_OUTPUT, PIN_CONFIG_PERSIST_STATE, -- cgit v1.2.3 From 85367040511f8402d7e4054d8c17b053c75e33ff Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 21 Apr 2021 23:45:26 +0800 Subject: scsi: blk-mq: Fix build warning when making htmldocs Fixes the following warning when running 'make htmldocs': include/linux/blk-mq.h:395: warning: Function parameter or member 'set_rq_budget_token' not described in 'blk_mq_ops' include/linux/blk-mq.h:395: warning: Function parameter or member 'get_rq_budget_token' not described in 'blk_mq_ops' [mkp: added warning messages] Link: https://lore.kernel.org/r/20210421154526.1954174-1-ming.lei@redhat.com Fixes: d022d18c045f ("scsi: blk-mq: Add callbacks for storing & retrieving budget token") Reported-by: Stephen Rothwell Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- include/linux/blk-mq.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 3bd3ee651143..359486940fa0 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -313,12 +313,12 @@ struct blk_mq_ops { */ void (*put_budget)(struct request_queue *, int); - /* - * @set_rq_budget_toekn: store rq's budget token + /** + * @set_rq_budget_token: store rq's budget token */ void (*set_rq_budget_token)(struct request *, int); - /* - * @get_rq_budget_toekn: retrieve rq's budget token + /** + * @get_rq_budget_token: retrieve rq's budget token */ int (*get_rq_budget_token)(struct request *); -- cgit v1.2.3 From a943d76352dbb4707a5e5537bbe696c00f5ddd36 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 21 Apr 2021 21:11:32 +0300 Subject: devm-helpers: Fix devm_delayed_work_autocancel() kerneldoc The kerneldoc for devm_delayed_work_autocancel() contains invalid parameter description. Fix the parameter description. And while at it - make it more obvous that this function operates on delayed_work. That helps differentiating with resource-managed INIT_WORK description (which should follow in near future) Fixes: 0341ce544394 ("workqueue: Add resource managed version of delayed work init") Reviewed-by: Hans de Goede Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/db3a8b4b8899fdf109a0cc760807de12d3b4f09b.1619028482.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Greg Kroah-Hartman --- include/linux/devm-helpers.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devm-helpers.h b/include/linux/devm-helpers.h index f64e0c9f3763..f40f77717a24 100644 --- a/include/linux/devm-helpers.h +++ b/include/linux/devm-helpers.h @@ -32,13 +32,14 @@ static inline void devm_delayed_work_drop(void *res) } /** - * devm_delayed_work_autocancel - Resource-managed work allocation - * @dev: Device which lifetime work is bound to - * @pdata: work to be cancelled when driver is detached + * devm_delayed_work_autocancel - Resource-managed delayed work allocation + * @dev: Device which lifetime work is bound to + * @w: Work item to be queued + * @worker: Worker function * - * Initialize work which is automatically cancelled when driver is detached. - * A few drivers need delayed work which must be cancelled before driver - * is detached to avoid accessing removed resources. + * Initialize delayed work which is automatically cancelled when driver is + * detached. A few drivers need delayed work which must be cancelled before + * driver is detached to avoid accessing removed resources. * devm_delayed_work_autocancel() can be used to omit the explicit * cancelleation when driver is detached. */ -- cgit v1.2.3 From fa989ae7c7b38efbc6c3370571fb8a6f7350029a Mon Sep 17 00:00:00 2001 From: Sai Krishna Potthuri Date: Thu, 22 Apr 2021 14:00:00 +0530 Subject: firmware: xilinx: Add pinctrl support Adding pinctrl support to query platform specific information (pins) from firmware. Signed-off-by: Sai Krishna Potthuri Acked-by: Michal Simek Link: https://lore.kernel.org/r/1619080202-31924-2-git-send-email-lakshmi.sai.krishna.potthuri@xilinx.com Signed-off-by: Linus Walleij --- include/linux/firmware/xlnx-zynqmp.h | 90 ++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 71177b17eee5..8285a4bcfc2d 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -72,6 +72,12 @@ enum pm_api_id { PM_FPGA_LOAD = 22, PM_FPGA_GET_STATUS = 23, PM_GET_CHIPID = 24, + PM_PINCTRL_REQUEST = 28, + PM_PINCTRL_RELEASE = 29, + PM_PINCTRL_GET_FUNCTION = 30, + PM_PINCTRL_SET_FUNCTION = 31, + PM_PINCTRL_CONFIG_PARAM_GET = 32, + PM_PINCTRL_CONFIG_PARAM_SET = 33, PM_IOCTL = 34, PM_QUERY_DATA = 35, PM_CLOCK_ENABLE = 36, @@ -122,6 +128,12 @@ enum pm_query_id { PM_QID_CLOCK_GET_FIXEDFACTOR_PARAMS = 3, PM_QID_CLOCK_GET_PARENTS = 4, PM_QID_CLOCK_GET_ATTRIBUTES = 5, + PM_QID_PINCTRL_GET_NUM_PINS = 6, + PM_QID_PINCTRL_GET_NUM_FUNCTIONS = 7, + PM_QID_PINCTRL_GET_NUM_FUNCTION_GROUPS = 8, + PM_QID_PINCTRL_GET_FUNCTION_NAME = 9, + PM_QID_PINCTRL_GET_FUNCTION_GROUPS = 10, + PM_QID_PINCTRL_GET_PIN_GROUPS = 11, PM_QID_CLOCK_GET_NUM_CLOCKS = 12, PM_QID_CLOCK_GET_MAX_DIVISOR = 13, }; @@ -285,6 +297,44 @@ enum dll_reset_type { PM_DLL_RESET_PULSE = 2, }; +enum pm_pinctrl_config_param { + PM_PINCTRL_CONFIG_SLEW_RATE = 0, + PM_PINCTRL_CONFIG_BIAS_STATUS = 1, + PM_PINCTRL_CONFIG_PULL_CTRL = 2, + PM_PINCTRL_CONFIG_SCHMITT_CMOS = 3, + PM_PINCTRL_CONFIG_DRIVE_STRENGTH = 4, + PM_PINCTRL_CONFIG_VOLTAGE_STATUS = 5, + PM_PINCTRL_CONFIG_TRI_STATE = 6, + PM_PINCTRL_CONFIG_MAX = 7, +}; + +enum pm_pinctrl_slew_rate { + PM_PINCTRL_SLEW_RATE_FAST = 0, + PM_PINCTRL_SLEW_RATE_SLOW = 1, +}; + +enum pm_pinctrl_bias_status { + PM_PINCTRL_BIAS_DISABLE = 0, + PM_PINCTRL_BIAS_ENABLE = 1, +}; + +enum pm_pinctrl_pull_ctrl { + PM_PINCTRL_BIAS_PULL_DOWN = 0, + PM_PINCTRL_BIAS_PULL_UP = 1, +}; + +enum pm_pinctrl_schmitt_cmos { + PM_PINCTRL_INPUT_TYPE_CMOS = 0, + PM_PINCTRL_INPUT_TYPE_SCHMITT = 1, +}; + +enum pm_pinctrl_drive_strength { + PM_PINCTRL_DRIVE_STRENGTH_2MA = 0, + PM_PINCTRL_DRIVE_STRENGTH_4MA = 1, + PM_PINCTRL_DRIVE_STRENGTH_8MA = 2, + PM_PINCTRL_DRIVE_STRENGTH_12MA = 3, +}; + enum zynqmp_pm_shutdown_type { ZYNQMP_PM_SHUTDOWN_TYPE_SHUTDOWN = 0, ZYNQMP_PM_SHUTDOWN_TYPE_RESET = 1, @@ -353,6 +403,14 @@ int zynqmp_pm_write_pggs(u32 index, u32 value); int zynqmp_pm_read_pggs(u32 index, u32 *value); int zynqmp_pm_system_shutdown(const u32 type, const u32 subtype); int zynqmp_pm_set_boot_health_status(u32 value); +int zynqmp_pm_pinctrl_request(const u32 pin); +int zynqmp_pm_pinctrl_release(const u32 pin); +int zynqmp_pm_pinctrl_get_function(const u32 pin, u32 *id); +int zynqmp_pm_pinctrl_set_function(const u32 pin, const u32 id); +int zynqmp_pm_pinctrl_get_config(const u32 pin, const u32 param, + u32 *value); +int zynqmp_pm_pinctrl_set_config(const u32 pin, const u32 param, + u32 value); #else static inline struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void) { @@ -537,6 +595,38 @@ static inline int zynqmp_pm_set_boot_health_status(u32 value) { return -ENODEV; } + +static inline int zynqmp_pm_pinctrl_request(const u32 pin) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_pinctrl_release(const u32 pin) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_pinctrl_get_function(const u32 pin, u32 *id) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_pinctrl_set_function(const u32 pin, const u32 id) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_pinctrl_get_config(const u32 pin, const u32 param, + u32 *value) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_pinctrl_set_config(const u32 pin, const u32 param, + u32 value) +{ + return -ENODEV; +} #endif #endif /* __FIRMWARE_ZYNQMP_H__ */ -- cgit v1.2.3 From 75f4e830fa9c47637054a3b7201765f2a314bda2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 16 Apr 2021 16:05:55 +0200 Subject: serial: do not restore interrupt state in sysrq helper The uart_unlock_and_check_sysrq() helper can be used to defer processing of sysrq until the interrupt handler has released the port lock and is about to return. Since commit 81e2073c175b ("genirq: Disable interrupts for force threaded handlers") interrupt handlers that are not explicitly requested as threaded are always called with interrupts disabled and there is no need to save the interrupt state when taking the port lock. Instead of adding another sysrq helper for when the interrupt state has not needlessly been saved, drop the state parameter from uart_unlock_and_check_sysrq() and update its callers to no longer explicitly disable interrupts in their interrupt handlers. Cc: Joel Stanley Cc: Andrew Jeffery Cc: Andy Gross Cc: Bjorn Andersson Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210416140557.25177-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index e1b684e33841..d7ed00f1594e 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -500,19 +500,19 @@ static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int c return 0; } -static inline void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { int sysrq_ch; if (!port->has_sysrq) { - spin_unlock_irqrestore(&port->lock, irqflags); + spin_unlock(&port->lock); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; - spin_unlock_irqrestore(&port->lock, irqflags); + spin_unlock(&port->lock); if (sysrq_ch) handle_sysrq(sysrq_ch); @@ -526,9 +526,9 @@ static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int c { return 0; } -static inline void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { - spin_unlock_irqrestore(&port->lock, irqflags); + spin_unlock(&port->lock); } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ -- cgit v1.2.3 From d60d6e7adfc3814f6de03c978ff1daab21478f87 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Thu, 21 Jan 2021 21:34:05 -0500 Subject: thermal/core: Remove thermal_notify_framework thermal_notify_framework just updates for a single trip point where as thermal_zone_device_update does other bookkeeping like updating the temperature of the thermal zone and setting the next trip point. The only driver that was using thermal_notify_framework was updated in the previous patch to use thermal_zone_device_update instead. Since there are no users for thermal_notify_framework remove it. Signed-off-by: Thara Gopinath Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210122023406.3500424-3-thara.gopinath@linaro.org --- include/linux/thermal.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 169502164364..d296f3b88fb9 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -390,7 +390,6 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); int thermal_zone_get_slope(struct thermal_zone_device *tz); int thermal_zone_get_offset(struct thermal_zone_device *tz); -void thermal_notify_framework(struct thermal_zone_device *, int); int thermal_zone_device_enable(struct thermal_zone_device *tz); int thermal_zone_device_disable(struct thermal_zone_device *tz); void thermal_zone_device_critical(struct thermal_zone_device *tz); @@ -436,10 +435,6 @@ static inline int thermal_zone_get_offset( struct thermal_zone_device *tz) { return -ENODEV; } -static inline void thermal_notify_framework(struct thermal_zone_device *tz, - int trip) -{ } - static inline int thermal_zone_device_enable(struct thermal_zone_device *tz) { return -ENODEV; } -- cgit v1.2.3 From 7f318847a0f37b96d8927e8d30ae7b8f149b11f1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Apr 2021 14:44:09 +0100 Subject: perf: Get rid of oprofile leftovers perf_pmu_name() and perf_num_counters() are unused. Drop them. Signed-off-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210414134409.1266357-6-maz@kernel.org --- include/linux/perf_event.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3f7f89ea5e51..51154ed9a206 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -951,8 +951,6 @@ extern void perf_event_itrace_started(struct perf_event *event); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); extern void perf_pmu_unregister(struct pmu *pmu); -extern int perf_num_counters(void); -extern const char *perf_pmu_name(void); extern void __perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task); extern void __perf_event_task_sched_out(struct task_struct *prev, -- cgit v1.2.3 From 46135d6f878ab00261d4a2082d620bfb41019aab Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 17 Mar 2021 10:07:19 +0000 Subject: irqchip/gic-v4.1: Disable vSGI upon (GIC CPUIF < v4.1) detection GIC CPU interfaces versions predating GIC v4.1 were not built to accommodate vINTID within the vSGI range; as reported in the GIC specifications (8.2 "Changes to the CPU interface"), it is CONSTRAINED UNPREDICTABLE to deliver a vSGI to a PE with ID_AA64PFR0_EL1.GIC < b0011. Check the GIC CPUIF version by reading the SYS_ID_AA64_PFR0_EL1. Disable vSGIs if a CPUIF version < 4.1 is detected to prevent using vSGIs on systems where they may misbehave. Signed-off-by: Lorenzo Pieralisi Cc: Marc Zyngier Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210317100719.3331-2-lorenzo.pieralisi@arm.com --- include/linux/irqchip/arm-gic-v4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 943c3411ca10..2c63375bbd43 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -145,4 +145,6 @@ int its_init_v4(struct irq_domain *domain, const struct irq_domain_ops *vpe_ops, const struct irq_domain_ops *sgi_ops); +bool gic_cpuif_has_vsgi(void); + #endif -- cgit v1.2.3 From 1a0b05e435544cd53cd3936bdab425d88784b71a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Apr 2021 16:02:37 +0100 Subject: irqdomain: Get rid of irq_create_strict_mappings() No user of this helper is left, remove it. Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index d2c61de208a8..7a1dd7b969b6 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -415,9 +415,6 @@ static inline unsigned int irq_linear_revmap(struct irq_domain *domain, extern unsigned int irq_find_mapping(struct irq_domain *host, irq_hw_number_t hwirq); extern unsigned int irq_create_direct_mapping(struct irq_domain *host); -extern int irq_create_strict_mappings(struct irq_domain *domain, - unsigned int irq_base, - irq_hw_number_t hwirq_base, int count); extern const struct irq_domain_ops irq_domain_simple_ops; -- cgit v1.2.3 From c0dcaa55f91d925c9ac2c950ff84138534337a6c Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 2 Mar 2021 10:12:01 -0800 Subject: ice: Allow ignoring opcodes on specific VF Declare bitmap of allowed commands on VF. Initialize default opcodes list that should be always supported. Declare array of supported opcodes for each caps used in virtchnl code. Change allowed bitmap by setting or clearing corresponding bit to allowlist (bit set) or denylist (bit clear). Signed-off-by: Michal Swiatkowski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 40dd6afbfd81..debdd196773b 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -139,6 +139,7 @@ enum virtchnl_ops { /* opcode 34 - 46 are reserved */ VIRTCHNL_OP_ADD_FDIR_FILTER = 47, VIRTCHNL_OP_DEL_FDIR_FILTER = 48, + VIRTCHNL_OP_MAX, }; /* These macros are used to generate compilation errors if a structure/union -- cgit v1.2.3 From 142da08c4dc0afd07f9136b4812d5386bd6e1717 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Tue, 2 Mar 2021 10:12:12 -0800 Subject: ice: Advertise virtchnl UDP segmentation offload capability As the hardware is capable of supporting UDP segmentation offload, add a capability bit to virtchnl.h to communicate this and have the driver advertise its support. Suggested-by: Jesse Brandeburg Signed-off-by: Brett Creeley Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index debdd196773b..9e0341cf2c36 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -251,6 +251,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 #define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000 +#define VIRTCHNL_VF_OFFLOAD_USO 0X02000000 #define VIRTCHNL_VF_OFFLOAD_FDIR_PF 0X10000000 /* Define below the capability flags that are not offloads */ -- cgit v1.2.3 From 222a8ab01698148c00c271cda82d96f4e6e7b0a8 Mon Sep 17 00:00:00 2001 From: Qi Zhang Date: Tue, 13 Apr 2021 08:48:39 +0800 Subject: ice: Enable RSS configure for AVF Currently, RSS hash input is not available to AVF by ethtool, it is set by the PF directly. Add the RSS configure support for AVF through new virtchnl message, and define the capability flag VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF to query this new RSS offload support. Signed-off-by: Jia Guo Signed-off-by: Qi Zhang Signed-off-by: Haiyue Wang Tested-by: Bo Chen Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 9e0341cf2c36..565deea6ffe8 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -136,7 +136,9 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_CHANNELS = 31, VIRTCHNL_OP_ADD_CLOUD_FILTER = 32, VIRTCHNL_OP_DEL_CLOUD_FILTER = 33, - /* opcode 34 - 46 are reserved */ + /* opcode 34 - 44 are reserved */ + VIRTCHNL_OP_ADD_RSS_CFG = 45, + VIRTCHNL_OP_DEL_RSS_CFG = 46, VIRTCHNL_OP_ADD_FDIR_FILTER = 47, VIRTCHNL_OP_DEL_FDIR_FILTER = 48, VIRTCHNL_OP_MAX, @@ -252,6 +254,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 #define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000 #define VIRTCHNL_VF_OFFLOAD_USO 0X02000000 +#define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF 0X08000000 #define VIRTCHNL_VF_OFFLOAD_FDIR_PF 0X10000000 /* Define below the capability flags that are not offloads */ @@ -677,6 +680,14 @@ enum virtchnl_vfr_states { VIRTCHNL_VFR_VFACTIVE, }; +/* Type of RSS algorithm */ +enum virtchnl_rss_algorithm { + VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC = 0, + VIRTCHNL_RSS_ALG_R_ASYMMETRIC = 1, + VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC = 2, + VIRTCHNL_RSS_ALG_XOR_SYMMETRIC = 3, +}; + #define VIRTCHNL_MAX_NUM_PROTO_HDRS 32 #define PROTO_HDR_SHIFT 5 #define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT) @@ -832,6 +843,14 @@ struct virtchnl_proto_hdrs { VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs); +struct virtchnl_rss_cfg { + struct virtchnl_proto_hdrs proto_hdrs; /* protocol headers */ + enum virtchnl_rss_algorithm rss_algorithm; /* RSS algorithm type */ + u8 reserved[128]; /* reserve for future */ +}; + +VIRTCHNL_CHECK_STRUCT_LEN(2444, virtchnl_rss_cfg); + /* action configuration for FDIR */ struct virtchnl_filter_action { enum virtchnl_action type; @@ -1100,6 +1119,10 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DEL_CLOUD_FILTER: valid_len = sizeof(struct virtchnl_filter); break; + case VIRTCHNL_OP_ADD_RSS_CFG: + case VIRTCHNL_OP_DEL_RSS_CFG: + valid_len = sizeof(struct virtchnl_rss_cfg); + break; case VIRTCHNL_OP_ADD_FDIR_FILTER: valid_len = sizeof(struct virtchnl_fdir_add); break; -- cgit v1.2.3 From 1aea7808372eee4ad01f98e064c88c57f1e94855 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Thu, 22 Apr 2021 17:41:15 +0200 Subject: LSM: Infrastructure management of the superblock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move management of the superblock->sb_security blob out of the individual security modules and into the security infrastructure. Instead of allocating the blobs from within the modules, the modules tell the infrastructure how much space is required, and the space is allocated there. Cc: John Johansen Signed-off-by: Casey Schaufler Signed-off-by: Mickaël Salaün Reviewed-by: Stephen Smalley Acked-by: Serge Hallyn Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20210422154123.13086-6-mic@digikod.net Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index fb7f3193753d..75715998a95f 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1573,6 +1573,7 @@ struct lsm_blob_sizes { int lbs_cred; int lbs_file; int lbs_inode; + int lbs_superblock; int lbs_ipc; int lbs_msg_msg; int lbs_task; -- cgit v1.2.3 From 83e804f0bfee2247b1c0aa64845c81a38562da7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 22 Apr 2021 17:41:16 +0200 Subject: fs,security: Add sb_delete hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sb_delete security hook is called when shutting down a superblock, which may be useful to release kernel objects tied to the superblock's lifetime (e.g. inodes). This new hook is needed by Landlock to release (ephemerally) tagged struct inodes. This comes from the unprivileged nature of Landlock described in the next commit. Cc: Al Viro Cc: James Morris Signed-off-by: Mickaël Salaün Reviewed-by: Jann Horn Acked-by: Serge Hallyn Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20210422154123.13086-7-mic@digikod.net Signed-off-by: James Morris --- include/linux/lsm_hook_defs.h | 1 + include/linux/lsm_hooks.h | 3 +++ include/linux/security.h | 4 ++++ 3 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 477a597db013..e8adadbf9581 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -59,6 +59,7 @@ LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc, struct fs_parameter *param) LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb) +LSM_HOOK(void, LSM_RET_VOID, sb_delete, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts) LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 75715998a95f..cc2eaaaec0e4 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -108,6 +108,9 @@ * allocated. * @sb contains the super_block structure to be modified. * Return 0 if operation was successful. + * @sb_delete: + * Release objects tied to a superblock (e.g. inodes). + * @sb contains the super_block structure being released. * @sb_free_security: * Deallocate and clear the sb->s_security field. * @sb contains the super_block structure to be modified. diff --git a/include/linux/security.h b/include/linux/security.h index 8aeebd6646dc..90298baa4551 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -291,6 +291,7 @@ void security_bprm_committed_creds(struct linux_binprm *bprm); int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc); int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param); int security_sb_alloc(struct super_block *sb); +void security_sb_delete(struct super_block *sb); void security_sb_free(struct super_block *sb); void security_free_mnt_opts(void **mnt_opts); int security_sb_eat_lsm_opts(char *options, void **mnt_opts); @@ -631,6 +632,9 @@ static inline int security_sb_alloc(struct super_block *sb) return 0; } +static inline void security_sb_delete(struct super_block *sb) +{ } + static inline void security_sb_free(struct super_block *sb) { } -- cgit v1.2.3 From 265885daf3e5082eb9f6e2a23bdbf9ba4456a21b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 22 Apr 2021 17:41:18 +0200 Subject: landlock: Add syscall implementations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These 3 system calls are designed to be used by unprivileged processes to sandbox themselves: * landlock_create_ruleset(2): Creates a ruleset and returns its file descriptor. * landlock_add_rule(2): Adds a rule (e.g. file hierarchy access) to a ruleset, identified by the dedicated file descriptor. * landlock_restrict_self(2): Enforces a ruleset on the calling thread and its future children (similar to seccomp). This syscall has the same usage restrictions as seccomp(2): the caller must have the no_new_privs attribute set or have CAP_SYS_ADMIN in the current user namespace. All these syscalls have a "flags" argument (not currently used) to enable extensibility. Here are the motivations for these new syscalls: * A sandboxed process may not have access to file systems, including /dev, /sys or /proc, but it should still be able to add more restrictions to itself. * Neither prctl(2) nor seccomp(2) (which was used in a previous version) fit well with the current definition of a Landlock security policy. All passed structs (attributes) are checked at build time to ensure that they don't contain holes and that they are aligned the same way for each architecture. See the user and kernel documentation for more details (provided by a following commit): * Documentation/userspace-api/landlock.rst * Documentation/security/landlock.rst Cc: Arnd Bergmann Cc: James Morris Cc: Jann Horn Cc: Kees Cook Signed-off-by: Mickaël Salaün Acked-by: Serge Hallyn Link: https://lore.kernel.org/r/20210422154123.13086-9-mic@digikod.net Signed-off-by: James Morris --- include/linux/syscalls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2839dc9a7c01..fa3971012e1c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -69,6 +69,8 @@ struct io_uring_params; struct clone_args; struct open_how; struct mount_attr; +struct landlock_ruleset_attr; +enum landlock_rule_type; #include #include @@ -1041,6 +1043,11 @@ asmlinkage long sys_pidfd_send_signal(int pidfd, int sig, siginfo_t __user *info, unsigned int flags); asmlinkage long sys_pidfd_getfd(int pidfd, int fd, unsigned int flags); +asmlinkage long sys_landlock_create_ruleset(const struct landlock_ruleset_attr __user *attr, + size_t size, __u32 flags); +asmlinkage long sys_landlock_add_rule(int ruleset_fd, enum landlock_rule_type rule_type, + const void __user *rule_attr, __u32 flags); +asmlinkage long sys_landlock_restrict_self(int ruleset_fd, __u32 flags); /* * Architecture-specific system calls -- cgit v1.2.3 From 96874c619c200bc704ae2d8e34a3746350922135 Mon Sep 17 00:00:00 2001 From: Mohammad Athari Bin Ismail Date: Thu, 22 Apr 2021 15:55:00 +0800 Subject: net: stmmac: Add HW descriptor prefetch setting for DWMAC Core 5.20 onwards DWMAC Core 5.20 onwards supports HW descriptor prefetching. Additionally, it also depends on platform specific RTL configuration. This capability could be enabled by setting DMA_Mode bit-19 (DCHE). So, to enable this cability, platform must set plat->dma_cfg->dche = true and the DWMAC core version must be 5.20 onwards. Else, this capability wouldn`t be configured Signed-off-by: Mohammad Athari Bin Ismail Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 97edb31d6310..0db36360ef21 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -97,6 +97,7 @@ struct stmmac_dma_cfg { bool aal; bool eame; bool multi_msi_en; + bool dche; }; #define AXI_BLEN 7 -- cgit v1.2.3 From 3ddb3fd8cdb0a6c11b7c8d91ba42d84c4ea3cc43 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 22 Apr 2021 21:18:22 +0200 Subject: signal, perf: Fix siginfo_t by avoiding u64 on 32-bit architectures The alignment of a structure is that of its largest member. On architectures like 32-bit Arm (but not e.g. 32-bit x86) 64-bit integers will require 64-bit alignment and not its natural word size. This means that there is no portable way to add 64-bit integers to siginfo_t on 32-bit architectures without breaking the ABI, because siginfo_t does not yet (and therefore likely never will) contain 64-bit fields on 32-bit architectures. Adding a 64-bit integer could change the alignment of the union after the 3 initial int si_signo, si_errno, si_code, thus introducing 4 bytes of padding shifting the entire union, which would break the ABI. One alternative would be to use the __packed attribute, however, it is non-standard C. Given siginfo_t has definitions outside the Linux kernel in various standard libraries that can be compiled with any number of different compilers (not just those we rely on), using non-standard attributes on siginfo_t should be avoided to ensure portability. In the case of the si_perf field, word size is sufficient since there is no exact requirement on size, given the data it contains is user-defined via perf_event_attr::sig_data. On 32-bit architectures, any excess bits of perf_event_attr::sig_data will therefore be truncated when copying into si_perf. Since si_perf is intended to disambiguate events (e.g. encoding relevant information if there are more events of the same type), 32 bits should provide enough entropy to do so on 32-bit architectures. For 64-bit architectures, no change is intended. Fixes: fb6cc127e0b6 ("signal: Introduce TRAP_PERF si_code and si_perf to siginfo") Reported-by: Marek Szyprowski Reported-by: Jon Hunter Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marek Szyprowski Tested-by: Jon Hunter Link: https://lkml.kernel.org/r/20210422191823.79012-1-elver@google.com --- include/linux/compat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index c8821d966812..f0d2dd35d408 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -237,7 +237,7 @@ typedef struct compat_siginfo { u32 _pkey; } _addr_pkey; /* used when si_code=TRAP_PERF */ - compat_u64 _perf; + compat_ulong_t _perf; }; } _sigfault; -- cgit v1.2.3 From 6dab809bb5b183015e19d558bfa95107de660be0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 19 Apr 2021 14:24:55 +0300 Subject: mmc: core: Convert mmc_of_parse_voltage() to use device property API mmc_of_parse() for a few years has been using device property API. Convert mmc_of_parse_voltage() as well. At the same time switch users to new API. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210419112459.25241-2-andriy.shevchenko@linux.intel.com Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 17d7b326af29..c7e7b43600e9 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -509,7 +509,7 @@ void mmc_free_host(struct mmc_host *); void mmc_of_parse_clk_phase(struct mmc_host *host, struct mmc_clk_phase_map *map); int mmc_of_parse(struct mmc_host *host); -int mmc_of_parse_voltage(struct device_node *np, u32 *mask); +int mmc_of_parse_voltage(struct mmc_host *host, u32 *mask); static inline void *mmc_priv(struct mmc_host *host) { -- cgit v1.2.3 From edd602146507532c1714d8428f654b87205f492e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 19 Apr 2021 14:24:59 +0300 Subject: mmc: mmc_spi: Make of_mmc_spi.c resource provider agnostic In order to use the same driver on non-OF platforms, make of_mmc_spi.c resource provider agnostic. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210419112459.25241-6-andriy.shevchenko@linux.intel.com Signed-off-by: Ulf Hansson --- include/linux/spi/mmc_spi.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/mmc_spi.h b/include/linux/spi/mmc_spi.h index 778ae8eb1f3e..9ad9a06e488d 100644 --- a/include/linux/spi/mmc_spi.h +++ b/include/linux/spi/mmc_spi.h @@ -35,16 +35,7 @@ struct mmc_spi_platform_data { void (*setpower)(struct device *, unsigned int maskval); }; -#ifdef CONFIG_OF extern struct mmc_spi_platform_data *mmc_spi_get_pdata(struct spi_device *spi); extern void mmc_spi_put_pdata(struct spi_device *spi); -#else -static inline struct mmc_spi_platform_data * -mmc_spi_get_pdata(struct spi_device *spi) -{ - return spi->dev.platform_data; -} -static inline void mmc_spi_put_pdata(struct spi_device *spi) {} -#endif /* CONFIG_OF */ #endif /* __LINUX_SPI_MMC_SPI_H */ -- cgit v1.2.3 From 7ff5062079ef5c2f92af0c770dfc0ab925fa2128 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 10 Feb 2020 10:00:21 +0000 Subject: iov_iter: Add ITER_XARRAY Add an iterator, ITER_XARRAY, that walks through a set of pages attached to an xarray, starting at a given page and offset and walking for the specified amount of bytes. The iterator supports transparent huge pages. The iterate_xarray() macro calls the helper function with rcu_access() helped. I think that this is only a problem for iov_iter_for_each_range() - and that returns an error for ITER_XARRAY (also, this function does not appear to be called). The caller must guarantee that the pages are all present and they must be locked using PG_locked, PG_writeback or PG_fscache to prevent them from going away or being migrated whilst they're being accessed. This is useful for copying data from socket buffers to inodes in network filesystems and for transferring data between those inodes and the cache using direct I/O. Whilst it is true that ITER_BVEC could be used instead, that would require a bio_vec array to be allocated to refer to all the pages - which should be redundant if inode->i_pages also points to all these pages. Note that older versions of this patch implemented an ITER_MAPPING instead, which was almost the same. Changes: v7: - Rename iter_xarray_copy_pages() to iter_xarray_populate_pages()[1]. Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Alexander Viro cc: Matthew Wilcox (Oracle) cc: Christoph Hellwig cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/3577430.1579705075@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/158861205740.340223.16592990225607814022.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/159465785214.1376674.6062549291411362531.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588477334.3465195.3608963255682568730.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118129703.1232039.17141248432017826976.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161026313.2537118.14676007075365418649.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340386671.1303470.10752208972482479840.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539527815.286939.14607323792547049341.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653786033.2770958.14154191921867463240.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789064740.6155.11932541175173658065.stgit@warthog.procyon.org.uk/ # v6 Link: https://lore.kernel.org/r/27c369a8f42bb8a617672b2dc0126a5c6df5a050.camel@kernel.org [1] --- include/linux/uio.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 27ff8eb786dc..5f5ffc45d4aa 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -10,6 +10,7 @@ #include struct page; +struct address_space; struct pipe_inode_info; struct kvec { @@ -24,6 +25,7 @@ enum iter_type { ITER_BVEC = 16, ITER_PIPE = 32, ITER_DISCARD = 64, + ITER_XARRAY = 128, }; struct iov_iter { @@ -39,6 +41,7 @@ struct iov_iter { const struct iovec *iov; const struct kvec *kvec; const struct bio_vec *bvec; + struct xarray *xarray; struct pipe_inode_info *pipe; }; union { @@ -47,6 +50,7 @@ struct iov_iter { unsigned int head; unsigned int start_head; }; + loff_t xarray_start; }; }; @@ -80,6 +84,11 @@ static inline bool iov_iter_is_discard(const struct iov_iter *i) return iov_iter_type(i) == ITER_DISCARD; } +static inline bool iov_iter_is_xarray(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_XARRAY; +} + static inline unsigned char iov_iter_rw(const struct iov_iter *i) { return i->type & (READ | WRITE); @@ -221,6 +230,8 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe, size_t count); void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); +void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, + loff_t start, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, -- cgit v1.2.3 From 73e10ded33a1cfc0c72404aaedc493e9813b6239 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 10 Feb 2020 10:00:21 +0000 Subject: mm: Add set/end/wait functions for PG_private_2 Add three functions to manipulate PG_private_2: (*) set_page_private_2() - Set the flag and take an appropriate reference on the flagged page. (*) end_page_private_2() - Clear the flag, drop the reference and wake up any waiters, somewhat analogously with end_page_writeback(). (*) wait_on_page_private_2() - Wait for the flag to be cleared. Wrappers will need to be placed in the netfs lib header in the patch that adds that. [This implements a suggestion by Linus[1] to not mix the terminology of PG_private_2 and PG_fscache in the mm core function] Changes: v7: - Use compound_head() in all the functions to make them THP safe[6]. v5: - Add set and end functions, calling the end function end rather than unlock[3]. - Keep a ref on the page when PG_private_2 is set[4][5]. v4: - Remove extern from the declaration[2]. Suggested-by: Linus Torvalds Signed-off-by: David Howells Reviewed-by: Matthew Wilcox (Oracle) Tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Alexander Viro cc: Christoph Hellwig cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/1330473.1612974547@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/CAHk-=wjgA-74ddehziVk=XAEMTKswPu1Yw4uaro1R3ibs27ztw@mail.gmail.com/ [1] Link: https://lore.kernel.org/r/20210216102659.GA27714@lst.de/ [2] Link: https://lore.kernel.org/r/161340387944.1303470.7944159520278177652.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539528910.286939.1252328699383291173.stgit@warthog.procyon.org.uk # v4 Link: https://lore.kernel.org/r/20210321105309.GG3420@casper.infradead.org [3] Link: https://lore.kernel.org/r/CAHk-=wh+2gbF7XEjYc=HV9w_2uVzVf7vs60BPz0gFA=+pUm3ww@mail.gmail.com/ [4] Link: https://lore.kernel.org/r/CAHk-=wjSGsRj7xwhSMQ6dAQiz53xA39pOG+XA_WeTgwBBu4uqg@mail.gmail.com/ [5] Link: https://lore.kernel.org/r/20210408145057.GN2531743@casper.infradead.org/ [6] Link: https://lore.kernel.org/r/161653788200.2770958.9517755716374927208.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789066013.6155.9816857201817288382.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/pagemap.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 8c9947fd62f3..bb4433c98d02 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -688,6 +688,26 @@ void wait_for_stable_page(struct page *page); void page_endio(struct page *page, bool is_write, int err); +/** + * set_page_private_2 - Set PG_private_2 on a page and take a ref + * @page: The page. + * + * Set the PG_private_2 flag on a page and take the reference needed for the VM + * to handle its lifetime correctly. This sets the flag and takes the + * reference unconditionally, so care must be taken not to set the flag again + * if it's already set. + */ +static inline void set_page_private_2(struct page *page) +{ + page = compound_head(page); + get_page(page); + SetPagePrivate2(page); +} + +void end_page_private_2(struct page *page); +void wait_on_page_private_2(struct page *page); +int wait_on_page_private_2_killable(struct page *page); + /* * Add an arbitrary waiter to a page's wait queue */ -- cgit v1.2.3 From fcd9ae4f7f3b5fbd549285bab0478a339113620e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 7 Apr 2021 21:18:55 +0100 Subject: mm/filemap: Pass the file_ra_state in the ractl For readahead_expand(), we need to modify the file ra_state, so pass it down by adding it to the ractl. We have to do this because it's not always the same as f_ra in the struct file that is already being passed. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: David Howells Tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne Link: https://lore.kernel.org/r/20210407201857.3582797-2-willy@infradead.org/ Link: https://lore.kernel.org/r/161789067431.6155.8063840447229665720.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/pagemap.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bb4433c98d02..4220ded38f4b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -812,20 +812,23 @@ static inline int add_to_page_cache(struct page *page, * @file: The file, used primarily by network filesystems for authentication. * May be NULL if invoked internally by the filesystem. * @mapping: Readahead this filesystem object. + * @ra: File readahead state. May be NULL. */ struct readahead_control { struct file *file; struct address_space *mapping; + struct file_ra_state *ra; /* private: use the readahead_* accessors instead */ pgoff_t _index; unsigned int _nr_pages; unsigned int _batch_count; }; -#define DEFINE_READAHEAD(rac, f, m, i) \ - struct readahead_control rac = { \ +#define DEFINE_READAHEAD(ractl, f, r, m, i) \ + struct readahead_control ractl = { \ .file = f, \ .mapping = m, \ + .ra = r, \ ._index = i, \ } @@ -833,10 +836,9 @@ struct readahead_control { void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); -void page_cache_sync_ra(struct readahead_control *, struct file_ra_state *, +void page_cache_sync_ra(struct readahead_control *, unsigned long req_count); +void page_cache_async_ra(struct readahead_control *, struct page *, unsigned long req_count); -void page_cache_async_ra(struct readahead_control *, struct file_ra_state *, - struct page *, unsigned long req_count); /** * page_cache_sync_readahead - generic file readahead @@ -856,8 +858,8 @@ void page_cache_sync_readahead(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t index, unsigned long req_count) { - DEFINE_READAHEAD(ractl, file, mapping, index); - page_cache_sync_ra(&ractl, ra, req_count); + DEFINE_READAHEAD(ractl, file, ra, mapping, index); + page_cache_sync_ra(&ractl, req_count); } /** @@ -879,8 +881,8 @@ void page_cache_async_readahead(struct address_space *mapping, struct file_ra_state *ra, struct file *file, struct page *page, pgoff_t index, unsigned long req_count) { - DEFINE_READAHEAD(ractl, file, mapping, index); - page_cache_async_ra(&ractl, ra, page, req_count); + DEFINE_READAHEAD(ractl, file, ra, mapping, index); + page_cache_async_ra(&ractl, page, req_count); } /** -- cgit v1.2.3 From c790fbf20a53e8297c97ddb1c0c9d41c060067f3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 7 Apr 2021 21:18:56 +0100 Subject: fs: Document file_ra_state Turn the comments into kernel-doc and improve the wording slightly. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: David Howells Tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne Link: https://lore.kernel.org/r/20210407201857.3582797-3-willy@infradead.org/ Link: https://lore.kernel.org/r/161789068619.6155.1397999970593531574.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/fs.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ec8f3ddf4a6a..33831a8bda52 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -891,18 +891,22 @@ struct fown_struct { int signum; /* posix.1b rt signal to be delivered on IO */ }; -/* - * Track a single file's readahead state +/** + * struct file_ra_state - Track a file's readahead state. + * @start: Where the most recent readahead started. + * @size: Number of pages read in the most recent readahead. + * @async_size: Start next readahead when this many pages are left. + * @ra_pages: Maximum size of a readahead request. + * @mmap_miss: How many mmap accesses missed in the page cache. + * @prev_pos: The last byte in the most recent read request. */ struct file_ra_state { - pgoff_t start; /* where readahead started */ - unsigned int size; /* # of readahead pages */ - unsigned int async_size; /* do asynchronous readahead when - there are only # of pages ahead */ - - unsigned int ra_pages; /* Maximum readahead window */ - unsigned int mmap_miss; /* Cache miss stat for mmap accesses */ - loff_t prev_pos; /* Cache last read() position */ + pgoff_t start; + unsigned int size; + unsigned int async_size; + unsigned int ra_pages; + unsigned int mmap_miss; + loff_t prev_pos; }; /* -- cgit v1.2.3 From 3ca236440126f75c91281c53f137794b8d5f884a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 Sep 2020 14:03:27 +0100 Subject: mm: Implement readahead_control pageset expansion Provide a function, readahead_expand(), that expands the set of pages specified by a readahead_control object to encompass a revised area with a proposed size and length. The proposed area must include all of the old area and may be expanded yet more by this function so that the edges align on (transparent huge) page boundaries as allocated. The expansion will be cut short if a page already exists in either of the areas being expanded into. Note that any expansion made in such a case is not rolled back. This will be used by fscache so that reads can be expanded to cache granule boundaries, thereby allowing whole granules to be stored in the cache, but there are other potential users also. Changes: v6: - Fold in a patch from Matthew Wilcox to tell the ondemand readahead algorithm about the expansion so that the next readahead starts at the right place[2]. v4: - Moved the declaration of readahead_expand() to a better place[1]. Suggested-by: Matthew Wilcox (Oracle) Signed-off-by: David Howells Reviewed-by: Matthew Wilcox (Oracle) Tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Alexander Viro cc: Christoph Hellwig cc: Mike Marshall cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20210217161358.GM2858050@casper.infradead.org/ [1] Link: https://lore.kernel.org/r/20210407201857.3582797-4-willy@infradead.org/ [2] Link: https://lore.kernel.org/r/159974633888.2094769.8326206446358128373.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/160588479816.3465195.553952688795241765.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118131787.1232039.4863969952441067985.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161028670.2537118.13831420617039766044.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340389201.1303470.14353807284546854878.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539530488.286939.18085961677838089157.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653789422.2770958.2108046612147345000.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789069829.6155.4295672417565512161.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4220ded38f4b..63ca6430aef5 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -839,6 +839,8 @@ void page_cache_ra_unbounded(struct readahead_control *, void page_cache_sync_ra(struct readahead_control *, unsigned long req_count); void page_cache_async_ra(struct readahead_control *, struct page *, unsigned long req_count); +void readahead_expand(struct readahead_control *ractl, + loff_t new_start, size_t new_len); /** * page_cache_sync_readahead - generic file readahead -- cgit v1.2.3 From b533a83f2bf97c22ab862a7493d13d80c93696f0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 15 Feb 2021 13:23:33 +0000 Subject: netfs, mm: Move PG_fscache helper funcs to linux/netfs.h Move the PG_fscache related helper funcs (such as SetPageFsCache()) to linux/netfs.h rather than linux/fscache.h as the intention is to move to a model where they're used by the network filesystem and the helper library, but not by fscache/cachefiles itself. Signed-off-by: David Howells Tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/161340392347.1303470.18065131603507621762.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539534516.286939.6265142985563005000.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653792959.2770958.5386546945273988117.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789073997.6155.18442271115255650614.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/fscache.h | 11 +---------- include/linux/netfs.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 10 deletions(-) create mode 100644 include/linux/netfs.h (limited to 'include/linux') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index a1c928fe98e7..1f8dc72369ee 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -19,6 +19,7 @@ #include #include #include +#include #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) #define fscache_available() (1) @@ -29,16 +30,6 @@ #endif -/* - * overload PG_private_2 to give us PG_fscache - this is used to indicate that - * a page is currently backed by a local disk cache - */ -#define PageFsCache(page) PagePrivate2((page)) -#define SetPageFsCache(page) SetPagePrivate2((page)) -#define ClearPageFsCache(page) ClearPagePrivate2((page)) -#define TestSetPageFsCache(page) TestSetPagePrivate2((page)) -#define TestClearPageFsCache(page) TestClearPagePrivate2((page)) - /* pattern used to fill dead space in an index entry */ #define FSCACHE_INDEX_DEADFILL_PATTERN 0x79 diff --git a/include/linux/netfs.h b/include/linux/netfs.h new file mode 100644 index 000000000000..cc1102040488 --- /dev/null +++ b/include/linux/netfs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Network filesystem support services. + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * See: + * + * Documentation/filesystems/netfs_library.rst + * + * for a description of the network filesystem interface declared here. + */ + +#ifndef _LINUX_NETFS_H +#define _LINUX_NETFS_H + +#include + +/* + * Overload PG_private_2 to give us PG_fscache - this is used to indicate that + * a page is currently backed by a local disk cache + */ +#define PageFsCache(page) PagePrivate2((page)) +#define SetPageFsCache(page) SetPagePrivate2((page)) +#define ClearPageFsCache(page) ClearPagePrivate2((page)) +#define TestSetPageFsCache(page) TestSetPagePrivate2((page)) +#define TestClearPageFsCache(page) TestClearPagePrivate2((page)) + +#endif /* _LINUX_NETFS_H */ -- cgit v1.2.3 From 99bff93c17c05470196b2c4e699c3e58d327022b Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 15 Feb 2021 13:20:46 +0000 Subject: netfs, mm: Add set/end/wait_on_page_fscache() aliases Add set/end/wait_on_page_fscache() as aliases of set/end/wait_page_private_2(). These allow a page to marked with PG_fscache, the flag to be removed and waiters woken and waiting for the flag to be cleared. A ref on the page is also taken and dropped. [Linus suggested putting the fscache-themed functions into the caching-specific headers rather than pagemap.h[1]] Changes: v5: - Mirror the changes to the core routines[2]. Signed-off-by: David Howells Tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Linus Torvalds cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/1330473.1612974547@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/CAHk-=wjgA-74ddehziVk=XAEMTKswPu1Yw4uaro1R3ibs27ztw@mail.gmail.com/ [1] Link: https://lore.kernel.org/r/161340393568.1303470.4997526899111310530.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539536093.286939.5076448803512118764.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [2] Link: https://lore.kernel.org/r/161653793873.2770958.12157243390965814502.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789075327.6155.7432127924219092385.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/netfs.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index cc1102040488..8479d63406f7 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -26,4 +26,61 @@ #define TestSetPageFsCache(page) TestSetPagePrivate2((page)) #define TestClearPageFsCache(page) TestClearPagePrivate2((page)) +/** + * set_page_fscache - Set PG_fscache on a page and take a ref + * @page: The page. + * + * Set the PG_fscache (PG_private_2) flag on a page and take the reference + * needed for the VM to handle its lifetime correctly. This sets the flag and + * takes the reference unconditionally, so care must be taken not to set the + * flag again if it's already set. + */ +static inline void set_page_fscache(struct page *page) +{ + set_page_private_2(page); +} + +/** + * end_page_fscache - Clear PG_fscache and release any waiters + * @page: The page + * + * Clear the PG_fscache (PG_private_2) bit on a page and wake up any sleepers + * waiting for this. The page ref held for PG_private_2 being set is released. + * + * This is, for example, used when a netfs page is being written to a local + * disk cache, thereby allowing writes to the cache for the same page to be + * serialised. + */ +static inline void end_page_fscache(struct page *page) +{ + end_page_private_2(page); +} + +/** + * wait_on_page_fscache - Wait for PG_fscache to be cleared on a page + * @page: The page to wait on + * + * Wait for PG_fscache (aka PG_private_2) to be cleared on a page. + */ +static inline void wait_on_page_fscache(struct page *page) +{ + wait_on_page_private_2(page); +} + +/** + * wait_on_page_fscache_killable - Wait for PG_fscache to be cleared on a page + * @page: The page to wait on + * + * Wait for PG_fscache (aka PG_private_2) to be cleared on a page or until a + * fatal signal is received by the calling task. + * + * Return: + * - 0 if successful. + * - -EINTR if a fatal signal was encountered. + */ +static inline int wait_on_page_fscache_killable(struct page *page) +{ + return wait_on_page_private_2_killable(page); +} + #endif /* _LINUX_NETFS_H */ -- cgit v1.2.3 From 3d3c95046742e4eebaa4b891b0b01cbbed94ebbd Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 13 May 2020 17:41:20 +0100 Subject: netfs: Provide readahead and readpage netfs helpers Add a pair of helper functions: (*) netfs_readahead() (*) netfs_readpage() to do the work of handling a readahead or a readpage, where the page(s) that form part of the request may be split between the local cache, the server or just require clearing, and may be single pages and transparent huge pages. This is all handled within the helper. Note that while both will read from the cache if there is data present, only netfs_readahead() will expand the request beyond what it was asked to do, and only netfs_readahead() will write back to the cache. netfs_readpage(), on the other hand, is synchronous and only fetches the page (which might be a THP) it is asked for. The netfs gives the helper parameters from the VM, the cache cookie it wants to use (or NULL) and a table of operations (only one of which is mandatory): (*) expand_readahead() [optional] Called to allow the netfs to request an expansion of a readahead request to meet its own alignment requirements. This is done by changing rreq->start and rreq->len. (*) clamp_length() [optional] Called to allow the netfs to cut down a subrequest to meet its own boundary requirements. If it does this, the helper will generate additional subrequests until the full request is satisfied. (*) is_still_valid() [optional] Called to find out if the data just read from the cache has been invalidated and must be reread from the server. (*) issue_op() [required] Called to ask the netfs to issue a read to the server. The subrequest describes the read. The read request holds information about the file being accessed. The netfs can cache information in rreq->netfs_priv. Upon completion, the netfs should set the error, transferred and can also set FSCACHE_SREQ_CLEAR_TAIL and then call fscache_subreq_terminated(). (*) done() [optional] Called after the pages have been unlocked. The read request is still pinning the file and mapping and may still be pinning pages with PG_fscache. rreq->error indicates any error that has been accumulated. (*) cleanup() [optional] Called when the helper is disposing of a finished read request. This allows the netfs to clear rreq->netfs_priv. Netfs support is enabled with CONFIG_NETFS_SUPPORT=y. It will be built even if CONFIG_FSCACHE=n and in this case much of it should be optimised away, allowing the filesystem to use it even when caching is disabled. Changes: v5: - Comment why netfs_readahead() is putting pages[2]. - Use page_file_mapping() rather than page->mapping[2]. - Use page_index() rather than page->index[2]. - Use set_page_fscache()[3] rather then SetPageFsCache() as this takes an appropriate ref too[4]. v4: - Folded in a kerneldoc comment fix. - Folded in a fix for the error handling in the case that ENOMEM occurs. - Added flag to netfs_subreq_terminated() to indicate that the caller may have been running async and stuff that might sleep needs punting to a workqueue (can't use in_softirq()[1]). Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1] Link: https://lore.kernel.org/r/20210321014202.GF3420@casper.infradead.org/ [2] Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [3] Link: https://lore.kernel.org/r/CAHk-=wh+2gbF7XEjYc=HV9w_2uVzVf7vs60BPz0gFA=+pUm3ww@mail.gmail.com/ [4] Link: https://lore.kernel.org/r/160588497406.3465195.18003475695899726222.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118136849.1232039.8923686136144228724.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161032290.2537118.13400578415247339173.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340394873.1303470.6237319335883242536.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539537375.286939.16642940088716990995.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653795430.2770958.4947584573720000554.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789076581.6155.6745849361504760209.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/netfs.h | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 8479d63406f7..59e926e62d2e 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -14,6 +14,8 @@ #ifndef _LINUX_NETFS_H #define _LINUX_NETFS_H +#include +#include #include /* @@ -83,4 +85,85 @@ static inline int wait_on_page_fscache_killable(struct page *page) return wait_on_page_private_2_killable(page); } +enum netfs_read_source { + NETFS_FILL_WITH_ZEROES, + NETFS_DOWNLOAD_FROM_SERVER, + NETFS_READ_FROM_CACHE, + NETFS_INVALID_READ, +} __mode(byte); + +/* + * Descriptor for a single component subrequest. + */ +struct netfs_read_subrequest { + struct netfs_read_request *rreq; /* Supervising read request */ + struct list_head rreq_link; /* Link in rreq->subrequests */ + loff_t start; /* Where to start the I/O */ + size_t len; /* Size of the I/O */ + size_t transferred; /* Amount of data transferred */ + refcount_t usage; + short error; /* 0 or error that occurred */ + unsigned short debug_index; /* Index in list (for debugging output) */ + enum netfs_read_source source; /* Where to read from */ + unsigned long flags; +#define NETFS_SREQ_WRITE_TO_CACHE 0 /* Set if should write to cache */ +#define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ +#define NETFS_SREQ_SHORT_READ 2 /* Set if there was a short read from the cache */ +#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ +#define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ +}; + +/* + * Descriptor for a read helper request. This is used to make multiple I/O + * requests on a variety of sources and then stitch the result together. + */ +struct netfs_read_request { + struct work_struct work; + struct inode *inode; /* The file being accessed */ + struct address_space *mapping; /* The mapping being accessed */ + struct list_head subrequests; /* Requests to fetch I/O from disk or net */ + void *netfs_priv; /* Private data for the netfs */ + unsigned int debug_id; + atomic_t nr_rd_ops; /* Number of read ops in progress */ + size_t submitted; /* Amount submitted for I/O so far */ + size_t len; /* Length of the request */ + short error; /* 0 or error that occurred */ + loff_t i_size; /* Size of the file */ + loff_t start; /* Start position */ + pgoff_t no_unlock_page; /* Don't unlock this page after read */ + refcount_t usage; + unsigned long flags; +#define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */ +#define NETFS_RREQ_WRITE_TO_CACHE 1 /* Need to write to the cache */ +#define NETFS_RREQ_NO_UNLOCK_PAGE 2 /* Don't unlock no_unlock_page on completion */ +#define NETFS_RREQ_DONT_UNLOCK_PAGES 3 /* Don't unlock the pages on completion */ +#define NETFS_RREQ_FAILED 4 /* The request failed */ +#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ + const struct netfs_read_request_ops *netfs_ops; +}; + +/* + * Operations the network filesystem can/must provide to the helpers. + */ +struct netfs_read_request_ops { + void (*init_rreq)(struct netfs_read_request *rreq, struct file *file); + void (*expand_readahead)(struct netfs_read_request *rreq); + bool (*clamp_length)(struct netfs_read_subrequest *subreq); + void (*issue_op)(struct netfs_read_subrequest *subreq); + bool (*is_still_valid)(struct netfs_read_request *rreq); + void (*done)(struct netfs_read_request *rreq); + void (*cleanup)(struct address_space *mapping, void *netfs_priv); +}; + +struct readahead_control; +extern void netfs_readahead(struct readahead_control *, + const struct netfs_read_request_ops *, + void *); +extern int netfs_readpage(struct file *, + struct page *, + const struct netfs_read_request_ops *, + void *); + +extern void netfs_subreq_terminated(struct netfs_read_subrequest *, ssize_t, bool); + #endif /* _LINUX_NETFS_H */ -- cgit v1.2.3 From 77b4d2c6316ab096e3f77eea240144941434f2a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 18 Sep 2020 09:25:13 +0100 Subject: netfs: Add tracepoints Add three tracepoints to track the activity of the read helpers: (1) netfs/netfs_read This logs entry to the read helpers and also expansion of the range in a readahead request. (2) netfs/netfs_rreq This logs the progress of netfs_read_request objects which track read requests. A read request may be a compound of multiple subrequests. (3) netfs/netfs_sreq This logs the progress of netfs_read_subrequest objects, which track the contributions from various sources to a read request. Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/161118138060.1232039.5353374588021776217.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161033468.2537118.14021843889844001905.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340395843.1303470.7355519662919639648.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539538693.286939.10171713520419106334.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653796447.2770958.1870655382450862155.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789078003.6155.17814844411672989942.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 59e926e62d2e..8e8c6a4e4dde 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -124,6 +124,7 @@ struct netfs_read_request { struct list_head subrequests; /* Requests to fetch I/O from disk or net */ void *netfs_priv; /* Private data for the netfs */ unsigned int debug_id; + unsigned int cookie_debug_id; atomic_t nr_rd_ops; /* Number of read ops in progress */ size_t submitted; /* Amount submitted for I/O so far */ size_t len; /* Length of the request */ -- cgit v1.2.3 From 289af54cc67ace285b6d4335a54324562894c4e2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 3 Nov 2020 11:32:41 +0000 Subject: netfs: Gather stats Gather statistics from the netfs interface that can be exported through a seqfile. This is intended to be called by a later patch when viewing /proc/fs/fscache/stats. Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/161118139247.1232039.10556850937548511068.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161034669.2537118.2761232524997091480.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340397101.1303470.17581910581108378458.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539539959.286939.6794352576462965914.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653797700.2770958.5801990354413178228.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789079281.6155.17141344853277186500.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 8e8c6a4e4dde..db4af80cbae3 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -166,5 +166,6 @@ extern int netfs_readpage(struct file *, void *); extern void netfs_subreq_terminated(struct netfs_read_subrequest *, ssize_t, bool); +extern void netfs_stats_show(struct seq_file *); #endif /* _LINUX_NETFS_H */ -- cgit v1.2.3 From e1b1240c1ff5f8bfba797f14996d8bac8a9ec437 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Sep 2020 11:06:07 +0100 Subject: netfs: Add write_begin helper Add a helper to do the pre-reading work for the netfs write_begin address space op. Changes v6: - Fixed a missing rreq put in netfs_write_begin()[3]. - Use DEFINE_READAHEAD()[4]. v5: - Made the wait for PG_fscache in netfs_write_begin() killable[2]. v4: - Added flag to netfs_subreq_terminated() to indicate that the caller may have been running async and stuff that might sleep needs punting to a workqueue (can't use in_softirq()[1]). Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1] Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [2] Link: https://lore.kernel.org/r/161781042127.463527.9154479794406046987.stgit@warthog.procyon.org.uk/ [3] Link: https://lore.kernel.org/r/1234933.1617886271@warthog.procyon.org.uk/ [4] Link: https://lore.kernel.org/r/160588543960.3465195.2792938973035886168.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161118140165.1232039.16418853874312234477.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161035539.2537118.15674887534950908530.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340398368.1303470.11242918276563276090.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539541541.286939.1889738674057013729.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653798616.2770958.17213315845968485563.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789080530.6155.1011847312392330491.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/netfs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index db4af80cbae3..99659ed9524e 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -147,11 +147,14 @@ struct netfs_read_request { * Operations the network filesystem can/must provide to the helpers. */ struct netfs_read_request_ops { + bool (*is_cache_enabled)(struct inode *inode); void (*init_rreq)(struct netfs_read_request *rreq, struct file *file); void (*expand_readahead)(struct netfs_read_request *rreq); bool (*clamp_length)(struct netfs_read_subrequest *subreq); void (*issue_op)(struct netfs_read_subrequest *subreq); bool (*is_still_valid)(struct netfs_read_request *rreq); + int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, + struct page *page, void **_fsdata); void (*done)(struct netfs_read_request *rreq); void (*cleanup)(struct address_space *mapping, void *netfs_priv); }; @@ -164,6 +167,11 @@ extern int netfs_readpage(struct file *, struct page *, const struct netfs_read_request_ops *, void *); +extern int netfs_write_begin(struct file *, struct address_space *, + loff_t, unsigned int, unsigned int, struct page **, + void **, + const struct netfs_read_request_ops *, + void *); extern void netfs_subreq_terminated(struct netfs_read_subrequest *, ssize_t, bool); extern void netfs_stats_show(struct seq_file *); -- cgit v1.2.3 From 726218fdc22c9b52f16e1228499a804bbf262a20 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Feb 2020 14:22:24 +0000 Subject: netfs: Define an interface to talk to a cache Add an interface to the netfs helper library for reading data from the cache instead of downloading it from the server and support for writing data just downloaded or cleared to the cache. The API passes an iov_iter to the cache read/write routines to indicate the data/buffer to be used. This is done using the ITER_XARRAY type to provide direct access to the netfs inode's pagecache. When the netfs's ->begin_cache_operation() method is called, this must fill in the cache_resources in the netfs_read_request struct, including the netfs_cache_ops used by the helper lib to talk to the cache. The helper lib does not directly access the cache. Changes: v6: - Call trace_netfs_read() after beginning the cache op so that the cookie debug ID can be logged[3]. - Don't record the error from writing to the cache. We don't want to pass it back to the netfs[4]. - Fix copy-to-cache subreq amalgamation to not round up as it goes along otherwise it overcalculates the length of the write[5]. v5: - Use end_page_fscache() rather than unlock_page_fscache()[2]. v4: - Added flag to netfs_subreq_terminated() to indicate that the caller may have been running async and stuff that might sleep needs punting to a workqueue (can't use in_softirq()[1]). - Add missing inc of netfs_n_rh_read stat. - Move initial definition of fscache_begin_read_operation() elsewhere. - Need to call op->begin_cache_operation() from netfs_write_begin(). Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Matthew Wilcox cc: linux-mm@kvack.org cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1] Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [2] Link: https://lore.kernel.org/r/161781045123.463527.14533348855710902201.stgit@warthog.procyon.org.uk/ [3] Link: https://lore.kernel.org/r/161781046256.463527.18158681600085556192.stgit@warthog.procyon.org.uk/ [4] Link: https://lore.kernel.org/r/161781047695.463527.7463536103593997492.stgit@warthog.procyon.org.uk/ [5] Link: https://lore.kernel.org/r/161118141321.1232039.8296910406755622458.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161036700.2537118.11170748455436854978.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340399569.1303470.1138884774643385730.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539542874.286939.13337898213448136687.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653799826.2770958.9015430297426331950.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789081462.6155.3853904866933313256.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/netfs.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 99659ed9524e..9062adfa2fb9 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -92,6 +92,18 @@ enum netfs_read_source { NETFS_INVALID_READ, } __mode(byte); +typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, + bool was_async); + +/* + * Resources required to do operations on a cache. + */ +struct netfs_cache_resources { + const struct netfs_cache_ops *ops; + void *cache_priv; + void *cache_priv2; +}; + /* * Descriptor for a single component subrequest. */ @@ -121,11 +133,13 @@ struct netfs_read_request { struct work_struct work; struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The mapping being accessed */ + struct netfs_cache_resources cache_resources; struct list_head subrequests; /* Requests to fetch I/O from disk or net */ void *netfs_priv; /* Private data for the netfs */ unsigned int debug_id; unsigned int cookie_debug_id; atomic_t nr_rd_ops; /* Number of read ops in progress */ + atomic_t nr_wr_ops; /* Number of write ops in progress */ size_t submitted; /* Amount submitted for I/O so far */ size_t len; /* Length of the request */ short error; /* 0 or error that occurred */ @@ -149,6 +163,7 @@ struct netfs_read_request { struct netfs_read_request_ops { bool (*is_cache_enabled)(struct inode *inode); void (*init_rreq)(struct netfs_read_request *rreq, struct file *file); + int (*begin_cache_operation)(struct netfs_read_request *rreq); void (*expand_readahead)(struct netfs_read_request *rreq); bool (*clamp_length)(struct netfs_read_subrequest *subreq); void (*issue_op)(struct netfs_read_subrequest *subreq); @@ -159,6 +174,46 @@ struct netfs_read_request_ops { void (*cleanup)(struct address_space *mapping, void *netfs_priv); }; +/* + * Table of operations for access to a cache. This is obtained by + * rreq->ops->begin_cache_operation(). + */ +struct netfs_cache_ops { + /* End an operation */ + void (*end_operation)(struct netfs_cache_resources *cres); + + /* Read data from the cache */ + int (*read)(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + bool seek_data, + netfs_io_terminated_t term_func, + void *term_func_priv); + + /* Write data to the cache */ + int (*write)(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv); + + /* Expand readahead request */ + void (*expand_readahead)(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, loff_t i_size); + + /* Prepare a read operation, shortening it to a cached/uncached + * boundary as appropriate. + */ + enum netfs_read_source (*prepare_read)(struct netfs_read_subrequest *subreq, + loff_t i_size); + + /* Prepare a write operation, working out what part of the write we can + * actually do. + */ + int (*prepare_write)(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, loff_t i_size); +}; + struct readahead_control; extern void netfs_readahead(struct readahead_control *, const struct netfs_read_request_ops *, -- cgit v1.2.3 From 26aaeffcafe6cbb7c3978fa6ed7555122f8c9f8c Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 22 Feb 2021 11:39:47 +0000 Subject: fscache, cachefiles: Add alternate API to use kiocb for read/write to cache Add an alternate API by which the cache can be accessed through a kiocb, doing async DIO, rather than using the current API that tells the cache where all the pages are. The new API is intended to be used in conjunction with the netfs helper library. A filesystem must pick one or the other and not mix them. Filesystems wanting to use the new API must #define FSCACHE_USE_NEW_IO_API before #including the header. This prevents them from continuing to use the old API at the same time as there are incompatibilities in how the PG_fscache page bit is used. Changes: v6: - Provide a routine to shape a write so that the start and length can be aligned for DIO[3]. v4: - Use the vfs_iocb_iter_read/write() helpers[1] - Move initial definition of fscache_begin_read_operation() here. - Remove a commented-out line[2] - Combine ki->term_func calls in cachefiles_read_complete()[2]. - Remove explicit NULL initialiser[2]. - Remove extern on func decl[2]. - Put in param names on func decl[2]. - Remove redundant else[2]. - Fill out the kdoc comment for fscache_begin_read_operation(). - Rename fs/fscache/page2.c to io.c to match later patches. Signed-off-by: David Howells Reviewed-and-tested-by: Jeff Layton Tested-by: Dave Wysochanski Tested-By: Marc Dionne cc: Christoph Hellwig cc: linux-cachefs@redhat.com cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20210216102614.GA27555@lst.de/ [1] Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [2] Link: https://lore.kernel.org/r/161781047695.463527.7463536103593997492.stgit@warthog.procyon.org.uk/ [3] Link: https://lore.kernel.org/r/161118142558.1232039.17993829899588971439.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/161161037850.2537118.8819808229350326503.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/161340402057.1303470.8038373593844486698.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/161539545919.286939.14573472672781434757.stgit@warthog.procyon.org.uk/ # v4 Link: https://lore.kernel.org/r/161653801477.2770958.10543270629064934227.stgit@warthog.procyon.org.uk/ # v5 Link: https://lore.kernel.org/r/161789084517.6155.12799689829859169640.stgit@warthog.procyon.org.uk/ # v6 --- include/linux/fscache-cache.h | 4 ++++ include/linux/fscache.h | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 3f0b19dcfae7..3235ddbdcc09 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -304,6 +304,10 @@ struct fscache_cache_ops { /* dissociate a cache from all the pages it was backing */ void (*dissociate_pages)(struct fscache_cache *cache); + + /* Begin a read operation for the netfs lib */ + int (*begin_read_operation)(struct netfs_read_request *rreq, + struct fscache_retrieval *op); }; extern struct fscache_cookie fscache_fsdef_index; diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 1f8dc72369ee..abc1c4737fb8 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -37,6 +37,7 @@ struct pagevec; struct fscache_cache_tag; struct fscache_cookie; struct fscache_netfs; +struct netfs_read_request; typedef void (*fscache_rw_complete_t)(struct page *page, void *context, @@ -191,6 +192,10 @@ extern void __fscache_update_cookie(struct fscache_cookie *, const void *); extern int __fscache_attr_changed(struct fscache_cookie *); extern void __fscache_invalidate(struct fscache_cookie *); extern void __fscache_wait_on_invalidate(struct fscache_cookie *); + +#ifdef FSCACHE_USE_NEW_IO_API +extern int __fscache_begin_read_operation(struct netfs_read_request *, struct fscache_cookie *); +#else extern int __fscache_read_or_alloc_page(struct fscache_cookie *, struct page *, fscache_rw_complete_t, @@ -214,6 +219,8 @@ extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *, struct inode *); extern void __fscache_readpages_cancel(struct fscache_cookie *cookie, struct list_head *pages); +#endif /* FSCACHE_USE_NEW_IO_API */ + extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool); extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t, bool (*)(void *), void *); @@ -498,6 +505,36 @@ int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) return -ENOBUFS; } +#ifdef FSCACHE_USE_NEW_IO_API + +/** + * fscache_begin_read_operation - Begin a read operation for the netfs lib + * @rreq: The read request being undertaken + * @cookie: The cookie representing the cache object + * + * Begin a read operation on behalf of the netfs helper library. @rreq + * indicates the read request to which the operation state should be attached; + * @cookie indicates the cache object that will be accessed. + * + * This is intended to be called from the ->begin_cache_operation() netfs lib + * operation as implemented by the network filesystem. + * + * Returns: + * * 0 - Success + * * -ENOBUFS - No caching available + * * Other error code from the cache, such as -ENOMEM. + */ +static inline +int fscache_begin_read_operation(struct netfs_read_request *rreq, + struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) + return __fscache_begin_read_operation(rreq, cookie); + return -ENOBUFS; +} + +#else /* FSCACHE_USE_NEW_IO_API */ + /** * fscache_read_or_alloc_page - Read a page from the cache or allocate a block * in which to store it @@ -777,6 +814,8 @@ void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, __fscache_uncache_all_inode_pages(cookie, inode); } +#endif /* FSCACHE_USE_NEW_IO_API */ + /** * fscache_disable_cookie - Disable a cookie * @cookie: The cookie representing the cache object -- cgit v1.2.3 From a8ce7bd89689997537dd22dcbced46cf23dc19da Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 23 Apr 2021 13:45:24 +0200 Subject: regulator: core: Fix off_on_delay handling The jiffies-based off_on_delay implementation has a couple of problems that cause it to sometimes not actually delay for the required time: (1) If, for example, the off_on_delay time is equivalent to one jiffy, and the ->last_off_jiffy is set just before a new jiffy starts, then _regulator_do_enable() does not wait at all since it checks using time_before(). (2) When jiffies overflows, the value of "remaining" becomes higher than "max_delay" and the code simply proceeds without waiting. Fix these problems by changing it to use ktime_t instead. [Note that since jiffies doesn't start at zero but at INITIAL_JIFFIES ("-5 minutes"), (2) above also led to the code not delaying if the first regulator_enable() is called when the ->last_off_jiffy is not initialised, such as for regulators with ->constraints->boot_on set. It's not clear to me if this was intended or not, but I've preserved this behaviour explicitly with the check for a non-zero ->last_off.] Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20210423114524.26414-1-vincent.whitchurch@axis.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 597ed117086f..4ea520c248e9 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -476,7 +476,7 @@ struct regulator_dev { unsigned int is_switch:1; /* time when this regulator was disabled last time */ - unsigned long last_off_jiffy; + ktime_t last_off; }; struct regulator_dev * -- cgit v1.2.3 From 06ec5acc7747f225154fcafaf2afe52324694baa Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 2 Mar 2021 13:54:42 +0200 Subject: net/mlx5: E-Switch, Return eswitch max ports when eswitch is supported mlx5_eswitch_get_total_vports() doesn't honor MLX5_ESWICH Kconfig flag. When MLX5_ESWITCH is disabled, FS layer continues to initialize eswitch specific ACL namespaces. Instead, start honoring MLX5_ESWITCH flag and perform vport specific initialization only when vport count is non zero. Signed-off-by: Parav Pandit Reviewed-by: Roi Dayan Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 9cf1da2883c6..17109b65c1ac 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -65,8 +65,6 @@ struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, struct mlx5_eswitch_rep *rep, u32 sqn); -u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); - #ifdef CONFIG_MLX5_ESWITCH enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); @@ -126,6 +124,8 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, #define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); +u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); + #else /* CONFIG_MLX5_ESWITCH */ static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev) @@ -162,10 +162,17 @@ mlx5_eswitch_get_vport_metadata_mask(void) { return 0; } + +static inline u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) +{ + return 0; +} + #endif /* CONFIG_MLX5_ESWITCH */ static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev) { return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS; } + #endif -- cgit v1.2.3 From 9f8c7100c8f9879b7e972205cd1f33f0bc1cc8cb Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 2 Mar 2021 14:10:49 +0200 Subject: net/mlx5: E-Switch, Prepare to return total vports from eswitch struct Total vports are already stored during eswitch initialization. Instead of calculating everytime, read directly from eswitch. Additionally, host PF's SF vport information is available using QUERY_HCA_CAP command. It is not available through HCA_CAP of the eswitch manager PF. Hence, this patch prepares the return total eswitch vport count from the existing eswitch struct. This further helps to keep eswitch port counting macros and logic within eswitch. Signed-off-by: Parav Pandit Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/vport.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 4db87bcfce7b..aad53cb72f17 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,14 +36,6 @@ #include #include -#define MLX5_VPORT_PF_PLACEHOLDER (1u) -#define MLX5_VPORT_UPLINK_PLACEHOLDER (1u) -#define MLX5_VPORT_ECPF_PLACEHOLDER(mdev) (mlx5_ecpf_vport_exists(mdev)) - -#define MLX5_SPECIAL_VPORTS(mdev) (MLX5_VPORT_PF_PLACEHOLDER + \ - MLX5_VPORT_UPLINK_PLACEHOLDER + \ - MLX5_VPORT_ECPF_PLACEHOLDER(mdev)) - #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ -- cgit v1.2.3 From 1fdd7433a98a2f5511f49ad3f3b82bdd6f77265c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 1 Apr 2021 16:27:23 -0700 Subject: kbuild: add an elfnote for whether vmlinux is built with lto Currently, clang LTO built vmlinux won't work with pahole. LTO introduced cross-cu dwarf tag references and broke current pahole model which handles one cu as a time. The solution is to merge all cu's as one pahole cu as in [1]. We would like to do this merging only if cross-cu dwarf references happens. The LTO build mode is a pretty good indication for that. In earlier version of this patch ([2]), clang flag -grecord-gcc-switches is proposed to add to compilation flags so pahole could detect "-flto" and then merging cu's. This will increate the binary size of 1% without LTO though. Arnaldo suggested to use a note to indicate the vmlinux is built with LTO. Such a cheap way to get whether the vmlinux is built with LTO or not helps pahole but is also useful for tracing as LTO may inline/delete/demote global functions, promote static functions, etc. So this patch added an elfnote with a new type LINUX_ELFNOTE_LTO_INFO. The owner of the note is "Linux". With gcc 8.4.1 and clang trunk, without LTO, I got $ readelf -n vmlinux Displaying notes found in: .notes Owner Data size Description ... Linux 0x00000004 func description data: 00 00 00 00 ... With "readelf -x ".notes" vmlinux", I can verify the above "func" with type code 0x101. With clang thin-LTO, I got the same as above except the following: description data: 01 00 00 00 which indicates the vmlinux is built with LTO. [1] https://lore.kernel.org/bpf/20210325065316.3121287-1-yhs@fb.com/ [2] https://lore.kernel.org/bpf/20210331001623.2778934-1-yhs@fb.com/ Suggested-by: Arnaldo Carvalho de Melo Signed-off-by: Yonghong Song Reviewed-by: Nick Desaulniers Tested-by: Sedat Dilek # LLVM/Clang v12.0.0-rc4 (x86-64) Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Masahiro Yamada --- include/linux/elfnote-lto.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/linux/elfnote-lto.h (limited to 'include/linux') diff --git a/include/linux/elfnote-lto.h b/include/linux/elfnote-lto.h new file mode 100644 index 000000000000..d4635a3ecc4f --- /dev/null +++ b/include/linux/elfnote-lto.h @@ -0,0 +1,14 @@ +#ifndef __ELFNOTE_LTO_H +#define __ELFNOTE_LTO_H + +#include + +#define LINUX_ELFNOTE_LTO_INFO 0x101 + +#ifdef CONFIG_LTO +#define BUILD_LTO_INFO ELFNOTE32("Linux", LINUX_ELFNOTE_LTO_INFO, 1) +#else +#define BUILD_LTO_INFO ELFNOTE32("Linux", LINUX_ELFNOTE_LTO_INFO, 0) +#endif + +#endif /* __ELFNOTE_LTO_H */ -- cgit v1.2.3 From 0e0345b77ac4605d5447b252d220e4a2ee118da7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 15 Apr 2021 20:36:07 +0300 Subject: kbuild: redo fake deps at include/config/*.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make include/config/foo/bar.h fake deps files generation simpler. * delete .h suffix those aren't header files, shorten filenames, * delete tolower() Linux filesystems can deal with both upper and lowercase filenames very well, * put everything in 1 directory Presumably 'mkdir -p' split is from dark times when filesystems handled huge directories badly, disks were round adding to seek times. x86_64 allmodconfig lists 12364 files in include/config. ../obj/include/config/ ├── 104_QUAD_8 ├── 60XX_WDT ├── 64BIT ... ├── ZSWAP_DEFAULT_ON ├── ZSWAP_ZPOOL_DEFAULT └── ZSWAP_ZPOOL_DEFAULT_ZBUD 0 directories, 12364 files Signed-off-by: Alexey Dobriyan Signed-off-by: Masahiro Yamada --- include/linux/compiler-version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h index 2b2972c77c62..573fa85b6c0c 100644 --- a/include/linux/compiler-version.h +++ b/include/linux/compiler-version.h @@ -9,6 +9,6 @@ * This header exists to force full rebuild when the compiler is upgraded. * * When fixdep scans this, it will find this string "CONFIG_CC_VERSION_TEXT" - * and add dependency on include/config/cc/version/text.h, which is touched + * and add dependency on include/config/CC_VERSION_TEXT, which is touched * by Kconfig when the version string from the compiler changes. */ -- cgit v1.2.3 From 81dd4d4d6178306ab31db91bdc7353d485bdafce Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sat, 24 Apr 2021 10:04:15 -0500 Subject: dmaengine: idxd: Add IDXD performance monitor support Implement the IDXD performance monitor capability (named 'perfmon' in the DSA (Data Streaming Accelerator) spec [1]), which supports the collection of information about key events occurring during DSA and IAX (Intel Analytics Accelerator) device execution, to assist in performance tuning and debugging. The idxd perfmon support is implemented as part of the IDXD driver and interfaces with the Linux perf framework. It has several features in common with the existing uncore pmu support: - it does not support sampling - does not support per-thread counting However it also has some unique features not present in the core and uncore support: - all general-purpose counters are identical, thus no event constraints - operation is always system-wide While the core perf subsystem assumes that all counters are by default per-cpu, the uncore pmus are socket-scoped and use a cpu mask to restrict counting to one cpu from each socket. IDXD counters use a similar strategy but expand the scope even further; since IDXD counters are system-wide and can be read from any cpu, the IDXD perf driver picks a single cpu to do the work (with cpu hotplug notifiers to choose a different cpu if the chosen one is taken off-line). More specifically, the perf userspace tool by default opens a counter for each cpu for an event. However, if it finds a cpumask file associated with the pmu under sysfs, as is the case with the uncore pmus, it will open counters only on the cpus specified by the cpumask. Since perfmon only needs to open a single counter per event for a given IDXD device, the perfmon driver will create a sysfs cpumask file for the device and insert the first cpu of the system into it. When a user uses perf to open an event, perf will open a single counter on the cpu specified by the cpu mask. This amounts to the default system-wide rather than per-cpu counting mentioned previously for perfmon pmu events. In order to keep the cpu mask up-to-date, the driver implements cpu hotplug support for multiple devices, as IDXD usually enumerates and registers more than one idxd device. The perfmon driver implements basic perfmon hardware capability discovery and configuration, and is initialized by the IDXD driver's probe function. During initialization, the driver retrieves the total number of supported performance counters, the pmu ID, and the device type from idxd device, and registers itself under the Linux perf framework. The perf userspace tool can be used to monitor single or multiple events depending on the given configuration, as well as event groups, which are also supported by the perfmon driver. The user configures events using the perf tool command-line interface by specifying the event and corresponding event category, along with an optional set of filters that can be used to restrict counting to specific work queues, traffic classes, page and transfer sizes, and engines (See [1] for specifics). With the configuration specified by the user, the perf tool issues a system call passing that information to the kernel, which uses it to initialize the specified event(s). The event(s) are opened and started, and following termination of the perf command, they're stopped. At that point, the perfmon driver will read the latest count for the event(s), calculate the difference between the latest counter values and previously tracked counter values, and display the final incremental count as the event count for the cycle. An overflow handler registered on the IDXD irq path is used to account for counter overflows, which are signaled by an overflow interrupt. Below are a couple of examples of perf usage for monitoring DSA events. The following monitors all events in the 'engine' category. Becuuse no filters are specified, this captures all engine events for the workload, which in this case is 19 iterations of the work generated by the kernel dmatest module. Details describing the events can be found in Appendix D of [1], Performance Monitoring Events, but briefly they are: event 0x1: total input data processed, in 32-byte units event 0x2: total data written, in 32-byte units event 0x4: number of work descriptors that read the source event 0x8: number of work descriptors that write the destination event 0x10: number of work descriptors dispatched from batch descriptors event 0x20: number of work descriptors dispatched from work queues # perf stat -e dsa0/event=0x1,event_category=0x1/, dsa0/event=0x2,event_category=0x1/, dsa0/event=0x4,event_category=0x1/, dsa0/event=0x8,event_category=0x1/, dsa0/event=0x10,event_category=0x1/, dsa0/event=0x20,event_category=0x1/ modprobe dmatest channel=dma0chan0 timeout=2000 iterations=19 run=1 wait=1 Performance counter stats for 'system wide': 5,332 dsa0/event=0x1,event_category=0x1/ 5,327 dsa0/event=0x2,event_category=0x1/ 19 dsa0/event=0x4,event_category=0x1/ 19 dsa0/event=0x8,event_category=0x1/ 0 dsa0/event=0x10,event_category=0x1/ 19 dsa0/event=0x20,event_category=0x1/ 21.977436186 seconds time elapsed The command below illustrates filter usage with a simple example. It specifies that MEM_MOVE operations should be counted for the DSA device dsa0 (event 0x8 corresponds to the EV_MEM_MOVE event - Number of Memory Move Descriptors, which is part of event category 0x3 - Operations. The detailed category and event IDs are available in Appendix D, Performance Monitoring Events, of [1]). In addition to the event and event category, a number of filters are also specified (the detailed filter values are available in Chapter 6.4 (Filter Support) of [1]), which will restrict counting to only those events that meet all of the filter criteria. In this case, the filters specify that only MEM_MOVE operations that are serviced by work queue wq0 and specifically engine number engine0 and traffic class tc0 having sizes between 0 and 4k and page size of between 0 and 1G result in a counter hit; anything else will be filtered out and not appear in the final count. Note that filters are optional - any filter not specified is assumed to be all ones and will pass anything. # perf stat -e dsa0/filter_wq=0x1,filter_tc=0x1,filter_sz=0x7, filter_eng=0x1,event=0x8,event_category=0x3/ modprobe dmatest channel=dma0chan0 timeout=2000 iterations=19 run=1 wait=1 Performance counter stats for 'system wide': 19 dsa0/filter_wq=0x1,filter_tc=0x1,filter_sz=0x7, filter_eng=0x1,event=0x8,event_category=0x3/ 21.865914091 seconds time elapsed The output above reflects that the unspecified workload resulted in the counting of 19 MEM_MOVE operation events that met the filter criteria. [1]: https://software.intel.com/content/www/us/en/develop/download/intel-data-streaming-accelerator-preliminary-architecture-specification.html [ Based on work originally by Jing Lin. ] Reviewed-by: Dave Jiang Reviewed-by: Kan Liang Signed-off-by: Tom Zanussi Link: https://lore.kernel.org/r/0c5080a7d541904c4ad42b848c76a1ce056ddac7.1619276133.git.zanussi@kernel.org Signed-off-by: Vinod Koul --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index f14adb882338..264d911424c0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -167,6 +167,7 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_RAPL_ONLINE, CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, + CPUHP_AP_PERF_X86_IDXD_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_CFD_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, -- cgit v1.2.3 From 4c95e0728eee33df6b029a5fca82a67daeca201e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Apr 2021 09:50:59 +0200 Subject: netfilter: ebtables: remove the 3 ebtables pointers from struct net ebtables stores the table internal data (what gets passed to the ebt_do_table() interpreter) in struct net. nftables keeps the internal interpreter format in pernet lists and passes it via the netfilter core infrastructure (priv pointer). Do the same for ebtables: the nf_hook_ops are duplicated via kmemdup, then the ops->priv pointer is set to the table that is being registered. After that, the netfilter core passes this table info to the hookfn. This allows to remove the pointers from struct net. Same pattern can be applied to ip/ip6/arptables. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 3a956145a25c..a8178253ce53 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -100,6 +100,7 @@ struct ebt_table { unsigned int valid_hooks); /* the data used by the kernel */ struct ebt_table_info *private; + struct nf_hook_ops *ops; struct module *me; }; @@ -108,11 +109,9 @@ struct ebt_table { extern int ebt_register_table(struct net *net, const struct ebt_table *table, - const struct nf_hook_ops *ops, - struct ebt_table **res); -extern void ebt_unregister_table(struct net *net, struct ebt_table *table); -void ebt_unregister_table_pre_exit(struct net *net, const char *tablename, - const struct nf_hook_ops *ops); + const struct nf_hook_ops *ops); +extern void ebt_unregister_table(struct net *net, const char *tablename); +void ebt_unregister_table_pre_exit(struct net *net, const char *tablename); extern unsigned int ebt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct ebt_table *table); -- cgit v1.2.3 From 7716bf090e97aec45e97907ec6a382e4610bdd8f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Apr 2021 09:51:00 +0200 Subject: netfilter: x_tables: remove ipt_unregister_table Its the same function as ipt_unregister_table_exit. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4/ip_tables.h | 3 --- include/linux/netfilter_ipv6/ip6_tables.h | 2 -- 2 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index c4676d6feeff..9f440eb6cf6c 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -31,9 +31,6 @@ void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table, void ipt_unregister_table_exit(struct net *net, struct xt_table *table); -void ipt_unregister_table(struct net *net, struct xt_table *table, - const struct nf_hook_ops *ops); - /* Standard entry. */ struct ipt_standard { struct ipt_entry entry; diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 1547d5f9ae06..b88a27ce61b0 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -27,8 +27,6 @@ extern void *ip6t_alloc_initial_table(const struct xt_table *); int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res); -void ip6t_unregister_table(struct net *net, struct xt_table *table, - const struct nf_hook_ops *ops); void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops); void ip6t_unregister_table_exit(struct net *net, struct xt_table *table); -- cgit v1.2.3 From 1ef4d6d1af2d0c0c7c9b391365a3894bea291e34 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Apr 2021 09:51:01 +0200 Subject: netfilter: x_tables: add xt_find_table This will be used to obtain the xt_table struct given address family and table name. Followup patches will reduce the number of direct accesses to the xt_table structures via net->ipv{4,6}.ip(6)table_{nat,mangle,...} pointers, then remove them. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8ec48466410a..b2eec7de5280 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -322,6 +322,7 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); int xt_find_revision(u8 af, const char *name, u8 revision, int target, int *err); +struct xt_table *xt_find_table(struct net *net, u8 af, const char *name); struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name); struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af, -- cgit v1.2.3 From 20a9df33594fe643f9cf46375a9243e3ab8ed3a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Apr 2021 09:51:02 +0200 Subject: netfilter: iptables: unregister the tables by name xtables stores the xt_table structs in the struct net. This isn't needed anymore, the structures could be passed via the netfilter hook 'private' pointer to the hook functions, which would allow us to remove those pointers from struct net. As a first step, reduce the number of accesses to the net->ipv4.ip6table_{raw,filter,...} pointers. This allows the tables to get unregistered by name instead of having to pass the raw address. The xt_table structure cane looked up by name+address family instead. This patch is useless as-is (the backends still have the raw pointer address), but it lowers the bar to remove those. It also allows to put the 'was table registered in the first place' check into ip_tables.c rather than have it in each table sub module. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4/ip_tables.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 9f440eb6cf6c..73bcf7f261d2 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -26,10 +26,10 @@ int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res); -void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table, - const struct nf_hook_ops *ops); +void ipt_unregister_table_pre_exit(struct net *net, const char *name, + const struct nf_hook_ops *ops); -void ipt_unregister_table_exit(struct net *net, struct xt_table *table); +void ipt_unregister_table_exit(struct net *net, const char *name); /* Standard entry. */ struct ipt_standard { -- cgit v1.2.3 From 6c0717545f2ca61c95f5f739da845e77cc8bd498 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Apr 2021 09:51:03 +0200 Subject: netfilter: ip6tables: unregister the tables by name Same as the previous patch, but for ip6tables. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6/ip6_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index b88a27ce61b0..8c07426e18a8 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -27,9 +27,9 @@ extern void *ip6t_alloc_initial_table(const struct xt_table *); int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res); -void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table, +void ip6t_unregister_table_pre_exit(struct net *net, const char *name, const struct nf_hook_ops *ops); -void ip6t_unregister_table_exit(struct net *net, struct xt_table *table); +void ip6t_unregister_table_exit(struct net *net, const char *name); extern unsigned int ip6t_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); -- cgit v1.2.3 From 4d705399191c3cfe1264588b3a4a8115e6c3b161 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Apr 2021 09:51:04 +0200 Subject: netfilter: arptables: unregister the tables by name and again, this time for arptables. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_arp/arp_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 26a13294318c..9ec73dcc8fd6 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -52,8 +52,8 @@ extern void *arpt_alloc_initial_table(const struct xt_table *); int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res); -void arpt_unregister_table(struct net *net, struct xt_table *table); -void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table, +void arpt_unregister_table(struct net *net, const char *name); +void arpt_unregister_table_pre_exit(struct net *net, const char *name, const struct nf_hook_ops *ops); extern unsigned int arpt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, -- cgit v1.2.3 From ae689334225ff0e4ef112459ecd24aea932c2b00 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Apr 2021 09:51:07 +0200 Subject: netfilter: ip_tables: pass table pointer via nf_hook_ops iptable_x modules rely on 'struct net' to contain a pointer to the table that should be evaluated. In order to remove these pointers from struct net, pass them via the 'priv' pointer in a similar fashion as nf_tables passes the rule data. To do that, duplicate the nf_hook_info array passed in from the iptable_x modules, update the ops->priv pointers of the copy to refer to the table and then change the hookfn implementations to just pass the 'priv' argument to the traverser. After this patch, the xt_table pointers can already be removed from struct net. However, changes to struct net result in re-compile of the entire network stack, so do the removal after arptables and ip6tables have been converted as well. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 3 +++ include/linux/netfilter_ipv4/ip_tables.h | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b2eec7de5280..a52cc22f806a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -229,6 +229,9 @@ struct xt_table { /* Man behind the curtain... */ struct xt_table_info *private; + /* hook ops that register the table with the netfilter core */ + struct nf_hook_ops *ops; + /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 73bcf7f261d2..0fdab3246ef5 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -24,11 +24,9 @@ int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, - const struct nf_hook_ops *ops, struct xt_table **res); - -void ipt_unregister_table_pre_exit(struct net *net, const char *name, - const struct nf_hook_ops *ops); + const struct nf_hook_ops *ops); +void ipt_unregister_table_pre_exit(struct net *net, const char *name); void ipt_unregister_table_exit(struct net *net, const char *name); /* Standard entry. */ -- cgit v1.2.3 From f9006acc8dfe59e25aa75729728ac57a8d84fc32 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Apr 2021 09:51:08 +0200 Subject: netfilter: arp_tables: pass table pointer via nf_hook_ops Same change as previous patch. Only difference: no need to handle NULL template_ops parameter, the only caller (arptable_filter) always passes non-NULL argument. This removes all remaining accesses to net->ipv4.arptable_filter. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_arp/arp_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 9ec73dcc8fd6..a0474b4e7782 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -51,7 +51,7 @@ struct arpt_error { extern void *arpt_alloc_initial_table(const struct xt_table *); int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, - const struct nf_hook_ops *ops, struct xt_table **res); + const struct nf_hook_ops *ops); void arpt_unregister_table(struct net *net, const char *name); void arpt_unregister_table_pre_exit(struct net *net, const char *name, const struct nf_hook_ops *ops); -- cgit v1.2.3 From ee177a54413a33fe474d55fabb5f8ff390bb27d7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Apr 2021 09:51:09 +0200 Subject: netfilter: ip6_tables: pass table pointer via nf_hook_ops Same patch as the ip_tables one: removal of all accesses to ip6_tables xt_table pointers. After this patch the struct net xt_table anchors can be removed. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6/ip6_tables.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 8c07426e18a8..11d0e725fe79 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -26,9 +26,8 @@ extern void *ip6t_alloc_initial_table(const struct xt_table *); int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, - const struct nf_hook_ops *ops, struct xt_table **res); -void ip6t_unregister_table_pre_exit(struct net *net, const char *name, - const struct nf_hook_ops *ops); + const struct nf_hook_ops *ops); +void ip6t_unregister_table_pre_exit(struct net *net, const char *name); void ip6t_unregister_table_exit(struct net *net, const char *name); extern unsigned int ip6t_do_table(struct sk_buff *skb, const struct nf_hook_state *state, -- cgit v1.2.3 From 95aafe911db602d19b00d2a88c3d54a84119f5dc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 25 Apr 2021 02:30:38 +0200 Subject: net: ethernet: ixp4xx: Support device tree probing This adds device tree probing to the IXP4xx ethernet driver. Add a platform data bool to tell us whether to register an MDIO bus for the device or not, as well as the corresponding NPE. We need to drop the memory region request as part of this since the OF core will request the memory for the device. Cc: Zoltan HERPAI Cc: Raylynn Knight Signed-off-by: Linus Walleij Signed-off-by: David S. Miller --- include/linux/platform_data/eth_ixp4xx.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/eth_ixp4xx.h b/include/linux/platform_data/eth_ixp4xx.h index 6f652ea0c6ae..114b0940729f 100644 --- a/include/linux/platform_data/eth_ixp4xx.h +++ b/include/linux/platform_data/eth_ixp4xx.h @@ -14,6 +14,8 @@ struct eth_plat_info { u8 rxq; /* configurable, currently 0 - 31 only */ u8 txreadyq; u8 hwaddr[6]; + u8 npe; /* NPE instance used by this interface */ + bool has_mdio; /* If this instance has an MDIO bus */ }; #endif -- cgit v1.2.3 From a655536571747575fcaac3c93252b0032d878545 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 23 Apr 2021 00:17:09 +0200 Subject: netfilter: nfnetlink: add struct nfnl_info and pass it to callbacks Add a new structure to reduce callback footprint and to facilite extensions of the nfnetlink callback interface in the future. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index d4c14257db5d..1baa3205b199 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -7,11 +7,16 @@ #include #include +struct nfnl_info { + struct net *net; + struct sock *sk; + const struct nlmsghdr *nlh; + struct netlink_ext_ack *extack; +}; + struct nfnl_callback { - int (*call)(struct net *net, struct sock *nl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack); + int (*call)(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const cda[]); int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[], -- cgit v1.2.3 From 797d49805ddc6595b2fafe3e9ceff7f562be1f2c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 23 Apr 2021 00:17:10 +0200 Subject: netfilter: nfnetlink: pass struct nfnl_info to rcu callbacks Update rcu callbacks to use the nfnl_info structure. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 1baa3205b199..c11f2f99eac4 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -17,10 +17,8 @@ struct nfnl_info { struct nfnl_callback { int (*call)(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]); - int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack); + int (*call_rcu)(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const cda[]); int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[], -- cgit v1.2.3 From 7dab8ee3b6e7ec856a616d07ebb9ebd736c92520 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 23 Apr 2021 00:17:11 +0200 Subject: netfilter: nfnetlink: pass struct nfnl_info to batch callbacks Update batch callbacks to use the nfnl_info structure. Rename one clashing info variable to expr_info. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index c11f2f99eac4..df0e3254c57b 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -19,10 +19,8 @@ struct nfnl_callback { const struct nlattr * const cda[]); int (*call_rcu)(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]); - int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[], - struct netlink_ext_ack *extack); + int (*call_batch)(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const cda[]); const struct nla_policy *policy; /* netlink attribute policy */ const u_int16_t attr_count; /* number of nlattr's */ }; -- cgit v1.2.3 From 50f2db9e368f73ecbbaa92da365183fa953aaba7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 23 Apr 2021 00:17:12 +0200 Subject: netfilter: nfnetlink: consolidate callback types Add enum nfnl_callback_type to identify the callback type to provide one single callback. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index df0e3254c57b..515ce53aa20d 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -14,15 +14,19 @@ struct nfnl_info { struct netlink_ext_ack *extack; }; +enum nfnl_callback_type { + NFNL_CB_UNSPEC = 0, + NFNL_CB_MUTEX, + NFNL_CB_RCU, + NFNL_CB_BATCH, +}; + struct nfnl_callback { int (*call)(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]); - int (*call_rcu)(struct sk_buff *skb, const struct nfnl_info *info, - const struct nlattr * const cda[]); - int (*call_batch)(struct sk_buff *skb, const struct nfnl_info *info, - const struct nlattr * const cda[]); - const struct nla_policy *policy; /* netlink attribute policy */ - const u_int16_t attr_count; /* number of nlattr's */ + const struct nla_policy *policy; + enum nfnl_callback_type type; + __u16 attr_count; }; enum nfnl_abort_action { -- cgit v1.2.3 From 47a6959fa331fe892a4fc3b48ca08e92045c6bda Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 26 Apr 2021 12:14:40 +0200 Subject: netfilter: allow to turn off xtables compat layer The compat layer needs to parse untrusted input (the ruleset) to translate it to a 64bit compatible format. We had a number of bugs in this department in the past, so allow users to turn this feature off. Add CONFIG_NETFILTER_XTABLES_COMPAT kconfig knob and make it default to y to keep existing behaviour. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 12 ++++++------ include/linux/netfilter_arp/arp_tables.h | 2 +- include/linux/netfilter_ipv4/ip_tables.h | 2 +- include/linux/netfilter_ipv6/ip6_tables.h | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index a52cc22f806a..07c6ad8f2a02 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -158,7 +158,7 @@ struct xt_match { /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_mtdtor_param *); -#ifdef CONFIG_COMPAT +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, const void *src); int (*compat_to_user)(void __user *dst, const void *src); @@ -169,7 +169,7 @@ struct xt_match { const char *table; unsigned int matchsize; unsigned int usersize; -#ifdef CONFIG_COMPAT +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT unsigned int compatsize; #endif unsigned int hooks; @@ -199,7 +199,7 @@ struct xt_target { /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_tgdtor_param *); -#ifdef CONFIG_COMPAT +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, const void *src); int (*compat_to_user)(void __user *dst, const void *src); @@ -210,7 +210,7 @@ struct xt_target { const char *table; unsigned int targetsize; unsigned int usersize; -#ifdef CONFIG_COMPAT +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT unsigned int compatsize; #endif unsigned int hooks; @@ -452,7 +452,7 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); -#ifdef CONFIG_COMPAT +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include struct compat_xt_entry_match { @@ -533,5 +533,5 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); -#endif /* CONFIG_COMPAT */ +#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */ #endif /* _X_TABLES_H */ diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index a0474b4e7782..2aab9612f6ab 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -59,7 +59,7 @@ extern unsigned int arpt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); -#ifdef CONFIG_COMPAT +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include struct compat_arpt_entry { diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 0fdab3246ef5..8d09bfe850dc 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -67,7 +67,7 @@ extern unsigned int ipt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); -#ifdef CONFIG_COMPAT +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include struct compat_ipt_entry { diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 11d0e725fe79..79e73fd7d965 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -33,7 +33,7 @@ extern unsigned int ip6t_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); -#ifdef CONFIG_COMPAT +#ifdef CONFIG_NETFILTER_XTABLES_COMPAT #include struct compat_ip6t_entry { -- cgit v1.2.3 From 63fa73e2151848ed5930dfe0040c823ffe1f2cc4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 Apr 2021 09:44:47 -0300 Subject: net: Fix typo in comment about ancillary data Ingo sent typo fixes for tools/ and this resulted in a warning when building the perf/core branch that will be sent upstream in the next merge window: Warning: Kernel ABI header at 'tools/perf/trace/beauty/include/linux/socket.h' differs from latest version at 'include/linux/socket.h' diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h Fix the typo on the kernel file to address this. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/socket.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 385894b4a8bb..b8fc5c53ba6f 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -85,7 +85,7 @@ struct mmsghdr { /* * POSIX 1003.1g - ancillary data object information - * Ancillary data consits of a sequence of pairs of + * Ancillary data consists of a sequence of pairs of * (cmsghdr, cmsg_data[]) */ -- cgit v1.2.3 From 3d14ec1fe61aebe3da85a9b8f2c3d61e43d522e6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 25 Apr 2021 22:02:38 +0100 Subject: iov_iter: Four fixes for ITER_XARRAY Fix four things[1] in the patch that adds ITER_XARRAY[2]: (1) Remove the address_space struct predeclaration. This is a holdover from when it was ITER_MAPPING. (2) Fix _copy_mc_to_iter() so that the xarray segment updates count and iov_offset in the iterator before returning. (3) Fix iov_iter_alignment() to not loop in the xarray case. Because the middle pages are all whole pages, only the end pages need be considered - and this can be reduced to just looking at the start position in the xarray and the iteration size. (4) Fix iov_iter_advance() to limit the size of the advance to no more than the remaining iteration size. Reported-by: Al Viro Signed-off-by: David Howells Reviewed-by: Al Viro Tested-by: Jeff Layton Tested-by: Dave Wysochanski Link: https://lore.kernel.org/r/YIVrJT8GwLI0Wlgx@zeniv-ca.linux.org.uk [1] Link: https://lore.kernel.org/r/161918448151.3145707.11541538916600921083.stgit@warthog.procyon.org.uk [2] --- include/linux/uio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 5f5ffc45d4aa..d3ec87706d75 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -10,7 +10,6 @@ #include struct page; -struct address_space; struct pipe_inode_info; struct kvec { -- cgit v1.2.3 From c4b364ce1270d689ee5010001344b8eae3685f32 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 27 Apr 2021 12:22:00 +0800 Subject: net: dsa: free skb->cb usage in core driver Free skb->cb usage in core driver and let device drivers decide to use or not. The reason having a DSA_SKB_CB(skb)->clone was because dsa_skb_tx_timestamp() which may set the clone pointer was called before p->xmit() which would use the clone if any, and the device driver has no way to initialize the clone pointer. This patch just put memset(skb->cb, 0, sizeof(skb->cb)) at beginning of dsa_slave_xmit(). Some new features in the future, like one-step timestamp may need more bytes of skb->cb to use in dsa_skb_tx_timestamp(), and p->xmit(). Signed-off-by: Yangbo Lu Acked-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index dd93735ae228..1eb84562b311 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -47,11 +47,12 @@ struct sja1105_tagger_data { }; struct sja1105_skb_cb { + struct sk_buff *clone; u32 meta_tstamp; }; #define SJA1105_SKB_CB(skb) \ - ((struct sja1105_skb_cb *)DSA_SKB_CB_PRIV(skb)) + ((struct sja1105_skb_cb *)((skb)->cb)) struct sja1105_port { u16 subvlan_map[DSA_8021Q_N_SUBVLAN]; -- cgit v1.2.3 From 800fcab8230f622544a12403977b5b7259a076f8 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 27 Apr 2021 09:09:07 +0200 Subject: net: phy: Add support for microchip SMI0 MDIO bus SMI0 is a mangled version of MDIO. The main low level difference is the MDIO C22 OP code is always 0, not 0x2 or 0x1 for Read/Write. The read/write information is instead encoded in the PHY address. Extend the bit-bang code to allow the op code to be overridden, but default to normal C22 values. Add an extra compatible to the mdio-gpio driver, and when this compatible is present, set the op codes to 0. A higher level driver, sitting on top of the basic MDIO bus driver can then implement the rest of the microchip SMI0 odderties. Signed-off-by: Andrew Lunn Signed-off-by: Michael Grzeschik Signed-off-by: Oleksij Rempel Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio-bitbang.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h index aca4dc037b70..373630fe5c28 100644 --- a/include/linux/mdio-bitbang.h +++ b/include/linux/mdio-bitbang.h @@ -33,6 +33,9 @@ struct mdiobb_ops { struct mdiobb_ctrl { const struct mdiobb_ops *ops; + unsigned int override_op_c22; + u8 op_c22_read; + u8 op_c22_write; }; int mdiobb_read(struct mii_bus *bus, int phy, int reg); -- cgit v1.2.3 From 76d6a13383b8e3ff20a9cf52aa9c3de39e485632 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 27 Apr 2021 19:43:12 +0200 Subject: seq_file: Add a seq_bprintf function Similarly to seq_buf_bprintf in lib/seq_buf.c, this function writes a printf formatted string with arguments provided in a "binary representation" built by functions such as vbin_printf. Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210427174313.860948-2-revest@chromium.org --- include/linux/seq_file.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index b83b3ae3c877..723b1fa1177e 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -146,6 +146,10 @@ void *__seq_open_private(struct file *, const struct seq_operations *, int); int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); +#ifdef CONFIG_BINARY_PRINTF +void seq_bprintf(struct seq_file *m, const char *f, const u32 *binary); +#endif + #define DEFINE_SEQ_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ -- cgit v1.2.3 From 527139d738d7f2e9f929c752eebf3cbf0f74c754 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Fri, 16 Apr 2021 20:58:38 +0000 Subject: PCI/sysfs: Convert "rom" to static attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "rom" sysfs attribute allows access to the PCI Option ROM. Previously it was dynamically created either by pci_bus_add_device() or the pci_sysfs_init() initcall, but since it doesn't need to be created or removed dynamically, we can use a static attribute so the device model takes care of addition and removal automatically. Convert "rom" to a static attribute and use the .is_bin_visible() callback to set the correct object size based on the ROM size. Remove "rom_attr" from the struct pci_dev since it is no longer needed. This attribute was added in the pre-git era by https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git/commit/drivers/pci/pci-sysfs.c?id=f6d553444da2 [bhelgaas: commit log] Suggested-by: Oliver O'Halloran Link: https://lore.kernel.org/r/20210416205856.3234481-3-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 86c799c97b77..45f1fef80b50 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -458,7 +458,6 @@ struct pci_dev { u32 saved_config_space[16]; /* Config space saved at suspend time */ struct hlist_head saved_cap_space; - struct bin_attribute *rom_attr; /* Attribute descriptor for sysfs ROM entry */ int rom_attr_enabled; /* Display of ROM attribute enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ -- cgit v1.2.3 From 48cac3f4a96ddf08df8e53809ed066de0dc93915 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 27 Apr 2021 19:43:13 +0200 Subject: bpf: Implement formatted output helpers with bstr_printf BPF has three formatted output helpers: bpf_trace_printk, bpf_seq_printf and bpf_snprintf. Their signatures specify that all arguments are provided from the BPF world as u64s (in an array or as registers). All of these helpers are currently implemented by calling functions such as snprintf() whose signatures take a variable number of arguments, then placed in a va_list by the compiler to call vsnprintf(). "d9c9e4db bpf: Factorize bpf_trace_printk and bpf_seq_printf" introduced a bpf_printf_prepare function that fills an array of u64 sanitized arguments with an array of "modifiers" which indicate what the "real" size of each argument should be (given by the format specifier). The BPF_CAST_FMT_ARG macro consumes these arrays and casts each argument to its real size. However, the C promotion rules implicitely cast them all back to u64s. Therefore, the arguments given to snprintf are u64s and the va_list constructed by the compiler will use 64 bits for each argument. On 64 bit machines, this happens to work well because 32 bit arguments in va_lists need to occupy 64 bits anyway, but on 32 bit architectures this breaks the layout of the va_list expected by the called function and mangles values. In "88a5c690b6 bpf: fix bpf_trace_printk on 32 bit archs", this problem had been solved for bpf_trace_printk only with a "horrid workaround" that emitted multiple calls to trace_printk where each call had different argument types and generated different va_list layouts. One of the call would be dynamically chosen at runtime. This was ok with the 3 arguments that bpf_trace_printk takes but bpf_seq_printf and bpf_snprintf accept up to 12 arguments. Because this approach scales code exponentially, it is not a viable option anymore. Because the promotion rules are part of the language and because the construction of a va_list is an arch-specific ABI, it's best to just avoid variadic arguments and va_lists altogether. Thankfully the kernel's snprintf() has an alternative in the form of bstr_printf() that accepts arguments in a "binary buffer representation". These binary buffers are currently created by vbin_printf and used in the tracing subsystem to split the cost of printing into two parts: a fast one that only dereferences and remembers values, and a slower one, called later, that does the pretty-printing. This patch refactors bpf_printf_prepare to construct binary buffers of arguments consumable by bstr_printf() instead of arrays of arguments and modifiers. This gets rid of BPF_CAST_FMT_ARG and greatly simplifies the bpf_printf_prepare usage but there are a few gotchas that change how bpf_printf_prepare needs to do things. Currently, bpf_printf_prepare uses a per cpu temporary buffer as a generic storage for strings and IP addresses. With this refactoring, the temporary buffers now holds all the arguments in a structured binary format. To comply with the format expected by bstr_printf, certain format specifiers also need to be pre-formatted: %pB and %pi6/%pi4/%pI4/%pI6. Because vsnprintf subroutines for these specifiers are hard to expose, we pre-format these arguments with calls to snprintf(). Reported-by: Rasmus Villemoes Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210427174313.860948-3-revest@chromium.org --- include/linux/bpf.h | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ad4bcf1cadbb..b33f199c4cc2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2081,24 +2081,8 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); -enum bpf_printf_mod_type { - BPF_PRINTF_INT, - BPF_PRINTF_LONG, - BPF_PRINTF_LONG_LONG, -}; - -/* Workaround for getting va_list handling working with different argument type - * combinations generically for 32 and 64 bit archs. - */ -#define BPF_CAST_FMT_ARG(arg_nb, args, mod) \ - (mod[arg_nb] == BPF_PRINTF_LONG_LONG || \ - (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64) \ - ? (u64)args[arg_nb] \ - : (u32)args[arg_nb]) - -int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u64 *final_args, enum bpf_printf_mod_type *mod, - u32 num_args); -void bpf_printf_cleanup(void); +int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, + u32 **bin_buf, u32 num_args); +void bpf_bprintf_cleanup(void); #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From d991bb1c8da842a2a0b9dc83b1005e655783f861 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 29 Apr 2021 22:53:50 -0700 Subject: include/linux/compiler-gcc.h: sparse can do constant folding of __builtin_bswap*() Sparse can do constant folding of __builtin_bswap*() since 2017. Also, a much recent version of Sparse is needed anyway, see commit 6ec4476ac825 ("Raise gcc version requirement to 4.9"). So, remove the comment about sparse not being yet able to constant fold __builtin_bswap*() and remove the corresponding test of __CHECKER__. Link: https://lkml.kernel.org/r/20210226092236.99369-1-luc.vanoostenryck@gmail.com Signed-off-by: Luc Van Oostenryck Acked-by: Arnd Bergmann Acked-by: Miguel Ojeda Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 48750243db4c..5d97ef738a57 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -90,15 +90,11 @@ */ #define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) -/* - * sparse (__CHECKER__) pretends to be gcc, but can't do constant - * folding in __builtin_bswap*() (yet), so don't set these for it. - */ -#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) && !defined(__CHECKER__) +#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ #define __HAVE_BUILTIN_BSWAP16__ -#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP && !__CHECKER__ */ +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ #if GCC_VERSION >= 70000 #define KASAN_ABI_VERSION 5 -- cgit v1.2.3 From 8e9b16c47680f6e7d6e5864a37f313f905a91cf5 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Thu, 29 Apr 2021 22:55:08 -0700 Subject: mm: page_owner: detect page_owner recursion via task_struct Before the change page_owner recursion was detected via fetching backtrace and inspecting it for current instruction pointer. It has a few problems: - it is slightly slow as it requires extra backtrace and a linear stack scan of the result - it is too late to check if backtrace fetching required memory allocation itself (ia64's unwinder requires it). To simplify recursion tracking let's use page_owner recursion flag in 'struct task_struct'. The change make page_owner=on work on ia64 by avoiding infinite recursion in: kmalloc() -> __set_page_owner() -> save_stack() -> unwind() [ia64-specific] -> build_script() -> kmalloc() -> __set_page_owner() [we short-circuit here] -> save_stack() -> unwind() [recursion] Link: https://lkml.kernel.org/r/20210402115342.1463781-1-slyfox@gentoo.org Signed-off-by: Sergei Trofimovich Reviewed-by: Andrew Morton Acked-by: Vlastimil Babka Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Daniel Bristot de Oliveira Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d7d07aa0facf..9c25c8e67030 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -841,6 +841,10 @@ struct task_struct { /* Stalled due to lack of memory */ unsigned in_memstall:1; #endif +#ifdef CONFIG_PAGE_OWNER + /* Used by page_owner=on to detect recursion in page tracking. */ + unsigned in_page_owner:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ -- cgit v1.2.3 From 63135aa3866db99fd923b716c5ff2e468879624a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 29 Apr 2021 22:55:18 -0700 Subject: mm: provide filemap_range_needs_writeback() helper Patch series "Improve IOCB_NOWAIT O_DIRECT reads", v3. An internal workload complained because it was using too much CPU, and when I took a look, we had a lot of io_uring workers going to town. For an async buffered read like workload, I am normally expecting _zero_ offloads to a worker thread, but this one had tons of them. I'd drop caches and things would look good again, but then a minute later we'd regress back to using workers. Turns out that every minute something was reading parts of the device, which would add page cache for that inode. I put patches like these in for our kernel, and the problem was solved. Don't -EAGAIN IOCB_NOWAIT dio reads just because we have page cache entries for the given range. This causes unnecessary work from the callers side, when the IO could have been issued totally fine without blocking on writeback when there is none. This patch (of 3): For O_DIRECT reads/writes, we check if we need to issue a call to filemap_write_and_wait_range() to issue and/or wait for writeback for any page in the given range. The existing mechanism just checks for a page in the range, which is suboptimal for IOCB_NOWAIT as we'll fallback to the slow path (and needing retry) if there's just a clean page cache page in the range. Provide filemap_range_needs_writeback() which tries a little harder to check if we actually need to issue and/or wait for writeback in the range. Link: https://lkml.kernel.org/r/20210224164455.1096727-1-axboe@kernel.dk Link: https://lkml.kernel.org/r/20210224164455.1096727-2-axboe@kernel.dk Signed-off-by: Jens Axboe Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index bf4e90d3ab18..12766edee81f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2878,6 +2878,8 @@ static inline int filemap_fdatawait(struct address_space *mapping) extern bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); +extern bool filemap_range_needs_writeback(struct address_space *, + loff_t lstart, loff_t lend); extern int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); extern int __filemap_fdatawrite_range(struct address_space *mapping, -- cgit v1.2.3 From 1c824a680b1b67ad43c0908f11a70bcf37af56d5 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 29 Apr 2021 22:55:32 -0700 Subject: mm: page-writeback: simplify memcg handling in test_clear_page_writeback() Page writeback doesn't hold a page reference, which allows truncate to free a page the second PageWriteback is cleared. This used to require special attention in test_clear_page_writeback(), where we had to be careful not to rely on the unstable page->memcg binding and look up all the necessary information before clearing the writeback flag. Since commit 073861ed77b6 ("mm: fix VM_BUG_ON(PageTail) and BUG_ON(PageWriteback)") test_clear_page_writeback() is called with an explicit reference on the page, and this dance is no longer needed. Use unlock_page_memcg() and dec_lruvec_page_state() directly. This removes the last user of the lock_page_memcg() return value, change it to void. Touch up the comments in there as well. This also removes the last extern user of __unlock_page_memcg(), make it static. Further, it removes the last user of dec_lruvec_state(), delete it, along with a few other unused helpers. Link: https://lkml.kernel.org/r/YCQbYAWg4nvBFL6h@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Hugh Dickins Reviewed-by: Shakeel Butt Acked-by: Michal Hocko Cc: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ++-------- include/linux/vmstat.h | 24 +++--------------------- 2 files changed, 5 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0c04d39a7967..ae448d955a87 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -867,8 +867,7 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); extern bool cgroup_memory_noswap; #endif -struct mem_cgroup *lock_page_memcg(struct page *page); -void __unlock_page_memcg(struct mem_cgroup *memcg); +void lock_page_memcg(struct page *page); void unlock_page_memcg(struct page *page); /* @@ -1289,12 +1288,7 @@ mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) { } -static inline struct mem_cgroup *lock_page_memcg(struct page *page) -{ - return NULL; -} - -static inline void __unlock_page_memcg(struct mem_cgroup *memcg) +static inline void lock_page_memcg(struct page *page) { } diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 506d625163a1..3299cd69e4ca 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -512,16 +512,10 @@ static inline void mod_lruvec_page_state(struct page *page, #endif /* CONFIG_MEMCG */ -static inline void __inc_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - __mod_lruvec_state(lruvec, idx, 1); -} - -static inline void __dec_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) +static inline void inc_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) { - __mod_lruvec_state(lruvec, idx, -1); + mod_lruvec_state(lruvec, idx, 1); } static inline void __inc_lruvec_page_state(struct page *page, @@ -536,18 +530,6 @@ static inline void __dec_lruvec_page_state(struct page *page, __mod_lruvec_page_state(page, idx, -1); } -static inline void inc_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - mod_lruvec_state(lruvec, idx, 1); -} - -static inline void dec_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - mod_lruvec_state(lruvec, idx, -1); -} - static inline void inc_lruvec_page_state(struct page *page, enum node_stat_item idx) { -- cgit v1.2.3 From 842ca547f706b1e05ccf3026a0ab15d24772a188 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 22:55:35 -0700 Subject: mm: move page_mapping_file to pagemap.h page_mapping_file() is only used by some architectures, and then it is usually only used in one place. Make it a static inline function so other architectures don't have to carry this dead code. Link: https://lkml.kernel.org/r/20210317123011.350118-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Acked-by: Mike Rapoport Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - include/linux/pagemap.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 21115933b9b8..2e5c207e702c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1629,7 +1629,6 @@ static inline pgoff_t page_index(struct page *page) bool page_mapped(struct page *page); struct address_space *page_mapping(struct page *page); -struct address_space *page_mapping_file(struct page *page); /* * Return true only if the page has been allocated with diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4686f9ab0636..469fa7ffcf96 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -157,6 +157,16 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) void release_pages(struct page **pages, int nr); +/* + * For file cache pages, return the address_space, otherwise return NULL + */ +static inline struct address_space *page_mapping_file(struct page *page) +{ + if (unlikely(PageSwapCache(page))) + return NULL; + return page_mapping(page); +} + /* * speculatively take a reference to a page. * If the page is free (_refcount == 0), then _refcount is untouched, and 0 -- cgit v1.2.3 From 458a4f788f8602e5701b3d8c2fb6b021310a7301 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 29 Apr 2021 22:55:50 -0700 Subject: mm/gup: add a range variant of unpin_user_pages_dirty_lock() Add an unpin_user_page_range_dirty_lock() API which takes a starting page and how many consecutive pages we want to unpin and optionally dirty. To that end, define another iterator for_each_compound_range() that operates in page ranges as opposed to page array. For users (like RDMA mr_dereg) where each sg represents a contiguous set of pages, we're able to more efficiently unpin pages without having to supply an array of pages much of what happens today with unpin_user_pages(). Link: https://lkml.kernel.org/r/20210212130843.13865-4-joao.m.martins@oracle.com Suggested-by: Jason Gunthorpe Signed-off-by: Joao Martins Reviewed-by: Jason Gunthorpe Reviewed-by: John Hubbard Cc: Christoph Hellwig Cc: Doug Ledford Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e5c207e702c..702c2a7379d6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1265,6 +1265,8 @@ static inline void put_page(struct page *page) void unpin_user_page(struct page *page); void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty); +void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, + bool make_dirty); void unpin_user_pages(struct page **pages, unsigned long npages); /** -- cgit v1.2.3 From 4066c119483af8e86a75447fd35be1d2553d370f Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 29 Apr 2021 22:55:56 -0700 Subject: mm: gup: remove FOLL_SPLIT Since commit 5a52c9df62b4 ("uprobe: use FOLL_SPLIT_PMD instead of FOLL_SPLIT") and commit ba925fa35057 ("s390/gmap: improve THP splitting") FOLL_SPLIT has not been used anymore. Remove the dead code. Link: https://lkml.kernel.org/r/20210330203900.9222-1-shy828301@gmail.com Signed-off-by: Yang Shi Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 702c2a7379d6..64be3baf861a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2791,7 +2791,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO * and return without waiting upon it */ #define FOLL_POPULATE 0x40 /* fault in page */ -#define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ -- cgit v1.2.3 From a3747b53b1771a787fea71d86a2fc39aea337685 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 29 Apr 2021 22:56:14 -0700 Subject: mm: memcontrol: kill mem_cgroup_nodeinfo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to encapsulate a simple struct member access. Link: https://lkml.kernel.org/r/20210209163304.77088-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Shakeel Butt Reviewed-by: Roman Gushchin Acked-by: Michal Hocko Reviewed-by: Michal Koutný Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ae448d955a87..149dc2fd97a6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -602,12 +602,6 @@ void mem_cgroup_uncharge_list(struct list_head *page_list); void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); -static struct mem_cgroup_per_node * -mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid) -{ - return memcg->nodeinfo[nid]; -} - /** * mem_cgroup_lruvec - get the lru list vector for a memcg & node * @memcg: memcg of the wanted lruvec @@ -631,7 +625,7 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, if (!memcg) memcg = root_mem_cgroup; - mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id); + mz = memcg->nodeinfo[pgdat->node_id]; lruvec = &mz->lruvec; out: /* -- cgit v1.2.3 From a18e6e6e150a98b9ce3e9acabeff407e7b6ba0c0 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 29 Apr 2021 22:56:17 -0700 Subject: mm: memcontrol: privatize memcg_page_state query functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no users outside of the memory controller itself. The rest of the kernel cares either about node or lruvec stats. Link: https://lkml.kernel.org/r/20210209163304.77088-4-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Shakeel Butt Reviewed-by: Roman Gushchin Acked-by: Michal Hocko Reviewed-by: Michal Koutný Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 44 -------------------------------------------- 1 file changed, 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 149dc2fd97a6..9a02aabd0000 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -864,39 +864,6 @@ extern bool cgroup_memory_noswap; void lock_page_memcg(struct page *page); void unlock_page_memcg(struct page *page); -/* - * idx can be of type enum memcg_stat_item or node_stat_item. - * Keep in sync with memcg_exact_page_state(). - */ -static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) -{ - long x = atomic_long_read(&memcg->vmstats[idx]); -#ifdef CONFIG_SMP - if (x < 0) - x = 0; -#endif - return x; -} - -/* - * idx can be of type enum memcg_stat_item or node_stat_item. - * Keep in sync with memcg_exact_page_state(). - */ -static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg, - int idx) -{ - long x = 0; - int cpu; - - for_each_possible_cpu(cpu) - x += per_cpu(memcg->vmstats_local->stat[idx], cpu); -#ifdef CONFIG_SMP - if (x < 0) - x = 0; -#endif - return x; -} - void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); /* idx can be of type enum memcg_stat_item or node_stat_item */ @@ -1322,17 +1289,6 @@ static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) { } -static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) -{ - return 0; -} - -static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg, - int idx) -{ - return 0; -} - static inline void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int nr) -- cgit v1.2.3 From 2d146aa3aa842d7f5065802556b4f9a2c6e8ef12 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 29 Apr 2021 22:56:26 -0700 Subject: mm: memcontrol: switch to rstat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the memory controller's custom hierarchical stats code with the generic rstat infrastructure provided by the cgroup core. The current implementation does batched upward propagation from the write side (i.e. as stats change). The per-cpu batches introduce an error, which is multiplied by the number of subgroups in a tree. In systems with many CPUs and sizable cgroup trees, the error can be large enough to confuse users (e.g. 32 batch pages * 32 CPUs * 32 subgroups results in an error of up to 128M per stat item). This can entirely swallow allocation bursts inside a workload that the user is expecting to see reflected in the statistics. In the past, we've done read-side aggregation, where a memory.stat read would have to walk the entire subtree and add up per-cpu counts. This became problematic with lazily-freed cgroups: we could have large subtrees where most cgroups were entirely idle. Hence the switch to change-driven upward propagation. Unfortunately, it needed to trade accuracy for speed due to the write side being so hot. Rstat combines the best of both worlds: from the write side, it cheaply maintains a queue of cgroups that have pending changes, so that the read side can do selective tree aggregation. This way the reported stats will always be precise and recent as can be, while the aggregation can skip over potentially large numbers of idle cgroups. The way rstat works is that it implements a tree for tracking cgroups with pending local changes, as well as a flush function that walks the tree upwards. The controller then drives this by 1) telling rstat when a local cgroup stat changes (e.g. mod_memcg_state) and 2) when a flush is required to get uptodate hierarchy stats for a given subtree (e.g. when memory.stat is read). The controller also provides a flush callback that is called during the rstat flush walk for each cgroup and aggregates its local per-cpu counters and propagates them upwards. This adds a second vmstats to struct mem_cgroup (MEMCG_NR_STAT + NR_VM_EVENT_ITEMS) to track pending subtree deltas during upward aggregation. It removes 3 words from the per-cpu data. It eliminates memcg_exact_page_state(), since memcg_page_state() is now exact. [akpm@linux-foundation.org: merge fix] [hannes@cmpxchg.org: fix a sleep in atomic section problem] Link: https://lkml.kernel.org/r/20210315234100.64307-1-hannes@cmpxchg.org Link: https://lkml.kernel.org/r/20210209163304.77088-7-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Roman Gushchin Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Reviewed-by: Michal Koutný Acked-by: Balbir Singh Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 67 +++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9a02aabd0000..74910ce9a3f9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -76,10 +76,27 @@ enum mem_cgroup_events_target { }; struct memcg_vmstats_percpu { - long stat[MEMCG_NR_STAT]; - unsigned long events[NR_VM_EVENT_ITEMS]; - unsigned long nr_page_events; - unsigned long targets[MEM_CGROUP_NTARGETS]; + /* Local (CPU and cgroup) page state & events */ + long state[MEMCG_NR_STAT]; + unsigned long events[NR_VM_EVENT_ITEMS]; + + /* Delta calculation for lockless upward propagation */ + long state_prev[MEMCG_NR_STAT]; + unsigned long events_prev[NR_VM_EVENT_ITEMS]; + + /* Cgroup1: threshold notifications & softlimit tree updates */ + unsigned long nr_page_events; + unsigned long targets[MEM_CGROUP_NTARGETS]; +}; + +struct memcg_vmstats { + /* Aggregated (CPU and subtree) page state & events */ + long state[MEMCG_NR_STAT]; + unsigned long events[NR_VM_EVENT_ITEMS]; + + /* Pending child counts during tree propagation */ + long state_pending[MEMCG_NR_STAT]; + unsigned long events_pending[NR_VM_EVENT_ITEMS]; }; struct mem_cgroup_reclaim_iter { @@ -287,8 +304,8 @@ struct mem_cgroup { MEMCG_PADDING(_pad1_); - atomic_long_t vmstats[MEMCG_NR_STAT]; - atomic_long_t vmevents[NR_VM_EVENT_ITEMS]; + /* memory.stat */ + struct memcg_vmstats vmstats; /* memory.events */ atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; @@ -315,10 +332,6 @@ struct mem_cgroup { atomic_t moving_account; struct task_struct *move_lock_task; - /* Legacy local VM stats and events */ - struct memcg_vmstats_percpu __percpu *vmstats_local; - - /* Subtree VM stats and events (batched updates) */ struct memcg_vmstats_percpu __percpu *vmstats_percpu; #ifdef CONFIG_CGROUP_WRITEBACK @@ -939,10 +952,6 @@ static inline void mod_memcg_lruvec_state(struct lruvec *lruvec, local_irq_restore(flags); } -unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, - gfp_t gfp_mask, - unsigned long *total_scanned); - void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count); @@ -1023,6 +1032,10 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, void split_page_memcg(struct page *head, unsigned int nr); +unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, + gfp_t gfp_mask, + unsigned long *total_scanned); + #else /* CONFIG_MEMCG */ #define MEM_CGROUP_ID_SHIFT 0 @@ -1131,6 +1144,10 @@ static inline bool lruvec_holds_page_lru_lock(struct page *page, return lruvec == &pgdat->__lruvec; } +static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +{ +} + static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) { return NULL; @@ -1334,18 +1351,6 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, mod_node_page_state(page_pgdat(page), idx, val); } -static inline -unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, - gfp_t gfp_mask, - unsigned long *total_scanned) -{ - return 0; -} - -static inline void split_page_memcg(struct page *head, unsigned int nr) -{ -} - static inline void count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count) @@ -1368,8 +1373,16 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } -static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +static inline void split_page_memcg(struct page *head, unsigned int nr) +{ +} + +static inline +unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, + gfp_t gfp_mask, + unsigned long *total_scanned) { + return 0; } #endif /* CONFIG_MEMCG */ -- cgit v1.2.3 From 0add0c77a9bd0ce7cd3b53894fb08154881402a4 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 29 Apr 2021 22:56:36 -0700 Subject: memcg: charge before adding to swapcache on swapin Currently the kernel adds the page, allocated for swapin, to the swapcache before charging the page. This is fine but now we want a per-memcg swapcache stat which is essential for folks who wants to transparently migrate from cgroup v1's memsw to cgroup v2's memory and swap counters. In addition charging a page before exposing it to other parts of the kernel is a step in the right direction. To correctly maintain the per-memcg swapcache stat, this patch has adopted to charge the page before adding it to swapcache. One challenge in this option is the failure case of add_to_swap_cache() on which we need to undo the mem_cgroup_charge(). Specifically undoing mem_cgroup_uncharge_swap() is not simple. To resolve the issue, this patch decouples the charging for swapin pages from mem_cgroup_charge(). Two new functions are introduced, mem_cgroup_swapin_charge_page() for just charging the swapin page and mem_cgroup_swapin_uncharge_swap() for uncharging the swap slot once the page has been successfully added to the swapcache. [shakeelb@google.com: set page->private before calling swap_readpage] Link: https://lkml.kernel.org/r/20210318015959.2986837-1-shakeelb@google.com Link: https://lkml.kernel.org/r/20210305212639.775498-1-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Roman Gushchin Acked-by: Johannes Weiner Acked-by: Hugh Dickins Tested-by: Heiko Carstens Cc: Michal Hocko Cc: Stephen Rothwell Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 74910ce9a3f9..e946c96daa32 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -609,6 +609,9 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) } int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); +int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, + gfp_t gfp, swp_entry_t entry); +void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); void mem_cgroup_uncharge(struct page *page); void mem_cgroup_uncharge_list(struct list_head *page_list); @@ -1112,6 +1115,16 @@ static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, return 0; } +static inline int mem_cgroup_swapin_charge_page(struct page *page, + struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) +{ + return 0; +} + +static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) +{ +} + static inline void mem_cgroup_uncharge(struct page *page) { } -- cgit v1.2.3 From b4e0b68fbd9d1fd7e31cbe8adca3ad6cf556e2ee Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 29 Apr 2021 22:56:52 -0700 Subject: mm: memcontrol: use obj_cgroup APIs to charge kmem pages Since Roman's series "The new cgroup slab memory controller" applied. All slab objects are charged via the new APIs of obj_cgroup. The new APIs introduce a struct obj_cgroup to charge slab objects. It prevents long-living objects from pinning the original memory cgroup in the memory. But there are still some corner objects (e.g. allocations larger than order-1 page on SLUB) which are not charged via the new APIs. Those objects (include the pages which are allocated from buddy allocator directly) are charged as kmem pages which still hold a reference to the memory cgroup. We want to reuse the obj_cgroup APIs to charge the kmem pages. If we do that, we should store an object cgroup pointer to page->memcg_data for the kmem pages. Finally, page->memcg_data will have 3 different meanings. 1) For the slab pages, page->memcg_data points to an object cgroups vector. 2) For the kmem pages (exclude the slab pages), page->memcg_data points to an object cgroup. 3) For the user pages (e.g. the LRU pages), page->memcg_data points to a memory cgroup. We do not change the behavior of page_memcg() and page_memcg_rcu(). They are also suitable for LRU pages and kmem pages. Why? Because memory allocations pinning memcgs for a long time - it exists at a larger scale and is causing recurring problems in the real world: page cache doesn't get reclaimed for a long time, or is used by the second, third, fourth, ... instance of the same job that was restarted into a new cgroup every time. Unreclaimable dying cgroups pile up, waste memory, and make page reclaim very inefficient. We can convert LRU pages and most other raw memcg pins to the objcg direction to fix this problem, and then the page->memcg will always point to an object cgroup pointer. At that time, LRU pages and kmem pages will be treated the same. The implementation of page_memcg() will remove the kmem page check. This patch aims to charge the kmem pages by using the new APIs of obj_cgroup. Finally, the page->memcg_data of the kmem page points to an object cgroup. We can use the __page_objcg() to get the object cgroup associated with a kmem page. Or we can use page_memcg() to get the memory cgroup associated with a kmem page, but caller must ensure that the returned memcg won't be released (e.g. acquire the rcu_read_lock or css_set_lock). Link: https://lkml.kernel.org/r/20210401030141.37061-1-songmuchun@bytedance.com Link: https://lkml.kernel.org/r/20210319163821.20704-6-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Acked-by: Roman Gushchin Reviewed-by: Miaohe Lin Cc: Michal Hocko Cc: Vladimir Davydov Cc: Xiongchun Duan Cc: Christian Borntraeger [songmuchun@bytedance.com: fix forget to obtain the ref to objcg in split_page_memcg] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 120 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 97 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e946c96daa32..78ca34c935ab 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -371,6 +371,62 @@ enum page_memcg_data_flags { #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) +static inline bool PageMemcgKmem(struct page *page); + +/* + * After the initialization objcg->memcg is always pointing at + * a valid memcg, but can be atomically swapped to the parent memcg. + * + * The caller must ensure that the returned memcg won't be released: + * e.g. acquire the rcu_read_lock or css_set_lock. + */ +static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) +{ + return READ_ONCE(objcg->memcg); +} + +/* + * __page_memcg - get the memory cgroup associated with a non-kmem page + * @page: a pointer to the page struct + * + * Returns a pointer to the memory cgroup associated with the page, + * or NULL. This function assumes that the page is known to have a + * proper memory cgroup pointer. It's not safe to call this function + * against some type of pages, e.g. slab pages or ex-slab pages or + * kmem pages. + */ +static inline struct mem_cgroup *__page_memcg(struct page *page) +{ + unsigned long memcg_data = page->memcg_data; + + VM_BUG_ON_PAGE(PageSlab(page), page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); + + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + +/* + * __page_objcg - get the object cgroup associated with a kmem page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroup associated with the page, + * or NULL. This function assumes that the page is known to have a + * proper object cgroup pointer. It's not safe to call this function + * against some type of pages, e.g. slab pages or ex-slab pages or + * LRU pages. + */ +static inline struct obj_cgroup *__page_objcg(struct page *page) +{ + unsigned long memcg_data = page->memcg_data; + + VM_BUG_ON_PAGE(PageSlab(page), page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); + VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page); + + return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + /* * page_memcg - get the memory cgroup associated with a page * @page: a pointer to the page struct @@ -380,20 +436,23 @@ enum page_memcg_data_flags { * proper memory cgroup pointer. It's not safe to call this function * against some type of pages, e.g. slab pages or ex-slab pages. * - * Any of the following ensures page and memcg binding stability: + * For a non-kmem page any of the following ensures page and memcg binding + * stability: + * * - the page lock * - LRU isolation * - lock_page_memcg() * - exclusive reference + * + * For a kmem page a caller should hold an rcu read lock to protect memcg + * associated with a kmem page from being released. */ static inline struct mem_cgroup *page_memcg(struct page *page) { - unsigned long memcg_data = page->memcg_data; - - VM_BUG_ON_PAGE(PageSlab(page), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); - - return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + if (PageMemcgKmem(page)) + return obj_cgroup_memcg(__page_objcg(page)); + else + return __page_memcg(page); } /* @@ -407,11 +466,19 @@ static inline struct mem_cgroup *page_memcg(struct page *page) */ static inline struct mem_cgroup *page_memcg_rcu(struct page *page) { + unsigned long memcg_data = READ_ONCE(page->memcg_data); + VM_BUG_ON_PAGE(PageSlab(page), page); WARN_ON_ONCE(!rcu_read_lock_held()); - return (struct mem_cgroup *)(READ_ONCE(page->memcg_data) & - ~MEMCG_DATA_FLAGS_MASK); + if (memcg_data & MEMCG_DATA_KMEM) { + struct obj_cgroup *objcg; + + objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return obj_cgroup_memcg(objcg); + } + + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* @@ -419,15 +486,21 @@ static inline struct mem_cgroup *page_memcg_rcu(struct page *page) * @page: a pointer to the page struct * * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function unlike page_memcg() can take any page + * or NULL. This function unlike page_memcg() can take any page * as an argument. It has to be used in cases when it's not known if a page - * has an associated memory cgroup pointer or an object cgroups vector. + * has an associated memory cgroup pointer or an object cgroups vector or + * an object cgroup. + * + * For a non-kmem page any of the following ensures page and memcg binding + * stability: * - * Any of the following ensures page and memcg binding stability: * - the page lock * - LRU isolation * - lock_page_memcg() * - exclusive reference + * + * For a kmem page a caller should hold an rcu read lock to protect memcg + * associated with a kmem page from being released. */ static inline struct mem_cgroup *page_memcg_check(struct page *page) { @@ -440,6 +513,13 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) if (memcg_data & MEMCG_DATA_OBJCGS) return NULL; + if (memcg_data & MEMCG_DATA_KMEM) { + struct obj_cgroup *objcg; + + objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return obj_cgroup_memcg(objcg); + } + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } @@ -718,21 +798,15 @@ static inline void obj_cgroup_get(struct obj_cgroup *objcg) percpu_ref_get(&objcg->refcnt); } -static inline void obj_cgroup_put(struct obj_cgroup *objcg) +static inline void obj_cgroup_get_many(struct obj_cgroup *objcg, + unsigned long nr) { - percpu_ref_put(&objcg->refcnt); + percpu_ref_get_many(&objcg->refcnt, nr); } -/* - * After the initialization objcg->memcg is always pointing at - * a valid memcg, but can be atomically swapped to the parent memcg. - * - * The caller must ensure that the returned memcg won't be released: - * e.g. acquire the rcu_read_lock or css_set_lock. - */ -static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) +static inline void obj_cgroup_put(struct obj_cgroup *objcg) { - return READ_ONCE(objcg->memcg); + percpu_ref_put(&objcg->refcnt); } static inline void mem_cgroup_put(struct mem_cgroup *memcg) -- cgit v1.2.3 From bd290e1e75d8a8b2d87031b63db56ae165677870 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 29 Apr 2021 22:56:58 -0700 Subject: mm: memcontrol: move PageMemcgKmem to the scope of CONFIG_MEMCG_KMEM The page only can be marked as kmem when CONFIG_MEMCG_KMEM is enabled. So move PageMemcgKmem() to the scope of the CONFIG_MEMCG_KMEM. As a bonus, on !CONFIG_MEMCG_KMEM build some code can be compiled out. Link: https://lkml.kernel.org/r/20210319163821.20704-8-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Roman Gushchin Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Xiongchun Duan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 78ca34c935ab..bcf92b99f001 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -523,6 +523,7 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } +#ifdef CONFIG_MEMCG_KMEM /* * PageMemcgKmem - check if the page has MemcgKmem flag set * @page: a pointer to the page struct @@ -537,7 +538,6 @@ static inline bool PageMemcgKmem(struct page *page) return page->memcg_data & MEMCG_DATA_KMEM; } -#ifdef CONFIG_MEMCG_KMEM /* * page_objcgs - get the object cgroups vector associated with a page * @page: a pointer to the page struct @@ -579,6 +579,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) } #else +static inline bool PageMemcgKmem(struct page *page) +{ + return false; +} + static inline struct obj_cgroup **page_objcgs(struct page *page) { return NULL; -- cgit v1.2.3 From a10e995749a6c65833edd201c55665e5d44d14fc Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Thu, 29 Apr 2021 22:57:01 -0700 Subject: linux/memcontrol.h: remove duplicate struct declaration struct mem_cgroup is declared twice. One has been declared at forward struct declaration. Remove the duplicate. Link: https://lkml.kernel.org/r/20210330020246.2265371-1-wanjiabing@vivo.com Signed-off-by: Wan Jiabing Reviewed-by: Muchun Song Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bcf92b99f001..5904716f29ba 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1123,8 +1123,6 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, #define MEM_CGROUP_ID_SHIFT 0 #define MEM_CGROUP_ID_MAX 0 -struct mem_cgroup; - static inline struct mem_cgroup *page_memcg(struct page *page) { return NULL; -- cgit v1.2.3 From 74ffa5a3e68504dd289135b1cf0422c19ffb3f2e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Apr 2021 22:57:29 -0700 Subject: mm: add remap_pfn_range_notrack Patch series "add remap_pfn_range_notrack instead of reinventing it in i915", v2. i915 has some reason to want to avoid the track_pfn_remap overhead in remap_pfn_range. Add a function to the core VM to do just that rather than reinventing the functionality poorly in the driver. Note that the remap_io_sg path does get exercises when using Xorg on my Thinkpad X1, so this should be considered lightly tested, I've not managed to hit the remap_io_mapping path at all. This patch (of 4): Add a version of remap_pfn_range that does not call track_pfn_range. This will be used to fix horrible abuses of VM internals in the i915 driver. Link: https://lkml.kernel.org/r/20210326055505.1424432-1-hch@lst.de Link: https://lkml.kernel.org/r/20210326055505.1424432-2-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Daniel Vetter Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Chris Wilson Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 64be3baf861a..a8335cecf706 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2732,6 +2732,8 @@ unsigned long change_prot_numa(struct vm_area_struct *vma, struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); +int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t prot); int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, struct page **pages, unsigned long *num); -- cgit v1.2.3 From 1fbaf8fc12a0136c7e62e7ad6fe886fe1749912c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Apr 2021 22:57:32 -0700 Subject: mm: add a io_mapping_map_user helper Add a helper that calls remap_pfn_range for an struct io_mapping, relying on the pgprot pre-validation done when creating the mapping instead of doing it at runtime. Link: https://lkml.kernel.org/r/20210326055505.1424432-3-hch@lst.de Signed-off-by: Christoph Hellwig Cc: Chris Wilson Cc: Daniel Vetter Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Peter Zijlstra Cc: Rodrigo Vivi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/io-mapping.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index c093e81310a9..e9743cfd8585 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -220,3 +220,6 @@ io_mapping_free(struct io_mapping *iomap) } #endif /* _LINUX_IO_MAPPING_H */ + +int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size); -- cgit v1.2.3 From 14d071134c740cfe61c09fc506fd3ab052beea10 Mon Sep 17 00:00:00 2001 From: Brian Geffon Date: Thu, 29 Apr 2021 22:57:48 -0700 Subject: Revert "mremap: don't allow MREMAP_DONTUNMAP on special_mappings and aio" This reverts commit cd544fd1dc9293c6702fab6effa63dac1cc67e99. As discussed in [1] this commit was a no-op because the mapping type was checked in vma_to_resize before move_vma is ever called. This meant that vm_ops->mremap() would never be called on such mappings. Furthermore, we've since expanded support of MREMAP_DONTUNMAP to non-anonymous mappings, and these special mappings are still protected by the existing check of !VM_DONTEXPAND and !VM_PFNMAP which will result in a -EINVAL. 1. https://lkml.org/lkml/2020/12/28/2340 Link: https://lkml.kernel.org/r/20210323182520.2712101-2-bgeffon@google.com Signed-off-by: Brian Geffon Acked-by: Hugh Dickins Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Alejandro Colomar Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: "Kirill A . Shutemov" Cc: Lokesh Gidra Cc: Michael Kerrisk Cc: "Michael S . Tsirkin" Cc: Mike Rapoport Cc: Minchan Kim Cc: Peter Xu Cc: Sonny Rao Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a8335cecf706..93097dbd9604 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -580,7 +580,7 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); /* Called any time before splitting to check if it's allowed */ int (*may_split)(struct vm_area_struct *area, unsigned long addr); - int (*mremap)(struct vm_area_struct *area, unsigned long flags); + int (*mremap)(struct vm_area_struct *area); /* * Called by mprotect() to make driver-specific permission * checks before mprotect() is finalised. The VMA must not -- cgit v1.2.3 From bbc180a5adb05ee8053fab7a0c0bd56c5964240e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 29 Apr 2021 22:58:26 -0700 Subject: mm: HUGE_VMAP arch support cleanup This changes the awkward approach where architectures provide init functions to determine which levels they can provide large mappings for, to one where the arch is queried for each call. This removes code and indirection, and allows constant-folding of dead code for unsupported levels. This also adds a prot argument to the arch query. This is unused currently but could help with some architectures (e.g., some powerpc processors can't map uncacheable memory with large pages). Link: https://lkml.kernel.org/r/20210317062402.533919-7-npiggin@gmail.com Signed-off-by: Nicholas Piggin Reviewed-by: Ding Tianhong Acked-by: Catalin Marinas [arm64] Cc: Will Deacon Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Christoph Hellwig Cc: Miaohe Lin Cc: Michael Ellerman Cc: Russell King Cc: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/io.h | 9 --------- include/linux/vmalloc.h | 6 ++++++ 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index 61ff7d6278b6..9595151d800d 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -31,15 +31,6 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end, } #endif -#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP -void __init ioremap_huge_init(void); -int arch_ioremap_p4d_supported(void); -int arch_ioremap_pud_supported(void); -int arch_ioremap_pmd_supported(void); -#else -static inline void ioremap_huge_init(void) { } -#endif - /* * Managed iomap interface */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 3de7be6dd17c..358c51c702c0 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -78,6 +78,12 @@ struct vmap_area { }; }; +#ifndef CONFIG_HAVE_ARCH_HUGE_VMAP +static inline bool arch_vmap_p4d_supported(pgprot_t prot) { return false; } +static inline bool arch_vmap_pud_supported(pgprot_t prot) { return false; } +static inline bool arch_vmap_pmd_supported(pgprot_t prot) { return false; } +#endif + /* * Highlevel APIs for driver use */ -- cgit v1.2.3 From 6f680e70b6ff58c9670769534196800233685d55 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 29 Apr 2021 22:58:39 -0700 Subject: mm/vmalloc: provide fallback arch huge vmap support functions If an architecture doesn't support a particular page table level as a huge vmap page size then allow it to skip defining the support query function. Link: https://lkml.kernel.org/r/20210317062402.533919-11-npiggin@gmail.com Signed-off-by: Nicholas Piggin Suggested-by: Christoph Hellwig Cc: Borislav Petkov Cc: Catalin Marinas Cc: Ding Tianhong Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Miaohe Lin Cc: Michael Ellerman Cc: Russell King Cc: Thomas Gleixner Cc: Uladzislau Rezki (Sony) Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 358c51c702c0..eb5630be6783 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -78,10 +78,26 @@ struct vmap_area { }; }; -#ifndef CONFIG_HAVE_ARCH_HUGE_VMAP -static inline bool arch_vmap_p4d_supported(pgprot_t prot) { return false; } -static inline bool arch_vmap_pud_supported(pgprot_t prot) { return false; } -static inline bool arch_vmap_pmd_supported(pgprot_t prot) { return false; } +/* archs that select HAVE_ARCH_HUGE_VMAP should override one or more of these */ +#ifndef arch_vmap_p4d_supported +static inline bool arch_vmap_p4d_supported(pgprot_t prot) +{ + return false; +} +#endif + +#ifndef arch_vmap_pud_supported +static inline bool arch_vmap_pud_supported(pgprot_t prot) +{ + return false; +} +#endif + +#ifndef arch_vmap_pmd_supported +static inline bool arch_vmap_pmd_supported(pgprot_t prot) +{ + return false; +} #endif /* -- cgit v1.2.3 From 5e9e3d777b99aabe2f91f793a52e870a02642160 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 29 Apr 2021 22:58:43 -0700 Subject: mm: move vmap_range from mm/ioremap.c to mm/vmalloc.c This is a generic kernel virtual memory mapper, not specific to ioremap. Code is unchanged other than making vmap_range non-static. Link: https://lkml.kernel.org/r/20210317062402.533919-12-npiggin@gmail.com Signed-off-by: Nicholas Piggin Reviewed-by: Christoph Hellwig Cc: Borislav Petkov Cc: Catalin Marinas Cc: Ding Tianhong Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Miaohe Lin Cc: Michael Ellerman Cc: Russell King Cc: Thomas Gleixner Cc: Uladzislau Rezki (Sony) Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index eb5630be6783..ae9eb07d30d4 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -189,6 +189,9 @@ extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); #ifdef CONFIG_MMU +int vmap_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift); extern int map_kernel_range_noflush(unsigned long start, unsigned long size, pgprot_t prot, struct page **pages); int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot, -- cgit v1.2.3 From 121e6f3258fe393e22c36f61a319be8a4f2c05ae Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 29 Apr 2021 22:58:49 -0700 Subject: mm/vmalloc: hugepage vmalloc mappings Support huge page vmalloc mappings. Config option HAVE_ARCH_HUGE_VMALLOC enables support on architectures that define HAVE_ARCH_HUGE_VMAP and supports PMD sized vmap mappings. vmalloc will attempt to allocate PMD-sized pages if allocating PMD size or larger, and fall back to small pages if that was unsuccessful. Architectures must ensure that any arch specific vmalloc allocations that require PAGE_SIZE mappings (e.g., module allocations vs strict module rwx) use the VM_NOHUGE flag to inhibit larger mappings. This can result in more internal fragmentation and memory overhead for a given allocation, an option nohugevmalloc is added to disable at boot. [colin.king@canonical.com: fix read of uninitialized pointer area] Link: https://lkml.kernel.org/r/20210318155955.18220-1-colin.king@canonical.com Link: https://lkml.kernel.org/r/20210317062402.533919-14-npiggin@gmail.com Signed-off-by: Nicholas Piggin Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Ding Tianhong Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Miaohe Lin Cc: Michael Ellerman Cc: Russell King Cc: Thomas Gleixner Cc: Uladzislau Rezki (Sony) Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index ae9eb07d30d4..b4c82f2d40dc 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -26,6 +26,7 @@ struct notifier_block; /* in notifier.h */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ #define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ +#define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */ /* * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC. @@ -54,6 +55,9 @@ struct vm_struct { unsigned long size; unsigned long flags; struct page **pages; +#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC + unsigned int page_order; +#endif unsigned int nr_pages; phys_addr_t phys_addr; const void *caller; @@ -188,6 +192,22 @@ void free_vm_area(struct vm_struct *area); extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); +static inline bool is_vm_area_hugepages(const void *addr) +{ + /* + * This may not 100% tell if the area is mapped with > PAGE_SIZE + * page table entries, if for some reason the architecture indicates + * larger sizes are available but decides not to use them, nothing + * prevents that. This only indicates the size of the physical page + * allocated in the vmalloc layer. + */ +#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC + return find_vm_area(addr)->page_order > 0; +#else + return false; +#endif +} + #ifdef CONFIG_MMU int vmap_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot, @@ -205,6 +225,7 @@ static inline void set_vm_flush_reset_perms(void *addr) if (vm) vm->flags |= VM_FLUSH_RESET_PERMS; } + #else static inline int map_kernel_range_noflush(unsigned long start, unsigned long size, -- cgit v1.2.3 From b67177ecd956333029dbc1a4971a857fee0ccbb1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 29 Apr 2021 22:58:53 -0700 Subject: mm/vmalloc: remove map_kernel_range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm/vmalloc: cleanup after hugepage series", v2. Christoph pointed out some overdue cleanups required after the huge vmalloc series, and I had another failure error message improvement as well. This patch (of 5): This is a shim around vmap_pages_range, get rid of it. Move the main API comment from the _noflush variant to the normal variant, and make _noflush internal to mm/. Link: https://lkml.kernel.org/r/20210322021806.892164-1-npiggin@gmail.com Link: https://lkml.kernel.org/r/20210322021806.892164-2-npiggin@gmail.com Signed-off-by: Nicholas Piggin Reviewed-by: Christoph Hellwig Cc: Uladzislau Rezki Cc: Cédric Le Goater Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b4c82f2d40dc..fb3b9989a4c5 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -212,10 +212,6 @@ static inline bool is_vm_area_hugepages(const void *addr) int vmap_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot, unsigned int max_page_shift); -extern int map_kernel_range_noflush(unsigned long start, unsigned long size, - pgprot_t prot, struct page **pages); -int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot, - struct page **pages); extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); extern void unmap_kernel_range(unsigned long addr, unsigned long size); static inline void set_vm_flush_reset_perms(void *addr) @@ -227,13 +223,6 @@ static inline void set_vm_flush_reset_perms(void *addr) } #else -static inline int -map_kernel_range_noflush(unsigned long start, unsigned long size, - pgprot_t prot, struct page **pages) -{ - return size >> PAGE_SHIFT; -} -#define map_kernel_range map_kernel_range_noflush static inline void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) { -- cgit v1.2.3 From 4ad0ae8c64ac8f81a3651bca11be7c3cb086df80 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 29 Apr 2021 22:59:01 -0700 Subject: mm/vmalloc: remove unmap_kernel_range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a shim around vunmap_range, get rid of it. Move the main API comment from the _noflush variant to the normal variant, and make _noflush internal to mm/. [npiggin@gmail.com: fix nommu builds and a comment bug per sfr] Link: https://lkml.kernel.org/r/1617292598.m6g0knx24s.astroid@bobo.none [akpm@linux-foundation.org: move vunmap_range_noflush() stub inside !CONFIG_MMU, not !CONFIG_NUMA] [npiggin@gmail.com: fix nommu builds] Link: https://lkml.kernel.org/r/1617292497.o1uhq5ipxp.astroid@bobo.none Link: https://lkml.kernel.org/r/20210322021806.892164-5-npiggin@gmail.com Signed-off-by: Nicholas Piggin Reviewed-by: Christoph Hellwig Cc: Cédric Le Goater Cc: Uladzislau Rezki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index fb3b9989a4c5..394d03cc0e92 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -212,8 +212,7 @@ static inline bool is_vm_area_hugepages(const void *addr) int vmap_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot, unsigned int max_page_shift); -extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); -extern void unmap_kernel_range(unsigned long addr, unsigned long size); +void vunmap_range(unsigned long addr, unsigned long end); static inline void set_vm_flush_reset_perms(void *addr) { struct vm_struct *vm = find_vm_area(addr); @@ -223,11 +222,6 @@ static inline void set_vm_flush_reset_perms(void *addr) } #else -static inline void -unmap_kernel_range_noflush(unsigned long addr, unsigned long size) -{ -} -#define unmap_kernel_range unmap_kernel_range_noflush static inline void set_vm_flush_reset_perms(void *addr) { } -- cgit v1.2.3 From 78f4841e34763079be0661744c1ca997be64eb56 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 22:59:25 -0700 Subject: mm/doc: fix fault_flag_allow_retry_first kerneldoc make htmldocs reports: include/linux/mm.h:496: warning: Function parameter or member 'flags' not described in 'fault_flag_allow_retry_first' Add a description. Link: https://lkml.kernel.org/r/20210322195022.2143603-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: John Hubbard Cc: Mike Rapoport Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 93097dbd9604..382c33e7d906 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -485,6 +485,7 @@ extern pgprot_t protection_map[16]; /** * fault_flag_allow_retry_first - check ALLOW_RETRY the first time + * @flags: Fault flags. * * This is mostly used for places where we want to try to avoid taking * the mmap_lock for too long a time when waiting for another condition -- cgit v1.2.3 From 136dfc9949f84089217f84e6478471dabbf14ba7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 22:59:28 -0700 Subject: mm/doc: fix page_maybe_dma_pinned kerneldoc make htmldocs reports: include/linux/mm.h:1341: warning: Excess function parameter 'Return' description in 'page_maybe_dma_pinned' Fix a few other formatting nits while I'm editing this description. Link: https://lkml.kernel.org/r/20210322195022.2143603-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Mike Rapoport Reviewed-by: John Hubbard Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 382c33e7d906..5acda8761935 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1271,10 +1271,11 @@ void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, void unpin_user_pages(struct page **pages, unsigned long npages); /** - * page_maybe_dma_pinned() - report if a page is pinned for DMA. + * page_maybe_dma_pinned - Report if a page is pinned for DMA. + * @page: The page. * * This function checks if a page has been pinned via a call to - * pin_user_pages*(). + * a function in the pin_user_pages() family. * * For non-huge pages, the return value is partially fuzzy: false is not fuzzy, * because it means "definitely not pinned for DMA", but true means "probably @@ -1292,9 +1293,8 @@ void unpin_user_pages(struct page **pages, unsigned long npages); * * For more information, please see Documentation/core-api/pin_user_pages.rst. * - * @page: pointer to page to be queried. - * @Return: True, if it is likely that the page has been "dma-pinned". - * False, if the page is definitely not dma-pinned. + * Return: True, if it is likely that the page has been "dma-pinned". + * False, if the page is definitely not dma-pinned. */ static inline bool page_maybe_dma_pinned(struct page *page) { -- cgit v1.2.3 From da2f5eb3d344503c4d851bdf1ae2379167074413 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 22:59:31 -0700 Subject: mm/doc: turn fault flags into an enum The kernel-doc script complains about include/linux/mm.h:425: warning: wrong kernel-doc identifier on line: * Fault flag definitions. I don't know how to document a series of #defines, so turn these definitions into an enum and document that instead. Link: https://lkml.kernel.org/r/20210322195022.2143603-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Mike Rapoport Cc: John Hubbard Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5acda8761935..64b2e3a0b94d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -432,8 +432,7 @@ extern unsigned int kobjsize(const void *objp); extern pgprot_t protection_map[16]; /** - * Fault flag definitions. - * + * enum fault_flag - Fault flag definitions. * @FAULT_FLAG_WRITE: Fault was a write fault. * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE. * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked. @@ -464,16 +463,18 @@ extern pgprot_t protection_map[16]; * signals before a retry to make sure the continuous page faults can still be * interrupted if necessary. */ -#define FAULT_FLAG_WRITE 0x01 -#define FAULT_FLAG_MKWRITE 0x02 -#define FAULT_FLAG_ALLOW_RETRY 0x04 -#define FAULT_FLAG_RETRY_NOWAIT 0x08 -#define FAULT_FLAG_KILLABLE 0x10 -#define FAULT_FLAG_TRIED 0x20 -#define FAULT_FLAG_USER 0x40 -#define FAULT_FLAG_REMOTE 0x80 -#define FAULT_FLAG_INSTRUCTION 0x100 -#define FAULT_FLAG_INTERRUPTIBLE 0x200 +enum fault_flag { + FAULT_FLAG_WRITE = 1 << 0, + FAULT_FLAG_MKWRITE = 1 << 1, + FAULT_FLAG_ALLOW_RETRY = 1 << 2, + FAULT_FLAG_RETRY_NOWAIT = 1 << 3, + FAULT_FLAG_KILLABLE = 1 << 4, + FAULT_FLAG_TRIED = 1 << 5, + FAULT_FLAG_USER = 1 << 6, + FAULT_FLAG_REMOTE = 1 << 7, + FAULT_FLAG_INSTRUCTION = 1 << 8, + FAULT_FLAG_INTERRUPTIBLE = 1 << 9, +}; /* * The default fault flags that should be used by most of the @@ -496,7 +497,7 @@ extern pgprot_t protection_map[16]; * Return: true if the page fault allows retry and this is the first * attempt of the fault handling; false otherwise. */ -static inline bool fault_flag_allow_retry_first(unsigned int flags) +static inline bool fault_flag_allow_retry_first(enum fault_flag flags) { return (flags & FAULT_FLAG_ALLOW_RETRY) && (!(flags & FAULT_FLAG_TRIED)); @@ -531,7 +532,7 @@ struct vm_fault { pgoff_t pgoff; /* Logical page offset based on vma */ unsigned long address; /* Faulting virtual address */ }; - unsigned int flags; /* FAULT_FLAG_xxx flags + enum fault_flag flags; /* FAULT_FLAG_xxx flags * XXX: should really be 'const' */ pmd_t *pmd; /* Pointer to pmd entry matching * the 'address' */ -- cgit v1.2.3 From 91ab1a41191ef2d4c6e123951a0f0c3876bd9376 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 29 Apr 2021 22:59:40 -0700 Subject: pagewalk: prefix struct kernel-doc descriptions The script './scripts/kernel-doc -none ./include/linux/pagewalk.h' reports: include/linux/pagewalk.h:37: warning: cannot understand function prototype: 'struct mm_walk_ops ' include/linux/pagewalk.h:85: warning: cannot understand function prototype: 'struct mm_walk ' A kernel-doc description for a structure requires to prefix the struct name with the keyword 'struct'. So, do that such that no further kernel-doc warnings are reported for this file. Link: https://lkml.kernel.org/r/20210322122542.15072-3-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Cc: Joe Perches Cc: Jonathan Corbet Cc: Ralf Ramsauer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagewalk.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index b1cb6b753abb..ac7b38ad5903 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -7,7 +7,7 @@ struct mm_walk; /** - * mm_walk_ops - callbacks for walk_page_range + * struct mm_walk_ops - callbacks for walk_page_range * @pgd_entry: if set, called for each non-empty PGD (top-level) entry * @p4d_entry: if set, called for each non-empty P4D entry * @pud_entry: if set, called for each non-empty PUD entry @@ -71,7 +71,7 @@ enum page_walk_action { }; /** - * mm_walk - walk_page_range data + * struct mm_walk - walk_page_range data * @ops: operation to call during the walk * @mm: mm_struct representing the target process of page table walk * @pgd: pointer to PGD; only valid with no_vma (otherwise set to NULL) -- cgit v1.2.3 From a064cb00d359bc464df6fd2ab6dfb8dc4b31e361 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 29 Apr 2021 22:59:49 -0700 Subject: kasan: initialize shadow to TAG_INVALID for SW_TAGS Currently, KASAN_SW_TAGS uses 0xFF as the default tag value for unallocated memory. The underlying idea is that since that memory hasn't been allocated yet, it's only supposed to be dereferenced through a pointer with the native 0xFF tag. While this is a good idea in terms on consistency, practically it doesn't bring any benefit. Since the 0xFF pointer tag is a match-all tag, it doesn't matter what tag the accessed memory has. No accesses through 0xFF-tagged pointers are considered buggy by KASAN. This patch changes the default tag value for unallocated memory to 0xFE, which is the tag KASAN uses for inaccessible memory. This doesn't affect accesses through 0xFF-tagged pointer to this memory, but this allows KASAN to detect wild and large out-of-bounds invalid memory accesses through otherwise-tagged pointers. This is a prepatory patch for the next one, which changes the tag-based KASAN modes to not poison the boot memory. Link: https://lkml.kernel.org/r/c8e93571c18b3528aac5eb33ade213bf133d10ad.1613692950.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d53ea3c047bc..9f5faefd1744 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -30,7 +30,8 @@ struct kunit_kasan_expectation { /* Software KASAN implementations use shadow memory. */ #ifdef CONFIG_KASAN_SW_TAGS -#define KASAN_SHADOW_INIT 0xFF +/* This matches KASAN_TAG_INVALID. */ +#define KASAN_SHADOW_INIT 0xFE #else #define KASAN_SHADOW_INIT 0 #endif -- cgit v1.2.3 From 1bb5eab30d68c1a3d9dbc822e1895e6c06dbe748 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 29 Apr 2021 23:00:02 -0700 Subject: kasan, mm: integrate page_alloc init with HW_TAGS This change uses the previously added memory initialization feature of HW_TAGS KASAN routines for page_alloc memory when init_on_alloc/free is enabled. With this change, kernel_init_free_pages() is no longer called when both HW_TAGS KASAN and init_on_alloc/free are enabled. Instead, memory is initialized in KASAN runtime. To avoid discrepancies with which memory gets initialized that can be caused by future changes, both KASAN and kernel_init_free_pages() hooks are put together and a warning comment is added. This patch changes the order in which memory initialization and page poisoning hooks are called. This doesn't lead to any side-effects, as whenever page poisoning is enabled, memory initialization gets disabled. Combining setting allocation tags with memory initialization improves HW_TAGS KASAN performance when init_on_alloc/free is enabled. [andreyknvl@google.com: fix for "integrate page_alloc init with HW_TAGS"] Link: https://lkml.kernel.org/r/65b6028dea2e9a6e8e2cb779b5115c09457363fc.1617122211.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/e77f0d5b1b20658ef0b8288625c74c2b3690e725.1615296150.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Tested-by: Vlastimil Babka Reviewed-by: Sergei Trofimovich Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Christoph Lameter Cc: David Rientjes Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Joonsoo Kim Cc: Kevin Brodsky Cc: Pekka Enberg Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9f5faefd1744..30aa2bee8400 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -96,6 +96,11 @@ static __always_inline bool kasan_enabled(void) return static_branch_likely(&kasan_flag_enabled); } +static inline bool kasan_has_integrated_init(void) +{ + return kasan_enabled(); +} + #else /* CONFIG_KASAN_HW_TAGS */ static inline bool kasan_enabled(void) @@ -103,6 +108,11 @@ static inline bool kasan_enabled(void) return true; } +static inline bool kasan_has_integrated_init(void) +{ + return false; +} + #endif /* CONFIG_KASAN_HW_TAGS */ slab_flags_t __kasan_never_merge(void); @@ -120,20 +130,20 @@ static __always_inline void kasan_unpoison_range(const void *addr, size_t size) __kasan_unpoison_range(addr, size); } -void __kasan_alloc_pages(struct page *page, unsigned int order); +void __kasan_alloc_pages(struct page *page, unsigned int order, bool init); static __always_inline void kasan_alloc_pages(struct page *page, - unsigned int order) + unsigned int order, bool init) { if (kasan_enabled()) - __kasan_alloc_pages(page, order); + __kasan_alloc_pages(page, order, init); } -void __kasan_free_pages(struct page *page, unsigned int order); +void __kasan_free_pages(struct page *page, unsigned int order, bool init); static __always_inline void kasan_free_pages(struct page *page, - unsigned int order) + unsigned int order, bool init) { if (kasan_enabled()) - __kasan_free_pages(page, order); + __kasan_free_pages(page, order, init); } void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size, @@ -277,13 +287,17 @@ static inline bool kasan_enabled(void) { return false; } +static inline bool kasan_has_integrated_init(void) +{ + return false; +} static inline slab_flags_t kasan_never_merge(void) { return 0; } static inline void kasan_unpoison_range(const void *address, size_t size) {} -static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} -static inline void kasan_free_pages(struct page *page, unsigned int order) {} +static inline void kasan_alloc_pages(struct page *page, unsigned int order, bool init) {} +static inline void kasan_free_pages(struct page *page, unsigned int order, bool init) {} static inline void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags) {} -- cgit v1.2.3 From da844b787245194cfd69f0f1d2fb1dd3640a8a6d Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 29 Apr 2021 23:00:06 -0700 Subject: kasan, mm: integrate slab init_on_alloc with HW_TAGS This change uses the previously added memory initialization feature of HW_TAGS KASAN routines for slab memory when init_on_alloc is enabled. With this change, memory initialization memset() is no longer called when both HW_TAGS KASAN and init_on_alloc are enabled. Instead, memory is initialized in KASAN runtime. The memory initialization memset() is moved into slab_post_alloc_hook() that currently directly follows the initialization loop. A new argument is added to slab_post_alloc_hook() that indicates whether to initialize the memory or not. To avoid discrepancies with which memory gets initialized that can be caused by future changes, both KASAN hook and initialization memset() are put together and a warning comment is added. Combining setting allocation tags with memory initialization improves HW_TAGS KASAN performance when init_on_alloc is enabled. Link: https://lkml.kernel.org/r/c1292aeb5d519da221ec74a0684a949b027d7720.1615296150.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Christoph Lameter Cc: David Rientjes Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Joonsoo Kim Cc: Kevin Brodsky Cc: Pekka Enberg Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 30aa2bee8400..629aee484b6c 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -226,12 +226,12 @@ static __always_inline void kasan_slab_free_mempool(void *ptr) } void * __must_check __kasan_slab_alloc(struct kmem_cache *s, - void *object, gfp_t flags); + void *object, gfp_t flags, bool init); static __always_inline void * __must_check kasan_slab_alloc( - struct kmem_cache *s, void *object, gfp_t flags) + struct kmem_cache *s, void *object, gfp_t flags, bool init) { if (kasan_enabled()) - return __kasan_slab_alloc(s, object, flags); + return __kasan_slab_alloc(s, object, flags, init); return object; } @@ -320,7 +320,7 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object) static inline void kasan_kfree_large(void *ptr) {} static inline void kasan_slab_free_mempool(void *ptr) {} static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, - gfp_t flags) + gfp_t flags, bool init) { return object; } -- cgit v1.2.3 From d57a964e09c22441e9fb497d1d7a5c1983a5d1fb Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 29 Apr 2021 23:00:09 -0700 Subject: kasan, mm: integrate slab init_on_free with HW_TAGS This change uses the previously added memory initialization feature of HW_TAGS KASAN routines for slab memory when init_on_free is enabled. With this change, memory initialization memset() is no longer called when both HW_TAGS KASAN and init_on_free are enabled. Instead, memory is initialized in KASAN runtime. For SLUB, the memory initialization memset() is moved into slab_free_hook() that currently directly follows the initialization loop. A new argument is added to slab_free_hook() that indicates whether to initialize the memory or not. To avoid discrepancies with which memory gets initialized that can be caused by future changes, both KASAN hook and initialization memset() are put together and a warning comment is added. Combining setting allocation tags with memory initialization improves HW_TAGS KASAN performance when init_on_free is enabled. Link: https://lkml.kernel.org/r/190fd15c1886654afdec0d19ebebd5ade665b601.1615296150.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Christoph Lameter Cc: David Rientjes Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Joonsoo Kim Cc: Kevin Brodsky Cc: Pekka Enberg Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 629aee484b6c..b1678a61e6a7 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -203,11 +203,13 @@ static __always_inline void * __must_check kasan_init_slab_obj( return (void *)object; } -bool __kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); -static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object) +bool __kasan_slab_free(struct kmem_cache *s, void *object, + unsigned long ip, bool init); +static __always_inline bool kasan_slab_free(struct kmem_cache *s, + void *object, bool init) { if (kasan_enabled()) - return __kasan_slab_free(s, object, _RET_IP_); + return __kasan_slab_free(s, object, _RET_IP_, init); return false; } @@ -313,7 +315,7 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache, { return (void *)object; } -static inline bool kasan_slab_free(struct kmem_cache *s, void *object) +static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init) { return false; } -- cgit v1.2.3 From 1f9d03c5e999ed5a57fa4d8aec9fdf67a6234b80 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 29 Apr 2021 23:00:55 -0700 Subject: mm: move mem_init_print_info() into mm_init() mem_init_print_info() is called in mem_init() on each architecture, and pass NULL argument, so using void argument and move it into mm_init(). Link: https://lkml.kernel.org/r/20210317015210.33641-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Dave Hansen [x86] Reviewed-by: Christophe Leroy [powerpc] Acked-by: David Hildenbrand Tested-by: Anatoly Pugachev [sparc64] Acked-by: Russell King [arm] Acked-by: Mike Rapoport Cc: Catalin Marinas Cc: Richard Henderson Cc: Guo Ren Cc: Yoshinori Sato Cc: Huacai Chen Cc: Jonas Bonn Cc: Palmer Dabbelt Cc: Heiko Carstens Cc: "David S. Miller" Cc: "Peter Zijlstra" Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 64b2e3a0b94d..011f43605807 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2360,7 +2360,7 @@ extern unsigned long free_reserved_area(void *start, void *end, int poison, const char *s); extern void adjust_managed_page_count(struct page *page, long count); -extern void mem_init_print_info(const char *str); +extern void mem_init_print_info(void); extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end); -- cgit v1.2.3 From f73c6c8805ed0762d99122d5332fcf42b0c8fbb8 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 29 Apr 2021 23:01:04 -0700 Subject: include/linux/page-flags-layout.h: correctly determine LAST_CPUPID_WIDTH The naming convention used in include/linux/page-flags-layout.h: *_SHIFT: the number of bits trying to allocate *_WIDTH: the number of bits successfully allocated So when it comes to LAST_CPUPID_WIDTH, we need to check whether all previous *_WIDTH and LAST_CPUPID_SHIFT can fit into page flags. This means we need to use NODES_WIDTH, not NODES_SHIFT. Link: https://lkml.kernel.org/r/20210303071609.797782-1-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags-layout.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 7d4ec26d8a3e..295c2c687d2c 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -83,7 +83,7 @@ #define KASAN_TAG_WIDTH 0 #endif -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \ +#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_WIDTH+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \ <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else -- cgit v1.2.3 From 1587db62d8c0dbd943752f657b452213e1c4d8d4 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 29 Apr 2021 23:01:07 -0700 Subject: include/linux/page-flags-layout.h: cleanups Tidy things up and delete comments stating the obvious with typos or making no sense. Link: https://lkml.kernel.org/r/20210303071609.797782-2-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags-layout.h | 62 ++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 295c2c687d2c..ef1e3e736e14 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -21,16 +21,17 @@ #elif MAX_NR_ZONES <= 8 #define ZONES_SHIFT 3 #else -#error ZONES_SHIFT -- too many zones configured adjust calculation +#error ZONES_SHIFT "Too many zones configured" #endif +#define ZONES_WIDTH ZONES_SHIFT + #ifdef CONFIG_SPARSEMEM #include - -/* SECTION_SHIFT #bits space required to store a section # */ #define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) - -#endif /* CONFIG_SPARSEMEM */ +#else +#define SECTIONS_SHIFT 0 +#endif #ifndef BUILD_VDSO32_64 /* @@ -54,17 +55,28 @@ #define SECTIONS_WIDTH 0 #endif -#define ZONES_WIDTH ZONES_SHIFT - -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS #define NODES_WIDTH NODES_SHIFT -#else -#ifdef CONFIG_SPARSEMEM_VMEMMAP +#elif defined(CONFIG_SPARSEMEM_VMEMMAP) #error "Vmemmap: No space for nodes field in page flags" -#endif +#else #define NODES_WIDTH 0 #endif +/* + * Note that this #define MUST have a value so that it can be tested with + * the IS_ENABLED() macro. + */ +#if NODES_SHIFT != 0 && NODES_WIDTH == 0 +#define NODE_NOT_IN_PAGE_FLAGS 1 +#endif + +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) +#define KASAN_TAG_WIDTH 8 +#else +#define KASAN_TAG_WIDTH 0 +#endif + #ifdef CONFIG_NUMA_BALANCING #define LAST__PID_SHIFT 8 #define LAST__PID_MASK ((1 << LAST__PID_SHIFT)-1) @@ -77,36 +89,20 @@ #define LAST_CPUPID_SHIFT 0 #endif -#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) -#define KASAN_TAG_WIDTH 8 -#else -#define KASAN_TAG_WIDTH 0 -#endif - -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_WIDTH+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \ +#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \ <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 #endif -#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH+LAST_CPUPID_WIDTH+KASAN_TAG_WIDTH \ - > BITS_PER_LONG - NR_PAGEFLAGS -#error "Not enough bits in page flags" -#endif - -/* - * We are going to use the flags for the page to node mapping if its in - * there. This includes the case where there is no node, so it is implicit. - * Note that this #define MUST have a value so that it can be tested with - * the IS_ENABLED() macro. - */ -#if !(NODES_WIDTH > 0 || NODES_SHIFT == 0) -#define NODE_NOT_IN_PAGE_FLAGS 1 +#if LAST_CPUPID_SHIFT != 0 && LAST_CPUPID_WIDTH == 0 +#define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif -#if defined(CONFIG_NUMA_BALANCING) && LAST_CPUPID_WIDTH == 0 -#define LAST_CPUPID_NOT_IN_PAGE_FLAGS +#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \ + > BITS_PER_LONG - NR_PAGEFLAGS +#error "Not enough bits in page flags" #endif #endif -- cgit v1.2.3 From 84172f4bb752424415756351a40f8da5714e1554 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 23:01:15 -0700 Subject: mm/page_alloc: combine __alloc_pages and __alloc_pages_nodemask There are only two callers of __alloc_pages() so prune the thicket of alloc_page variants by combining the two functions together. Current callers of __alloc_pages() simply add an extra 'NULL' parameter and current callers of __alloc_pages_nodemask() call __alloc_pages() instead. Link: https://lkml.kernel.org/r/20210225150642.2582252-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 8572a1474e16..f39de931bdf9 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -515,15 +515,8 @@ static inline int arch_make_page_accessible(struct page *page) } #endif -struct page * -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, - nodemask_t *nodemask); - -static inline struct page * -__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid) -{ - return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL); -} +struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, + nodemask_t *nodemask); /* * Allocate pages, preferring the node given as nid. The node must be valid and @@ -535,7 +528,7 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); - return __alloc_pages(gfp_mask, order, nid); + return __alloc_pages(gfp_mask, order, nid, NULL); } /* -- cgit v1.2.3 From d7f946d0faf90014547ee5d090e9d05018278c7a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 23:01:18 -0700 Subject: mm/mempolicy: rename alloc_pages_current to alloc_pages When CONFIG_NUMA is enabled, alloc_pages() is a wrapper around alloc_pages_current(). This is pointless, just implement alloc_pages() directly. Link: https://lkml.kernel.org/r/20210225150642.2582252-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f39de931bdf9..0a88f84b08f4 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -546,13 +546,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, } #ifdef CONFIG_NUMA -extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order); - -static inline struct page * -alloc_pages(gfp_t gfp_mask, unsigned int order) -{ - return alloc_pages_current(gfp_mask, order); -} +struct page *alloc_pages(gfp_t gfp, unsigned int order); extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, int node, bool hugepage); -- cgit v1.2.3 From 387ba26fb1cb9be9e35dc14a6d97188e916eda05 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 29 Apr 2021 23:01:45 -0700 Subject: mm/page_alloc: add a bulk page allocator This patch adds a new page allocator interface via alloc_pages_bulk, and __alloc_pages_bulk_nodemask. A caller requests a number of pages to be allocated and added to a list. The API is not guaranteed to return the requested number of pages and may fail if the preferred allocation zone has limited free memory, the cpuset changes during the allocation or page debugging decides to fail an allocation. It's up to the caller to request more pages in batch if necessary. Note that this implementation is not very efficient and could be improved but it would require refactoring. The intent is to make it available early to determine what semantics are required by different callers. Once the full semantics are nailed down, it can be refactored. [mgorman@techsingularity.net: fix alloc_pages_bulk() return type, per Matthew] Link: https://lkml.kernel.org/r/20210325123713.GQ3697@techsingularity.net [mgorman@techsingularity.net: fix uninit var warning] Link: https://lkml.kernel.org/r/20210330114847.GX3697@techsingularity.net [mgorman@techsingularity.net: fix comment, per Vlastimil] Link: https://lkml.kernel.org/r/20210412110255.GV3697@techsingularity.net Link: https://lkml.kernel.org/r/20210325114228.27719-3-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Reviewed-by: Alexander Lobakin Tested-by: Colin Ian King Cc: Alexander Duyck Cc: Christoph Hellwig Cc: Chuck Lever Cc: David Miller Cc: Ilias Apalodimas Cc: Jesper Dangaard Brouer Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0a88f84b08f4..a2be8f4174a9 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -518,6 +518,17 @@ static inline int arch_make_page_accessible(struct page *page) struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); +unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, + nodemask_t *nodemask, int nr_pages, + struct list_head *list); + +/* Bulk allocate order-0 pages */ +static inline unsigned long +alloc_pages_bulk(gfp_t gfp, unsigned long nr_pages, struct list_head *list) +{ + return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list); +} + /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). -- cgit v1.2.3 From 0f87d9d30f21390dd71114f30e63038980e6eb3f Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 29 Apr 2021 23:01:48 -0700 Subject: mm/page_alloc: add an array-based interface to the bulk page allocator The proposed callers for the bulk allocator store pages from the bulk allocator in an array. This patch adds an array-based interface to the API to avoid multiple list iterations. The page list interface is preserved to avoid requiring all users of the bulk API to allocate and manage enough storage to store the pages. [akpm@linux-foundation.org: remove now unused local `allocated'] Link: https://lkml.kernel.org/r/20210325114228.27719-4-mgorman@techsingularity.net Signed-off-by: Mel Gorman Reviewed-by: Alexander Lobakin Acked-by: Vlastimil Babka Cc: Alexander Duyck Cc: Christoph Hellwig Cc: Chuck Lever Cc: David Miller Cc: Ilias Apalodimas Cc: Jesper Dangaard Brouer Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index a2be8f4174a9..26f4d907254a 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -520,13 +520,20 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, - struct list_head *list); + struct list_head *page_list, + struct page **page_array); /* Bulk allocate order-0 pages */ static inline unsigned long -alloc_pages_bulk(gfp_t gfp, unsigned long nr_pages, struct list_head *list) +alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list) { - return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list); + return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL); +} + +static inline unsigned long +alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array) +{ + return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, NULL, page_array); } /* -- cgit v1.2.3 From 198fba4137a1803a9cb93992b56c2ecba1aa83a3 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 29 Apr 2021 23:02:16 -0700 Subject: mm/mmzone.h: fix existing kernel-doc comments and link them to core-api There are a couple of kernel-doc comments in include/linux/mmzone.h but they have minor formatting issues that would cause kernel-doc warnings. Fix the formatting of those comments, add missing Return: descriptions and link include/linux/mmzone.h to Documentation/core-api/mm-api.rst Link: https://lkml.kernel.org/r/20210426141927.1314326-2-rppt@kernel.org Signed-off-by: Mike Rapoport Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Anshuman Khandual Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 47946cec7584..3b2205741048 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -993,7 +993,8 @@ static inline int is_highmem_idx(enum zone_type idx) * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. This is an attempt to keep references * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum. - * @zone - pointer to struct zone variable + * @zone: pointer to struct zone variable + * Return: 1 for a highmem zone, 0 otherwise */ static inline int is_highmem(struct zone *zone) { @@ -1044,7 +1045,7 @@ extern struct zone *next_zone(struct zone *zone); /** * for_each_online_pgdat - helper macro to iterate over all online nodes - * @pgdat - pointer to a pg_data_t variable + * @pgdat: pointer to a pg_data_t variable */ #define for_each_online_pgdat(pgdat) \ for (pgdat = first_online_pgdat(); \ @@ -1052,7 +1053,7 @@ extern struct zone *next_zone(struct zone *zone); pgdat = next_online_pgdat(pgdat)) /** * for_each_zone - helper macro to iterate over all memory zones - * @zone - pointer to struct zone variable + * @zone: pointer to struct zone variable * * The user only needs to declare the zone variable, for_each_zone * fills it in. @@ -1091,15 +1092,18 @@ struct zoneref *__next_zones_zonelist(struct zoneref *z, /** * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point - * @z - The cursor used as a starting point for the search - * @highest_zoneidx - The zone index of the highest zone to return - * @nodes - An optional nodemask to filter the zonelist with + * @z: The cursor used as a starting point for the search + * @highest_zoneidx: The zone index of the highest zone to return + * @nodes: An optional nodemask to filter the zonelist with * * This function returns the next zone at or below a given zone index that is * within the allowed nodemask using a cursor as the starting point for the * search. The zoneref returned is a cursor that represents the current zone * being examined. It should be advanced by one before calling * next_zones_zonelist again. + * + * Return: the next zone at or below highest_zoneidx within the allowed + * nodemask using a cursor within a zonelist as a starting point */ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, @@ -1112,10 +1116,9 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, /** * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist - * @zonelist - The zonelist to search for a suitable zone - * @highest_zoneidx - The zone index of the highest zone to return - * @nodes - An optional nodemask to filter the zonelist with - * @return - Zoneref pointer for the first suitable zone found (see below) + * @zonelist: The zonelist to search for a suitable zone + * @highest_zoneidx: The zone index of the highest zone to return + * @nodes: An optional nodemask to filter the zonelist with * * This function returns the first zone at or below a given zone index that is * within the allowed nodemask. The zoneref returned is a cursor that can be @@ -1125,6 +1128,8 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is * never NULL). This may happen either genuinely, or due to concurrent nodemask * update due to cpuset modification. + * + * Return: Zoneref pointer for the first suitable zone found */ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, enum zone_type highest_zoneidx, @@ -1136,11 +1141,11 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, /** * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask - * @zone - The current zone in the iterator - * @z - The current pointer within zonelist->_zonerefs being iterated - * @zlist - The zonelist being iterated - * @highidx - The zone index of the highest zone to return - * @nodemask - Nodemask allowed by the allocator + * @zone: The current zone in the iterator + * @z: The current pointer within zonelist->_zonerefs being iterated + * @zlist: The zonelist being iterated + * @highidx: The zone index of the highest zone to return + * @nodemask: Nodemask allowed by the allocator * * This iterator iterates though all zones at or below a given zone index and * within a given nodemask @@ -1160,10 +1165,10 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, /** * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index - * @zone - The current zone in the iterator - * @z - The current pointer within zonelist->zones being iterated - * @zlist - The zonelist being iterated - * @highidx - The zone index of the highest zone to return + * @zone: The current zone in the iterator + * @z: The current pointer within zonelist->zones being iterated + * @zlist: The zonelist being iterated + * @highidx: The zone index of the highest zone to return * * This iterator iterates though all zones at or below a given zone index. */ -- cgit v1.2.3 From 384d0c68204a4a657f4bbc096c50d729ae7d9ef0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 12 Feb 2021 11:02:47 +0100 Subject: PCI/VPD: Remove pci_set_vpd_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 24a1720a0841 ("cxgb4: collect serial config version from register") removed the only usage of pci_set_vpd_size(). If a device needs to override the auto-detected VPD size, then this can be done with a PCI quirk, as is done for Chelsio devices. There's no need to allow drivers to change the VPD size. Remove pci_set_vpd_size(). [bhelgaas: squash in Arnd's fix for "'pci_vpd_set_size' defined but not used" from https://lore.kernel.org/r/20210421140334.3847155-1-arnd@kernel.org] Link: https://lore.kernel.org/r/47d86e52-9bcf-7da7-1edb-0d988a7a82ab@gmail.com Signed-off-by: Heiner Kallweit Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Wilczyński --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 86c799c97b77..edadc62ae058 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1302,7 +1302,6 @@ void pci_unlock_rescan_remove(void); /* Vital Product Data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); -int pci_set_vpd_size(struct pci_dev *dev, size_t len); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx); -- cgit v1.2.3 From 4cf0abbce69bde3d07757dfa9be6420407fdbc45 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 1 Apr 2021 18:43:15 +0200 Subject: PCI/VPD: Remove pci_vpd_find_tag() 'offset' argument All callers pass 0 as offset. Therefore remove the parameter and use a fixed offset 0 in pci_vpd_find_tag(). Link: https://lore.kernel.org/r/f62e6e19-5423-2ead-b2bd-62844b23ef8f@gmail.com Signed-off-by: Heiner Kallweit Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index edadc62ae058..1eb35c09674e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2310,14 +2310,13 @@ static inline u8 pci_vpd_info_field_size(const u8 *info_field) /** * pci_vpd_find_tag - Locates the Resource Data Type tag provided * @buf: Pointer to buffered vpd data - * @off: The offset into the buffer at which to begin the search * @len: The length of the vpd buffer * @rdt: The Resource Data Type to search for * * Returns the index where the Resource Data Type was found or * -ENOENT otherwise. */ -int pci_vpd_find_tag(const u8 *buf, unsigned int off, unsigned int len, u8 rdt); +int pci_vpd_find_tag(const u8 *buf, unsigned int len, u8 rdt); /** * pci_vpd_find_info_keyword - Locates an information field keyword in the VPD -- cgit v1.2.3 From d0f9164eb294aeb884cbe36ddbbae34fa0124aa1 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 6 Apr 2021 20:04:44 +0300 Subject: vdpa: Follow kdoc comment style Follow comment style mentioned in the Writing kernel-doc document [1]. Following warnings are fixed. $ scripts/kernel-doc -v -none include/linux/vdpa.h include/linux/vdpa.h:11: warning: missing initial short description on line: * vDPA callback definition. include/linux/vdpa.h:11: info: Scanning doc for vDPA include/linux/vdpa.h:15: warning: cannot understand function prototype: 'struct vdpa_callback ' include/linux/vdpa.h:21: warning: missing initial short description on line: * vDPA notification area include/linux/vdpa.h:21: info: Scanning doc for vDPA include/linux/vdpa.h:25: warning: cannot understand function prototype: 'struct vdpa_notification_area ' include/linux/vdpa.h:31: warning: missing initial short description on line: * vDPA vq_state definition include/linux/vdpa.h:31: info: Scanning doc for vDPA include/linux/vdpa.h:34: warning: cannot understand function prototype: 'struct vdpa_vq_state ' include/linux/vdpa.h:41: info: Scanning doc for vDPA device include/linux/vdpa.h:51: warning: cannot understand function prototype: 'struct vdpa_device ' include/linux/vdpa.h:62: info: Scanning doc for vDPA IOVA range include/linux/vdpa.h:66: warning: cannot understand function prototype: 'struct vdpa_iova_range ' include/linux/vdpa.h:72: info: Scanning doc for vDPA_config_ops include/linux/vdpa.h:203: warning: cannot understand function prototype: 'struct vdpa_config_ops ' include/linux/vdpa.h:270: info: Scanning doc for vdpa_driver include/linux/vdpa.h:275: warning: cannot understand function prototype: 'struct vdpa_driver ' include/linux/vdpa.h:347: info: Scanning doc for vdpa_mgmtdev_ops include/linux/vdpa.h:360: warning: cannot understand function prototype: 'struct vdpa_mgmtdev_ops ' After this fix: scripts/kernel-doc -v -none include/linux/vdpa.h include/linux/vdpa.h:11: info: Scanning doc for struct vdpa_calllback include/linux/vdpa.h:21: info: Scanning doc for struct vdpa_notification_area include/linux/vdpa.h:31: info: Scanning doc for struct vdpa_vq_state include/linux/vdpa.h:41: info: Scanning doc for struct vdpa_device include/linux/vdpa.h:62: info: Scanning doc for struct vdpa_iova_range include/linux/vdpa.h:72: info: Scanning doc for struct vdpa_config_ops include/linux/vdpa.h:270: info: Scanning doc for struct vdpa_driver include/linux/vdpa.h:347: info: Scanning doc for struct vdpa_mgmtdev_ops [1] https://www.kernel.org/doc/html/latest/doc-guide/kernel-doc.html Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Link: https://lore.kernel.org/r/20210406170457.98481-2-parav@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Jason Wang --- include/linux/vdpa.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 15fa085fab05..37b65ca940cf 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -8,7 +8,7 @@ #include /** - * vDPA callback definition. + * struct vdpa_calllback - vDPA callback definition. * @callback: interrupt callback function * @private: the data passed to the callback function */ @@ -18,7 +18,7 @@ struct vdpa_callback { }; /** - * vDPA notification area + * struct vdpa_notification_area - vDPA notification area * @addr: base address of the notification area * @size: size of the notification area */ @@ -28,7 +28,7 @@ struct vdpa_notification_area { }; /** - * vDPA vq_state definition + * struct vdpa_vq_state - vDPA vq_state definition * @avail_index: available index */ struct vdpa_vq_state { @@ -38,7 +38,7 @@ struct vdpa_vq_state { struct vdpa_mgmt_dev; /** - * vDPA device - representation of a vDPA device + * struct vdpa_device - representation of a vDPA device * @dev: underlying device * @dma_dev: the actual device that is performing DMA * @config: the configuration ops for this device. @@ -59,7 +59,7 @@ struct vdpa_device { }; /** - * vDPA IOVA range - the IOVA range support by the device + * struct vdpa_iova_range - the IOVA range support by the device * @first: start of the IOVA range * @last: end of the IOVA range */ @@ -69,7 +69,7 @@ struct vdpa_iova_range { }; /** - * vDPA_config_ops - operations for configuring a vDPA device. + * struct vdpa_config_ops - operations for configuring a vDPA device. * Note: vDPA device drivers are required to implement all of the * operations unless it is mentioned to be optional in the following * list. @@ -267,7 +267,7 @@ int _vdpa_register_device(struct vdpa_device *vdev, int nvqs); void _vdpa_unregister_device(struct vdpa_device *vdev); /** - * vdpa_driver - operations for a vDPA driver + * struct vdpa_driver - operations for a vDPA driver * @driver: underlying device driver * @probe: the function to call when a device is found. Returns 0 or -errno. * @remove: the function to call when a device is removed. @@ -344,18 +344,18 @@ static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset, } /** - * vdpa_mgmtdev_ops - vdpa device ops - * @dev_add: Add a vdpa device using alloc and register - * @mdev: parent device to use for device addition - * @name: name of the new vdpa device - * Driver need to add a new device using _vdpa_register_device() - * after fully initializing the vdpa device. Driver must return 0 - * on success or appropriate error code. - * @dev_del: Remove a vdpa device using unregister - * @mdev: parent device to use for device removal - * @dev: vdpa device to remove - * Driver need to remove the specified device by calling - * _vdpa_unregister_device(). + * struct vdpa_mgmtdev_ops - vdpa device ops + * @dev_add: Add a vdpa device using alloc and register + * @mdev: parent device to use for device addition + * @name: name of the new vdpa device + * Driver need to add a new device using _vdpa_register_device() + * after fully initializing the vdpa device. Driver must return 0 + * on success or appropriate error code. + * @dev_del: Remove a vdpa device using unregister + * @mdev: parent device to use for device removal + * @dev: vdpa device to remove + * Driver need to remove the specified device by calling + * _vdpa_unregister_device(). */ struct vdpa_mgmtdev_ops { int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name); -- cgit v1.2.3 From 9e3bb9b79a7131a088cfffbdcc30e747dad9d090 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 15 Apr 2021 03:31:41 -0400 Subject: virtio_pci_modern: introduce helper to map vq notify area This patch factors out the logic of vq notify area mapping. Following patches will switch to use this common helpers for both virtio_pci library and virtio-pci vDPA driver. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20210415073147.19331-2-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Eli Cohen --- include/linux/virtio_pci_modern.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index f26acbeec965..1b95d39b00fc 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -106,6 +106,8 @@ void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, in u32 align, u32 start, u32 size, size_t *len); +void *vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, + u16 index); int vp_modern_probe(struct virtio_pci_modern_device *mdev); void vp_modern_remove(struct virtio_pci_modern_device *mdev); #endif -- cgit v1.2.3 From a5f7a24f49d81fab9f59611814a8817cc8a876a2 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 15 Apr 2021 03:31:44 -0400 Subject: virtio_pci_modern: hide vp_modern_get_queue_notify_off() All users (both virtio-pci library and vp_vdpa driver) has been switched to use vp_modern_map_vq_notify(). So there's no need to export the low level helper of vp_modern_get_queue_notify_off(). Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20210415073147.19331-5-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Eli Cohen --- include/linux/virtio_pci_modern.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index 1b95d39b00fc..179a2fb4bf37 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -99,8 +99,6 @@ void vp_modern_set_queue_size(struct virtio_pci_modern_device *mdev, u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev, u16 idx); u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev); -u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev, - u16 idx); void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off, size_t minlen, u32 align, -- cgit v1.2.3 From fd466b36940b22a506265edf12714bd0cf9ed836 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 15 Apr 2021 03:31:45 -0400 Subject: virito_pci libray: hide vp_modern_map_capability() No user now and the capability should not be setup externally. Instead, every access to the capability should be done via virtio_pci_modern_device. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20210415073147.19331-6-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Eli Cohen --- include/linux/virtio_pci_modern.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index 179a2fb4bf37..e6e7072413c1 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -99,11 +99,6 @@ void vp_modern_set_queue_size(struct virtio_pci_modern_device *mdev, u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev, u16 idx); u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev); -void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off, - size_t minlen, - u32 align, - u32 start, u32 size, - size_t *len); void *vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, u16 index); int vp_modern_probe(struct virtio_pci_modern_device *mdev); -- cgit v1.2.3 From 9e311bcad73dc14bd0a736db6ad3d382227e11fe Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 15 Apr 2021 03:31:46 -0400 Subject: virtio-pci library: report resource address Sometimes it might be useful to report the capability physical address. One example is to report the physical address of the doorbell in order to be mapped by userspace. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20210415073147.19331-7-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_pci_modern.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index e6e7072413c1..cdfabbefacdf 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -13,6 +13,8 @@ struct virtio_pci_modern_device { void __iomem *device; /* Base of vq notifications (non-legacy mode). */ void __iomem *notify_base; + /* Physical base of vq notifications */ + resource_size_t notify_pa; /* Where to read and clear interrupt */ u8 __iomem *isr; @@ -100,7 +102,7 @@ u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev, u16 idx); u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev); void *vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, - u16 index); + u16 index, resource_size_t *pa); int vp_modern_probe(struct virtio_pci_modern_device *mdev); void vp_modern_remove(struct virtio_pci_modern_device *mdev); #endif -- cgit v1.2.3 From f53d9910d009bc015b42d88114e2d86a93b0e6b7 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 15 Mar 2021 17:34:38 +0100 Subject: vringh: add 'iotlb_lock' to synchronize iotlb accesses Usually iotlb accesses are synchronized with a spinlock. Let's request it as a new parameter in vringh_set_iotlb() and hold it when we navigate the iotlb in iotlb_translate() to avoid race conditions with any new additions/deletions of ranges from the ioltb. Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20210315163450.254396-3-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/vringh.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 59bd50f99291..9c077863c8f6 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -46,6 +46,9 @@ struct vringh { /* IOTLB for this vring */ struct vhost_iotlb *iotlb; + /* spinlock to synchronize IOTLB accesses */ + spinlock_t *iotlb_lock; + /* The function to call to notify the guest about added buffers */ void (*notify)(struct vringh *); }; @@ -258,7 +261,8 @@ static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val) #if IS_REACHABLE(CONFIG_VHOST_IOTLB) -void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb); +void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb, + spinlock_t *iotlb_lock); int vringh_init_iotlb(struct vringh *vrh, u64 features, unsigned int num, bool weak_barriers, -- cgit v1.2.3 From b8c06ad4d67db56ed6bdfb685c134da74e92a2c7 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 15 Mar 2021 17:34:41 +0100 Subject: vringh: implement vringh_kiov_advance() In some cases, it may be useful to provide a way to skip a number of bytes in a vringh_kiov. Let's implement vringh_kiov_advance() for this purpose, reusing the code from vringh_iov_xfer(). We replace that code calling the new vringh_kiov_advance(). Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20210315163450.254396-6-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/vringh.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 9c077863c8f6..755211ebd195 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -199,6 +199,8 @@ static inline void vringh_kiov_cleanup(struct vringh_kiov *kiov) kiov->iov = NULL; } +void vringh_kiov_advance(struct vringh_kiov *kiov, size_t len); + int vringh_getdesc_kern(struct vringh *vrh, struct vringh_kiov *riov, struct vringh_kiov *wiov, -- cgit v1.2.3 From 14c9ac05ce09c8c6a89ffcca6ffb68707cba36c2 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 15 Mar 2021 17:34:42 +0100 Subject: vringh: add vringh_kiov_length() helper This new helper returns the total number of bytes covered by a vringh_kiov. Suggested-by: Jason Wang Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20210315163450.254396-7-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/vringh.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 755211ebd195..84db7b8f912f 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -199,6 +199,17 @@ static inline void vringh_kiov_cleanup(struct vringh_kiov *kiov) kiov->iov = NULL; } +static inline size_t vringh_kiov_length(struct vringh_kiov *kiov) +{ + size_t len = 0; + int i; + + for (i = kiov->i; i < kiov->used; i++) + len += kiov->iov[i].iov_len; + + return len; +} + void vringh_kiov_advance(struct vringh_kiov *kiov, size_t len); int vringh_getdesc_kern(struct vringh *vrh, -- cgit v1.2.3 From 442706f9f94d28fe3c9f188ae4ebbd6b40addffe Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 15 Mar 2021 17:34:44 +0100 Subject: vdpa: add get_config_size callback in vdpa_config_ops This new callback is used to get the size of the configuration space of vDPA devices. Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20210315163450.254396-9-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/linux/vdpa.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 37b65ca940cf..f311d227aa1b 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -150,6 +150,9 @@ struct vdpa_iova_range { * @set_status: Set the device status * @vdev: vdpa device * @status: virtio device status + * @get_config_size: Get the size of the configuration space + * @vdev: vdpa device + * Returns size_t: configuration size * @get_config: Read from device specific configuration space * @vdev: vdpa device * @offset: offset from the beginning of @@ -231,6 +234,7 @@ struct vdpa_config_ops { u32 (*get_vendor_id)(struct vdpa_device *vdev); u8 (*get_status)(struct vdpa_device *vdev); void (*set_status)(struct vdpa_device *vdev, u8 status); + size_t (*get_config_size)(struct vdpa_device *vdev); void (*get_config)(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); void (*set_config)(struct vdpa_device *vdev, unsigned int offset, -- cgit v1.2.3 From 801c6058d14a82179a7ee17a4b532cac6fad067f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 Apr 2021 15:19:37 +0000 Subject: bpf: Fix leakage of uninitialized bpf stack under speculation The current implemented mechanisms to mitigate data disclosure under speculation mainly address stack and map value oob access from the speculative domain. However, Piotr discovered that uninitialized BPF stack is not protected yet, and thus old data from the kernel stack, potentially including addresses of kernel structures, could still be extracted from that 512 bytes large window. The BPF stack is special compared to map values since it's not zero initialized for every program invocation, whereas map values /are/ zero initialized upon their initial allocation and thus cannot leak any prior data in either domain. In the non-speculative domain, the verifier ensures that every stack slot read must have a prior stack slot write by the BPF program to avoid such data leaking issue. However, this is not enough: for example, when the pointer arithmetic operation moves the stack pointer from the last valid stack offset to the first valid offset, the sanitation logic allows for any intermediate offsets during speculative execution, which could then be used to extract any restricted stack content via side-channel. Given for unprivileged stack pointer arithmetic the use of unknown but bounded scalars is generally forbidden, we can simply turn the register-based arithmetic operation into an immediate-based arithmetic operation without the need for masking. This also gives the benefit of reducing the needed instructions for the operation. Given after the work in 7fedb63a8307 ("bpf: Tighten speculative pointer arithmetic mask"), the aux->alu_limit already holds the final immediate value for the offset register with the known scalar. Thus, a simple mov of the immediate to AX register with using AX as the source for the original instruction is sufficient and possible now in this case. Reported-by: Piotr Krysiuk Signed-off-by: Daniel Borkmann Tested-by: Piotr Krysiuk Reviewed-by: Piotr Krysiuk Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6023a1367853..06841517ab1e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -302,10 +302,11 @@ struct bpf_verifier_state_list { }; /* Possible states for alu_state member. */ -#define BPF_ALU_SANITIZE_SRC 1U -#define BPF_ALU_SANITIZE_DST 2U +#define BPF_ALU_SANITIZE_SRC (1U << 0) +#define BPF_ALU_SANITIZE_DST (1U << 1) #define BPF_ALU_NEG_VALUE (1U << 2) #define BPF_ALU_NON_POINTER (1U << 3) +#define BPF_ALU_IMMEDIATE (1U << 4) #define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ BPF_ALU_SANITIZE_DST) -- cgit v1.2.3 From cd2c7545ae1beac3b6aae033c7f31193b3255946 Mon Sep 17 00:00:00 2001 From: Changheun Lee Date: Mon, 3 May 2021 18:52:03 +0900 Subject: bio: limit bio max size bio size can grow up to 4GB when muli-page bvec is enabled. but sometimes it would lead to inefficient behaviors. in case of large chunk direct I/O, - 32MB chunk read in user space - all pages for 32MB would be merged to a bio structure if the pages physical addresses are contiguous. it makes some delay to submit until merge complete. bio max size should be limited to a proper size. When 32MB chunk read with direct I/O option is coming from userspace, kernel behavior is below now in do_direct_IO() loop. it's timeline. | bio merge for 32MB. total 8,192 pages are merged. | total elapsed time is over 2ms. |------------------ ... ----------------------->| | 8,192 pages merged a bio. | at this time, first bio submit is done. | 1 bio is split to 32 read request and issue. |---------------> |---------------> |---------------> ...... |---------------> |--------------->| total 19ms elapsed to complete 32MB read done from device. | If bio max size is limited with 1MB, behavior is changed below. | bio merge for 1MB. 256 pages are merged for each bio. | total 32 bio will be made. | total elapsed time is over 2ms. it's same. | but, first bio submit timing is fast. about 100us. |--->|--->|--->|---> ... -->|--->|--->|--->|--->| | 256 pages merged a bio. | at this time, first bio submit is done. | and 1 read request is issued for 1 bio. |---------------> |---------------> |---------------> ...... |---------------> |--------------->| total 17ms elapsed to complete 32MB read done from device. | As a result, read request issue timing is faster if bio max size is limited. Current kernel behavior with multipage bvec, super large bio can be created. And it lead to delay first I/O request issue. Signed-off-by: Changheun Lee Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20210503095203.29076-1-nanich.lee@samsung.com Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 +++- include/linux/blkdev.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index a0b4cfdf62a4..f1a99f0a240c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -106,6 +106,8 @@ static inline void *bio_data(struct bio *bio) return NULL; } +extern unsigned int bio_max_size(struct bio *bio); + /** * bio_full - check if the bio is full * @bio: bio to check @@ -119,7 +121,7 @@ static inline bool bio_full(struct bio *bio, unsigned len) if (bio->bi_vcnt >= bio->bi_max_vecs) return true; - if (bio->bi_iter.bi_size > UINT_MAX - len) + if (bio->bi_iter.bi_size > bio_max_size(bio) - len) return true; return false; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b91ba6207365..40c7c4d87aa1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -327,6 +327,8 @@ enum blk_bounce { }; struct queue_limits { + unsigned int bio_max_bytes; + enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; -- cgit v1.2.3 From 48582b2e3b87b794a9845d488af2c76ce055502b Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Fri, 30 Apr 2021 11:21:54 -0400 Subject: reset: add missing empty function reset_control_rearm() All other functions are defined for when CONFIG_RESET_CONTROLLER is not set. Fixes: 557acb3d2cd9 ("reset: make shared pulsed reset controls re-triggerable") Link: https://lore.kernel.org/r/20210430152156.21162-2-jim2101024@gmail.com Signed-off-by: Jim Quinlan Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v5.11+ --- include/linux/reset.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index b9109efa2a5c..9700124affa3 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -47,6 +47,11 @@ static inline int reset_control_reset(struct reset_control *rstc) return 0; } +static inline int reset_control_rearm(struct reset_control *rstc) +{ + return 0; +} + static inline int reset_control_assert(struct reset_control *rstc) { return 0; -- cgit v1.2.3 From 43016d02cf6e46edfc4696452251d34bba0c0435 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 3 May 2021 13:51:15 +0200 Subject: netfilter: arptables: use pernet ops struct during unregister Like with iptables and ebtables, hook unregistration has to use the pernet ops struct, not the template. This triggered following splat: hook not found, pf 3 num 0 WARNING: CPU: 0 PID: 224 at net/netfilter/core.c:480 __nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480 [..] nf_unregister_net_hook net/netfilter/core.c:502 [inline] nf_unregister_net_hooks+0x117/0x160 net/netfilter/core.c:576 arpt_unregister_table_pre_exit+0x67/0x80 net/ipv4/netfilter/arp_tables.c:1565 Fixes: f9006acc8dfe5 ("netfilter: arp_tables: pass table pointer via nf_hook_ops") Reported-by: syzbot+dcccba8a1e41a38cb9df@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_arp/arp_tables.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 2aab9612f6ab..4f9a4b3c5892 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -53,8 +53,7 @@ int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, const struct nf_hook_ops *ops); void arpt_unregister_table(struct net *net, const char *name); -void arpt_unregister_table_pre_exit(struct net *net, const char *name, - const struct nf_hook_ops *ops); +void arpt_unregister_table_pre_exit(struct net *net, const char *name); extern unsigned int arpt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); -- cgit v1.2.3 From d7bce85aa7b92b5de8f69b3bcedfe51d7b1aabe1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 4 May 2021 04:17:20 -0400 Subject: virtio_pci_modern: correct sparse tags for notify When switching virtio_pci_modern to use a helper for mappings we lost an __iomem tag. Restore it. Reported-by: kernel test robot Fixes: 9e3bb9b79a71 ("virtio_pci_modern: introduce helper to map vq notify area") Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_pci_modern.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index cdfabbefacdf..6a95b58fd0f4 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -101,8 +101,8 @@ void vp_modern_set_queue_size(struct virtio_pci_modern_device *mdev, u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev, u16 idx); u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev); -void *vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, - u16 index, resource_size_t *pa); +void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, + u16 index, resource_size_t *pa); int vp_modern_probe(struct virtio_pci_modern_device *mdev); void vp_modern_remove(struct virtio_pci_modern_device *mdev); #endif -- cgit v1.2.3 From 6e552494fb90acae005d74ce6a2ee102d965184b Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 4 May 2021 08:54:29 -0700 Subject: iomap: remove unused private field from ioend The only remaining user of ->io_private is the generic ioend merging infrastructure. The only user of that is XFS, which no longer sets ->io_private or passes an associated merge callback. Remove the unused parameter and the ->io_private field. CC: linux-fsdevel@vger.kernel.org Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index d202fd2d0f91..c87d0cb0de6d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -198,7 +198,6 @@ struct iomap_ioend { struct inode *io_inode; /* file being written to */ size_t io_size; /* size of the extent */ loff_t io_offset; /* offset in the file */ - void *io_private; /* file system private data */ struct bio *io_bio; /* bio being built */ struct bio io_inline_bio; /* MUST BE LAST! */ }; @@ -234,9 +233,7 @@ struct iomap_writepage_ctx { void iomap_finish_ioends(struct iomap_ioend *ioend, int error); void iomap_ioend_try_merge(struct iomap_ioend *ioend, - struct list_head *more_ioends, - void (*merge_private)(struct iomap_ioend *ioend, - struct iomap_ioend *next)); + struct list_head *more_ioends); void iomap_sort_ioends(struct list_head *ioend_list); int iomap_writepage(struct page *page, struct writeback_control *wbc, struct iomap_writepage_ctx *wpc, -- cgit v1.2.3 From 98635b29a73f1a49ab6882ae58d56c9cd5ecb902 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 15 Mar 2021 10:13:54 +0100 Subject: lib: bitmap: remove the 'extern' keyword from function declarations The 'extern' keyword doesn't have any benefits for functions in header files. Remove it. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko --- include/linux/bitmap.h | 115 ++++++++++++++++++++++++------------------------- 1 file changed, 57 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 70a932470b2d..6939a8983026 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -118,54 +118,53 @@ * Allocation and deallocation of bitmap. * Provided in lib/bitmap.c to avoid circular dependency. */ -extern unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); -extern unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); -extern void bitmap_free(const unsigned long *bitmap); +unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); +unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); +void bitmap_free(const unsigned long *bitmap); /* * lib/bitmap.c provides these functions: */ -extern int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -extern bool __pure __bitmap_or_equal(const unsigned long *src1, - const unsigned long *src2, - const unsigned long *src3, - unsigned int nbits); -extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, - unsigned int nbits); -extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, - unsigned int shift, unsigned int nbits); -extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, - unsigned int shift, unsigned int nbits); -extern void bitmap_cut(unsigned long *dst, const unsigned long *src, - unsigned int first, unsigned int cut, - unsigned int nbits); -extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, +int __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +bool __pure __bitmap_or_equal(const unsigned long *src1, + const unsigned long *src2, + const unsigned long *src3, + unsigned int nbits); +void __bitmap_complement(unsigned long *dst, const unsigned long *src, + unsigned int nbits); +void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits); +void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits); +void bitmap_cut(unsigned long *dst, const unsigned long *src, + unsigned int first, unsigned int cut, unsigned int nbits); +int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +void __bitmap_replace(unsigned long *dst, + const unsigned long *old, const unsigned long *new, + const unsigned long *mask, unsigned int nbits); +int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); -extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -extern void __bitmap_replace(unsigned long *dst, - const unsigned long *old, const unsigned long *new, - const unsigned long *mask, unsigned int nbits); -extern int __bitmap_intersects(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -extern int __bitmap_subset(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); -extern void __bitmap_set(unsigned long *map, unsigned int start, int len); -extern void __bitmap_clear(unsigned long *map, unsigned int start, int len); - -extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map, - unsigned long size, - unsigned long start, - unsigned int nr, - unsigned long align_mask, - unsigned long align_offset); +int __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); +void __bitmap_set(unsigned long *map, unsigned int start, int len); +void __bitmap_clear(unsigned long *map, unsigned int start, int len); + +unsigned long bitmap_find_next_zero_area_off(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned int nr, + unsigned long align_mask, + unsigned long align_offset); /** * bitmap_find_next_zero_area - find a contiguous aligned zero area @@ -190,33 +189,33 @@ bitmap_find_next_zero_area(unsigned long *map, align_mask, 0); } -extern int bitmap_parse(const char *buf, unsigned int buflen, +int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits); -extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, +int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); -extern int bitmap_parselist(const char *buf, unsigned long *maskp, +int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); -extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, +int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); -extern void bitmap_remap(unsigned long *dst, const unsigned long *src, +void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, unsigned int nbits); -extern int bitmap_bitremap(int oldbit, +int bitmap_bitremap(int oldbit, const unsigned long *old, const unsigned long *new, int bits); -extern void bitmap_onto(unsigned long *dst, const unsigned long *orig, +void bitmap_onto(unsigned long *dst, const unsigned long *orig, const unsigned long *relmap, unsigned int bits); -extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, +void bitmap_fold(unsigned long *dst, const unsigned long *orig, unsigned int sz, unsigned int nbits); -extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); -extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); -extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); +int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); +void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); +int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); #ifdef __BIG_ENDIAN -extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); +void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); #else #define bitmap_copy_le bitmap_copy #endif -extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); -extern int bitmap_print_to_pagebuf(bool list, char *buf, +unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); +int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) @@ -265,9 +264,9 @@ static inline void bitmap_copy_clear_tail(unsigned long *dst, * therefore conversion is not needed when copying data from/to arrays of u32. */ #if BITS_PER_LONG == 64 -extern void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, +void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, unsigned int nbits); -extern void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, +void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits); #else #define bitmap_from_arr32(bitmap, buf, nbits) \ -- cgit v1.2.3 From c13656b904b6173aad723d9680a81c60de2f5edc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 15 Mar 2021 10:13:55 +0100 Subject: lib: bitmap: order includes alphabetically For better readability and maintenance: order the includes in bitmap source files alphabetically. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko --- include/linux/bitmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 6939a8983026..3282db97e06c 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -4,10 +4,10 @@ #ifndef __ASSEMBLY__ -#include #include -#include #include +#include +#include /* * bitmaps provide bit arrays that consume one or more unsigned -- cgit v1.2.3 From e829c2e4744850bab4d8f8ffebd00df10b4c6c2b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 15 Mar 2021 10:13:56 +0100 Subject: lib: bitmap: provide devm_bitmap_alloc() and devm_bitmap_zalloc() Provide managed variants of bitmap_alloc() and bitmap_zalloc(). Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko --- include/linux/bitmap.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3282db97e06c..73d039476fa4 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -9,6 +9,8 @@ #include #include +struct device; + /* * bitmaps provide bit arrays that consume one or more unsigned * longs. The bitmap interface and available operations are listed @@ -122,6 +124,12 @@ unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); void bitmap_free(const unsigned long *bitmap); +/* Managed variants of the above. */ +unsigned long *devm_bitmap_alloc(struct device *dev, + unsigned int nbits, gfp_t flags); +unsigned long *devm_bitmap_zalloc(struct device *dev, + unsigned int nbits, gfp_t flags); + /* * lib/bitmap.c provides these functions: */ -- cgit v1.2.3 From 7716506adac4664793a9d6d3dfa31ffddfa98714 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 4 May 2021 18:32:45 -0700 Subject: mm: introduce and use mapping_empty() Patch series "Remove nrexceptional tracking", v2. We actually use nrexceptional for very little these days. It's a minor pain to keep in sync with nrpages, but the pain becomes much bigger with the THP patches because we don't know how many indices a shadow entry occupies. It's easier to just remove it than keep it accurate. Also, we save 8 bytes per inode which is nothing to sneeze at; on my laptop, it would improve shmem_inode_cache from 22 to 23 objects per 16kB, and inode_cache from 26 to 27 objects. Combined, that saves a megabyte of memory from a combined usage of 25MB for both caches. Unfortunately, ext4 doesn't cross a magic boundary, so it doesn't save any memory for ext4. This patch (of 4): Instead of checking the two counters (nrpages and nrexceptional), we can just check whether i_pages is empty. Link: https://lkml.kernel.org/r/20201026151849.24232-1-willy@infradead.org Link: https://lkml.kernel.org/r/20201026151849.24232-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Vishal Verma Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 469fa7ffcf96..a4bd41128bf3 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -18,6 +18,11 @@ struct pagevec; +static inline bool mapping_empty(struct address_space *mapping) +{ + return xa_empty(&mapping->i_pages); +} + /* * Bits in mapping->flags. */ -- cgit v1.2.3 From 8bc3c481b3d0dcef2cf8e1b7c6b780af6725f7e3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 4 May 2021 18:32:54 -0700 Subject: mm: remove nrexceptional from inode We no longer track anything in nrexceptional, so remove it, saving 8 bytes per inode. Link: https://lkml.kernel.org/r/20201026151849.24232-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Vishal Verma Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 12766edee81f..acef282b97c6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -442,7 +442,6 @@ int pagecache_write_end(struct file *, struct address_space *mapping, * @i_mmap: Tree of private and shared mappings. * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. * @nrpages: Number of page entries, protected by the i_pages lock. - * @nrexceptional: Shadow or DAX entries, protected by the i_pages lock. * @writeback_index: Writeback starts here. * @a_ops: Methods. * @flags: Error bits and flags (AS_*). @@ -463,7 +462,6 @@ struct address_space { struct rb_root_cached i_mmap; struct rw_semaphore i_mmap_rwsem; unsigned long nrpages; - unsigned long nrexceptional; pgoff_t writeback_index; const struct address_space_operations *a_ops; unsigned long flags; -- cgit v1.2.3 From aec44e0f0213e36d4f0868a80cdc5097a510f79d Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 4 May 2021 18:33:00 -0700 Subject: hugetlb: pass vma into huge_pte_alloc() and huge_pmd_share() Patch series "hugetlb: Disable huge pmd unshare for uffd-wp", v4. This series tries to disable huge pmd unshare of hugetlbfs backed memory for uffd-wp. Although uffd-wp of hugetlbfs is still during rfc stage, the idea of this series may be needed for multiple tasks (Axel's uffd minor fault series, and Mike's soft dirty series), so I picked it out from the larger series. This patch (of 4): It is a preparation work to be able to behave differently in the per architecture huge_pte_alloc() according to different VMA attributes. Pass it deeper into huge_pmd_share() so that we can avoid the find_vma() call. [peterx@redhat.com: build fix] Link: https://lkml.kernel.org/r/20210304164653.GB397383@xz-x1Link: https://lkml.kernel.org/r/20210218230633.15028-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20210218230633.15028-2-peterx@redhat.com Signed-off-by: Peter Xu Suggested-by: Mike Kravetz Cc: Adam Ruprecht Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Axel Rasmussen Cc: Cannon Matthews Cc: Catalin Marinas Cc: Chinwen Chang Cc: David Rientjes Cc: "Dr . David Alan Gilbert" Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Kirill A. Shutemov Cc: Lokesh Gidra Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: "Michal Koutn" Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Mina Almasry Cc: Nicholas Piggin Cc: Oliver Upton Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Price Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index cccd1aab69dd..653ef322fac9 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -152,7 +152,8 @@ void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); -pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); +pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pud_t *pud); struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); @@ -161,7 +162,7 @@ extern struct list_head huge_boot_pages; /* arch callbacks */ -pte_t *huge_pte_alloc(struct mm_struct *mm, +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); -- cgit v1.2.3 From c1991e0705d143be773c984b006f2078aa9f2853 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 4 May 2021 18:33:04 -0700 Subject: hugetlb/userfaultfd: forbid huge pmd sharing when uffd enabled Huge pmd sharing could bring problem to userfaultfd. The thing is that userfaultfd is running its logic based on the special bits on page table entries, however the huge pmd sharing could potentially share page table entries for different address ranges. That could cause issues on either: - When sharing huge pmd page tables for an uffd write protected range, the newly mapped huge pmd range will also be write protected unexpectedly, or, - When we try to write protect a range of huge pmd shared range, we'll first do huge_pmd_unshare() in hugetlb_change_protection(), however that also means the UFFDIO_WRITEPROTECT could be silently skipped for the shared region, which could lead to data loss. While at it, a few other things are done altogether: - Move want_pmd_share() from mm/hugetlb.c into linux/hugetlb.h, because that's definitely something that arch code would like to use too - ARM64 currently directly check against CONFIG_ARCH_WANT_HUGE_PMD_SHARE when trying to share huge pmd. Switch to the want_pmd_share() helper. - Move vma_shareable() from huge_pmd_share() into want_pmd_share(). [peterx@redhat.com: fix build with !ARCH_WANT_HUGE_PMD_SHARE] Link: https://lkml.kernel.org/r/20210310185359.88297-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20210218231202.15426-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: Axel Rasmussen Tested-by: Naresh Kamboju Cc: Adam Ruprecht Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Cannon Matthews Cc: Catalin Marinas Cc: Chinwen Chang Cc: David Rientjes Cc: "Dr . David Alan Gilbert" Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Kirill A. Shutemov Cc: Lokesh Gidra Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: "Michal Koutn" Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Mina Almasry Cc: Nicholas Piggin Cc: Oliver Upton Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Price Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 ++ include/linux/userfaultfd_k.h | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 653ef322fac9..88e93809a455 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1040,4 +1040,6 @@ static inline __init void hugetlb_cma_check(void) } #endif +bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); + #endif /* _LINUX_HUGETLB_H */ diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index a8e5f3ea9bb2..c63ccdae3eab 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -52,6 +52,15 @@ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx; } +/* + * Never enable huge pmd sharing on uffd-wp registered vmas, because uffd-wp + * protect information is per pgtable entry. + */ +static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma) +{ + return vma->vm_flags & VM_UFFD_WP; +} + static inline bool userfaultfd_missing(struct vm_area_struct *vma) { return vma->vm_flags & VM_UFFD_MISSING; -- cgit v1.2.3 From 537cf30bba241ae88d5f4b0b6a5e66271b394852 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 4 May 2021 18:33:08 -0700 Subject: mm/hugetlb: move flush_hugetlb_tlb_range() into hugetlb.h Prepare for it to be called outside of mm/hugetlb.c. Link: https://lkml.kernel.org/r/20210218231204.15474-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: Axel Rasmussen Cc: Adam Ruprecht Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Cannon Matthews Cc: Catalin Marinas Cc: Chinwen Chang Cc: David Rientjes Cc: "Dr . David Alan Gilbert" Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Kirill A. Shutemov Cc: Lokesh Gidra Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: "Michal Koutn" Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Mina Almasry Cc: Nicholas Piggin Cc: Oliver Upton Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Price Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 88e93809a455..e43668144664 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1042,4 +1042,12 @@ static inline __init void hugetlb_cma_check(void) bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); +#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE +/* + * ARCHes with special requirements for evicting HUGETLB backing TLB entries can + * implement this. + */ +#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) +#endif + #endif /* _LINUX_HUGETLB_H */ -- cgit v1.2.3 From 6dfeaff93be1a4cab4fb48dad7df326d05059a99 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 4 May 2021 18:33:13 -0700 Subject: hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp Huge pmd sharing for hugetlbfs is racy with userfaultfd-wp because userfaultfd-wp is always based on pgtable entries, so they cannot be shared. Walk the hugetlb range and unshare all such mappings if there is, right before UFFDIO_REGISTER will succeed and return to userspace. This will pair with want_pmd_share() in hugetlb code so that huge pmd sharing is completely disabled for userfaultfd-wp registered range. Link: https://lkml.kernel.org/r/20210218231206.15524-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Cc: Peter Xu Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: Mike Rapoport Cc: Kirill A. Shutemov Cc: Matthew Wilcox (Oracle) Cc: Adam Ruprecht Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Anshuman Khandual Cc: Cannon Matthews Cc: Catalin Marinas Cc: Chinwen Chang Cc: David Rientjes Cc: "Dr . David Alan Gilbert" Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Lokesh Gidra Cc: Michael Ellerman Cc: "Michal Koutn" Cc: Michel Lespinasse Cc: Mina Almasry Cc: Nicholas Piggin Cc: Oliver Upton Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Price Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e43668144664..0f5813522224 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -188,6 +188,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); bool is_hugetlb_entry_migration(pte_t pte); +void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -369,6 +370,8 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, return 0; } +static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } + #endif /* !CONFIG_HUGETLB_PAGE */ /* * hugepages at page global directory. If arch support -- cgit v1.2.3 From d4afd60c24f87b6275b12ec3d67d8c2ad78cb075 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 4 May 2021 18:34:05 -0700 Subject: mm/huge_memory.c: remove unused macro TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG Commit 4958e4d86ecb ("mm: thp: remove debug_cow switch") forgot to remove TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG macro. Remove it here. Link: https://lkml.kernel.org/r/20210318122722.13135-6-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: Zi Yan Reviewed-by: Peter Xu Cc: Aneesh Kumar K.V Cc: Matthew Wilcox Cc: Michel Lespinasse Cc: Ralph Campbell Cc: Thomas Hellstrm (Intel) Cc: Vlastimil Babka Cc: Wei Yang Cc: William Kucharski Cc: Yang Shi Cc: yuleixzhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ba973efcd369..9626fda5efce 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -87,9 +87,6 @@ enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, -#ifdef CONFIG_DEBUG_VM - TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG, -#endif }; struct kobject; -- cgit v1.2.3 From 2938396771c8fd0870b5284319f9e78b4b552a79 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 4 May 2021 18:34:52 -0700 Subject: hugetlb: add per-hstate mutex to synchronize user adjustments The helper routine hstate_next_node_to_alloc accesses and modifies the hstate variable next_nid_to_alloc. The helper is used by the routines alloc_pool_huge_page and adjust_pool_surplus. adjust_pool_surplus is called with hugetlb_lock held. However, alloc_pool_huge_page can not be called with the hugetlb lock held as it will call the page allocator. Two instances of alloc_pool_huge_page could be run in parallel or alloc_pool_huge_page could run in parallel with adjust_pool_surplus which may result in the variable next_nid_to_alloc becoming invalid for the caller and pages being allocated on the wrong node. Both alloc_pool_huge_page and adjust_pool_surplus are only called from the routine set_max_huge_pages after boot. set_max_huge_pages is only called as the reusult of a user writing to the proc/sysfs nr_hugepages, or nr_hugepages_mempolicy file to adjust the number of hugetlb pages. It makes little sense to allow multiple adjustment to the number of hugetlb pages in parallel. Add a mutex to the hstate and use it to only allow one hugetlb page adjustment at a time. This will synchronize modifications to the next_nid_to_alloc variable. Link: https://lkml.kernel.org/r/20210409205254.242291-4-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: Michal Hocko Reviewed-by: Oscar Salvador Reviewed-by: Miaohe Lin Reviewed-by: Muchun Song Reviewed-by: David Hildenbrand Cc: "Aneesh Kumar K . V" Cc: Barry Song Cc: David Rientjes Cc: Hillf Danton Cc: HORIGUCHI NAOYA Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Mina Almasry Cc: Peter Xu Cc: Peter Zijlstra Cc: Roman Gushchin Cc: Shakeel Butt Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0f5813522224..628639422c5d 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -559,6 +559,7 @@ HPAGEFLAG(Freed, freed) #define HSTATE_NAME_LEN 32 /* Defines one hugetlb page size */ struct hstate { + struct mutex resize_lock; int next_nid_to_alloc; int next_nid_to_free; unsigned int order; -- cgit v1.2.3 From 369fa227c21949b22fd7374506c4992a0d7bb580 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 4 May 2021 18:35:26 -0700 Subject: mm: make alloc_contig_range handle free hugetlb pages alloc_contig_range will fail if it ever sees a HugeTLB page within the range we are trying to allocate, even when that page is free and can be easily reallocated. This has proved to be problematic for some users of alloc_contic_range, e.g: CMA and virtio-mem, where those would fail the call even when those pages lay in ZONE_MOVABLE and are free. We can do better by trying to replace such page. Free hugepages are tricky to handle so as to no userspace application notices disruption, we need to replace the current free hugepage with a new one. In order to do that, a new function called alloc_and_dissolve_huge_page is introduced. This function will first try to get a new fresh hugepage, and if it succeeds, it will replace the old one in the free hugepage pool. The free page replacement is done under hugetlb_lock, so no external users of hugetlb will notice the change. To allocate the new huge page, we use alloc_buddy_huge_page(), so we do not have to deal with any counters, and prep_new_huge_page() is not called. This is valulable because in case we need to free the new page, we only need to call __free_pages(). Once we know that the page to be replaced is a genuine 0-refcounted huge page, we remove the old page from the freelist by remove_hugetlb_page(). Then, we can call __prep_new_huge_page() and __prep_account_new_huge_page() for the new huge page to properly initialize it and increment the hstate->nr_huge_pages counter (previously decremented by remove_hugetlb_page()). Once done, the page is enqueued by enqueue_huge_page() and it is ready to be used. There is one tricky case when page's refcount is 0 because it is in the process of being released. A missing PageHugeFreed bit will tell us that freeing is in flight so we retry after dropping the hugetlb_lock. The race window should be small and the next retry should make a forward progress. E.g: CPU0 CPU1 free_huge_page() isolate_or_dissolve_huge_page PageHuge() == T alloc_and_dissolve_huge_page alloc_buddy_huge_page() spin_lock_irq(hugetlb_lock) // PageHuge() && !PageHugeFreed && // !PageCount() spin_unlock_irq(hugetlb_lock) spin_lock_irq(hugetlb_lock) 1) update_and_free_page PageHuge() == F __free_pages() 2) enqueue_huge_page SetPageHugeFreed() spin_unlock_irq(&hugetlb_lock) spin_lock_irq(hugetlb_lock) 1) PageHuge() == F (freed by case#1 from CPU0) 2) PageHuge() == T PageHugeFreed() == T - proceed with replacing the page In the case above we retry as the window race is quite small and we have high chances to succeed next time. With regard to the allocation, we restrict it to the node the page belongs to with __GFP_THISNODE, meaning we do not fallback on other node's zones. Note that gigantic hugetlb pages are fenced off since there is a cyclic dependency between them and alloc_contig_range. Link: https://lkml.kernel.org/r/20210419075413.1064-6-osalvador@suse.de Signed-off-by: Oscar Salvador Acked-by: Michal Hocko Acked-by: David Hildenbrand Reviewed-by: Mike Kravetz Cc: Muchun Song Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 628639422c5d..ec6a10b8860a 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -588,6 +588,7 @@ struct huge_bootmem_page { struct hstate *hstate; }; +int isolate_or_dissolve_huge_page(struct page *page); struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, @@ -870,6 +871,11 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; +static inline int isolate_or_dissolve_huge_page(struct page *page) +{ + return -ENOMEM; +} + static inline struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) -- cgit v1.2.3 From ae37c7ff79f1f030e28ec76c46ee032f8fd07607 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 4 May 2021 18:35:29 -0700 Subject: mm: make alloc_contig_range handle in-use hugetlb pages alloc_contig_range() will fail if it finds a HugeTLB page within the range, without a chance to handle them. Since HugeTLB pages can be migrated as any LRU or Movable page, it does not make sense to bail out without trying. Enable the interface to recognize in-use HugeTLB pages so we can migrate them, and have much better chances to succeed the call. Link: https://lkml.kernel.org/r/20210419075413.1064-7-osalvador@suse.de Signed-off-by: Oscar Salvador Reviewed-by: Mike Kravetz Acked-by: Michal Hocko Acked-by: David Hildenbrand Cc: Muchun Song Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ec6a10b8860a..d0f310ae3f82 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -588,7 +588,7 @@ struct huge_bootmem_page { struct hstate *hstate; }; -int isolate_or_dissolve_huge_page(struct page *page); +int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, @@ -871,7 +871,8 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; -static inline int isolate_or_dissolve_huge_page(struct page *page) +static inline int isolate_or_dissolve_huge_page(struct page *page, + struct list_head *list) { return -ENOMEM; } -- cgit v1.2.3 From 7677f7fd8be76659cd2d0db8ff4093bbb51c20e5 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Tue, 4 May 2021 18:35:36 -0700 Subject: userfaultfd: add minor fault registration mode Patch series "userfaultfd: add minor fault handling", v9. Overview ======== This series adds a new userfaultfd feature, UFFD_FEATURE_MINOR_HUGETLBFS. When enabled (via the UFFDIO_API ioctl), this feature means that any hugetlbfs VMAs registered with UFFDIO_REGISTER_MODE_MISSING will *also* get events for "minor" faults. By "minor" fault, I mean the following situation: Let there exist two mappings (i.e., VMAs) to the same page(s) (shared memory). One of the mappings is registered with userfaultfd (in minor mode), and the other is not. Via the non-UFFD mapping, the underlying pages have already been allocated & filled with some contents. The UFFD mapping has not yet been faulted in; when it is touched for the first time, this results in what I'm calling a "minor" fault. As a concrete example, when working with hugetlbfs, we have huge_pte_none(), but find_lock_page() finds an existing page. We also add a new ioctl to resolve such faults: UFFDIO_CONTINUE. The idea is, userspace resolves the fault by either a) doing nothing if the contents are already correct, or b) updating the underlying contents using the second, non-UFFD mapping (via memcpy/memset or similar, or something fancier like RDMA, or etc...). In either case, userspace issues UFFDIO_CONTINUE to tell the kernel "I have ensured the page contents are correct, carry on setting up the mapping". Use Case ======== Consider the use case of VM live migration (e.g. under QEMU/KVM): 1. While a VM is still running, we copy the contents of its memory to a target machine. The pages are populated on the target by writing to the non-UFFD mapping, using the setup described above. The VM is still running (and therefore its memory is likely changing), so this may be repeated several times, until we decide the target is "up to date enough". 2. We pause the VM on the source, and start executing on the target machine. During this gap, the VM's user(s) will *see* a pause, so it is desirable to minimize this window. 3. Between the last time any page was copied from the source to the target, and when the VM was paused, the contents of that page may have changed - and therefore the copy we have on the target machine is out of date. Although we can keep track of which pages are out of date, for VMs with large amounts of memory, it is "slow" to transfer this information to the target machine. We want to resume execution before such a transfer would complete. 4. So, the guest begins executing on the target machine. The first time it touches its memory (via the UFFD-registered mapping), userspace wants to intercept this fault. Userspace checks whether or not the page is up to date, and if not, copies the updated page from the source machine, via the non-UFFD mapping. Finally, whether a copy was performed or not, userspace issues a UFFDIO_CONTINUE ioctl to tell the kernel "I have ensured the page contents are correct, carry on setting up the mapping". We don't have to do all of the final updates on-demand. The userfaultfd manager can, in the background, also copy over updated pages once it receives the map of which pages are up-to-date or not. Interaction with Existing APIs ============================== Because this is a feature, a registered VMA could potentially receive both missing and minor faults. I spent some time thinking through how the existing API interacts with the new feature: UFFDIO_CONTINUE cannot be used to resolve non-minor faults, as it does not allocate a new page. If UFFDIO_CONTINUE is used on a non-minor fault: - For non-shared memory or shmem, -EINVAL is returned. - For hugetlb, -EFAULT is returned. UFFDIO_COPY and UFFDIO_ZEROPAGE cannot be used to resolve minor faults. Without modifications, the existing codepath assumes a new page needs to be allocated. This is okay, since userspace must have a second non-UFFD-registered mapping anyway, thus there isn't much reason to want to use these in any case (just memcpy or memset or similar). - If UFFDIO_COPY is used on a minor fault, -EEXIST is returned. - If UFFDIO_ZEROPAGE is used on a minor fault, -EEXIST is returned (or -EINVAL in the case of hugetlb, as UFFDIO_ZEROPAGE is unsupported in any case). - UFFDIO_WRITEPROTECT simply doesn't work with shared memory, and returns -ENOENT in that case (regardless of the kind of fault). Future Work =========== This series only supports hugetlbfs. I have a second series in flight to support shmem as well, extending the functionality. This series is more mature than the shmem support at this point, and the functionality works fully on hugetlbfs, so this series can be merged first and then shmem support will follow. This patch (of 6): This feature allows userspace to intercept "minor" faults. By "minor" faults, I mean the following situation: Let there exist two mappings (i.e., VMAs) to the same page(s). One of the mappings is registered with userfaultfd (in minor mode), and the other is not. Via the non-UFFD mapping, the underlying pages have already been allocated & filled with some contents. The UFFD mapping has not yet been faulted in; when it is touched for the first time, this results in what I'm calling a "minor" fault. As a concrete example, when working with hugetlbfs, we have huge_pte_none(), but find_lock_page() finds an existing page. This commit adds the new registration mode, and sets the relevant flag on the VMAs being registered. In the hugetlb fault path, if we find that we have huge_pte_none(), but find_lock_page() does indeed find an existing page, then we have a "minor" fault, and if the VMA has the userfaultfd registration flag, we call into userfaultfd to handle it. This is implemented as a new registration mode, instead of an API feature. This is because the alternative implementation has significant drawbacks [1]. However, doing it this was requires we allocate a VM_* flag for the new registration mode. On 32-bit systems, there are no unused bits, so this feature is only supported on architectures with CONFIG_ARCH_USES_HIGH_VMA_FLAGS. When attempting to register a VMA in MINOR mode on 32-bit architectures, we return -EINVAL. [1] https://lore.kernel.org/patchwork/patch/1380226/ [peterx@redhat.com: fix minor fault page leak] Link: https://lkml.kernel.org/r/20210322175132.36659-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20210301222728.176417-1-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20210301222728.176417-2-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Reviewed-by: Peter Xu Reviewed-by: Mike Kravetz Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Chinwen Chang Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Lokesh Gidra Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: "Michal Koutn" Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Peter Xu Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Rostedt Cc: Steven Price Cc: Vlastimil Babka Cc: Adam Ruprecht Cc: Axel Rasmussen Cc: Cannon Matthews Cc: "Dr . David Alan Gilbert" Cc: David Rientjes Cc: Mina Almasry Cc: Oliver Upton Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 +++++++ include/linux/userfaultfd_k.h | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 011f43605807..1dbb53c44243 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -372,6 +372,13 @@ extern unsigned int kobjsize(const void *objp); # define VM_GROWSUP VM_NONE #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR +# define VM_UFFD_MINOR_BIT 37 +# define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ +#else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ +# define VM_UFFD_MINOR VM_NONE +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ + /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index c63ccdae3eab..0390e5ac63b3 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -17,6 +17,9 @@ #include #include +/* The set of all possible UFFD-related VM flags. */ +#define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR) + /* * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining * new flags, since they might collide with O_* ones. We want @@ -71,6 +74,11 @@ static inline bool userfaultfd_wp(struct vm_area_struct *vma) return vma->vm_flags & VM_UFFD_WP; } +static inline bool userfaultfd_minor(struct vm_area_struct *vma) +{ + return vma->vm_flags & VM_UFFD_MINOR; +} + static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, pte_t pte) { @@ -85,7 +93,7 @@ static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, static inline bool userfaultfd_armed(struct vm_area_struct *vma) { - return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP); + return vma->vm_flags & __VM_UFFD_FLAGS; } extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); @@ -132,6 +140,11 @@ static inline bool userfaultfd_wp(struct vm_area_struct *vma) return false; } +static inline bool userfaultfd_minor(struct vm_area_struct *vma) +{ + return false; +} + static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, pte_t pte) { -- cgit v1.2.3 From 0d9cadabd193c6008d256533f544de8206fd3a80 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Tue, 4 May 2021 18:35:40 -0700 Subject: userfaultfd: disable huge PMD sharing for MINOR registered VMAs As the comment says: for the MINOR fault use case, although the page might be present and populated in the other (non-UFFD-registered) half of the mapping, it may be out of date, and we explicitly want userspace to get a minor fault so it can check and potentially update the page's contents. Huge PMD sharing would prevent these faults from occurring for suitably aligned areas, so disable it upon UFFD registration. Link: https://lkml.kernel.org/r/20210301222728.176417-3-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Reviewed-by: Peter Xu Reviewed-by: Mike Kravetz Cc: Adam Ruprecht Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Cannon Matthews Cc: Catalin Marinas Cc: Chinwen Chang Cc: David Rientjes Cc: "Dr . David Alan Gilbert" Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Kirill A. Shutemov Cc: Lokesh Gidra Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: "Michal Koutn" Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Mina Almasry Cc: Nicholas Piggin Cc: Oliver Upton Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Price Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 0390e5ac63b3..e060d5f77cc5 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -56,12 +56,19 @@ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, } /* - * Never enable huge pmd sharing on uffd-wp registered vmas, because uffd-wp - * protect information is per pgtable entry. + * Never enable huge pmd sharing on some uffd registered vmas: + * + * - VM_UFFD_WP VMAs, because write protect information is per pgtable entry. + * + * - VM_UFFD_MINOR VMAs, because otherwise we would never get minor faults for + * VMAs which share huge pmds. (If you have two mappings to the same + * underlying pages, and fault in the non-UFFD-registered one with a write, + * with huge pmd sharing this would *also* setup the second UFFD-registered + * mapping, and we'd not get minor faults.) */ static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma) { - return vma->vm_flags & VM_UFFD_WP; + return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); } static inline bool userfaultfd_missing(struct vm_area_struct *vma) -- cgit v1.2.3 From 714c189108244f1df579689061db1d785d92e7e2 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Tue, 4 May 2021 18:35:45 -0700 Subject: userfaultfd: hugetlbfs: only compile UFFD helpers if config enabled For background, mm/userfaultfd.c provides a general mcopy_atomic implementation. But some types of memory (i.e., hugetlb and shmem) need a slightly different implementation, so they provide their own helpers for this. In other words, userfaultfd is the only caller of these functions. This patch achieves two things: 1. Don't spend time compiling code which will end up never being referenced anyway (a small build time optimization). 2. In patches later in this series, we extend the signature of these helpers with UFFD-specific state (a mode enumeration). Once this happens, we *have to* either not compile the helpers, or unconditionally define the UFFD-only state (which seems messier to me). This includes the declarations in the headers, as otherwise they'd yield warnings about implicitly defining the type of those arguments. Link: https://lkml.kernel.org/r/20210301222728.176417-4-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Reviewed-by: Mike Kravetz Reviewed-by: Peter Xu Cc: Adam Ruprecht Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Cannon Matthews Cc: Catalin Marinas Cc: Chinwen Chang Cc: David Rientjes Cc: "Dr . David Alan Gilbert" Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Kirill A. Shutemov Cc: Lokesh Gidra Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: "Michal Koutn" Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Mina Almasry Cc: Nicholas Piggin Cc: Oliver Upton Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Price Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d0f310ae3f82..a1dbe4568707 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -134,11 +134,13 @@ void hugetlb_show_meminfo(void); unsigned long hugetlb_total_pages(void); vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); +#ifdef CONFIG_USERFAULTFD int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, struct page **pagep); +#endif /* CONFIG_USERFAULTFD */ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags); @@ -310,6 +312,7 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, BUG(); } +#ifdef CONFIG_USERFAULTFD static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, struct vm_area_struct *dst_vma, @@ -320,6 +323,7 @@ static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, BUG(); return 0; } +#endif /* CONFIG_USERFAULTFD */ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) -- cgit v1.2.3 From f619147104c8ea71e120e4936d2b68ec11a1e527 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Tue, 4 May 2021 18:35:49 -0700 Subject: userfaultfd: add UFFDIO_CONTINUE ioctl This ioctl is how userspace ought to resolve "minor" userfaults. The idea is, userspace is notified that a minor fault has occurred. It might change the contents of the page using its second non-UFFD mapping, or not. Then, it calls UFFDIO_CONTINUE to tell the kernel "I have ensured the page contents are correct, carry on setting up the mapping". Note that it doesn't make much sense to use UFFDIO_{COPY,ZEROPAGE} for MINOR registered VMAs. ZEROPAGE maps the VMA to the zero page; but in the minor fault case, we already have some pre-existing underlying page. Likewise, UFFDIO_COPY isn't useful if we have a second non-UFFD mapping. We'd just use memcpy() or similar instead. It turns out hugetlb_mcopy_atomic_pte() already does very close to what we want, if an existing page is provided via `struct page **pagep`. We already special-case the behavior a bit for the UFFDIO_ZEROPAGE case, so just extend that design: add an enum for the three modes of operation, and make the small adjustments needed for the MCOPY_ATOMIC_CONTINUE case. (Basically, look up the existing page, and avoid adding the existing page to the page cache or calling set_page_huge_active() on it.) Link: https://lkml.kernel.org/r/20210301222728.176417-5-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Reviewed-by: Peter Xu Cc: Adam Ruprecht Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Cannon Matthews Cc: Catalin Marinas Cc: Chinwen Chang Cc: David Rientjes Cc: "Dr . David Alan Gilbert" Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Kirill A. Shutemov Cc: Lokesh Gidra Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: "Michal Koutn" Cc: Michel Lespinasse Cc: Mike Kravetz Cc: Mike Rapoport Cc: Mina Almasry Cc: Nicholas Piggin Cc: Oliver Upton Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Price Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 3 +++ include/linux/userfaultfd_k.h | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index a1dbe4568707..b92f25ccef58 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -11,6 +11,7 @@ #include #include #include +#include struct ctl_table; struct user_struct; @@ -139,6 +140,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, + enum mcopy_atomic_mode mode, struct page **pagep); #endif /* CONFIG_USERFAULTFD */ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, @@ -318,6 +320,7 @@ static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, + enum mcopy_atomic_mode mode, struct page **pagep) { BUG(); diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index e060d5f77cc5..794d1538b8ba 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -37,6 +37,22 @@ extern int sysctl_unprivileged_userfaultfd; extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); +/* + * The mode of operation for __mcopy_atomic and its helpers. + * + * This is almost an implementation detail (mcopy_atomic below doesn't take this + * as a parameter), but it's exposed here because memory-kind-specific + * implementations (e.g. hugetlbfs) need to know the mode of operation. + */ +enum mcopy_atomic_mode { + /* A normal copy_from_user into the destination range. */ + MCOPY_ATOMIC_NORMAL, + /* Don't copy; map the destination range to the zero page. */ + MCOPY_ATOMIC_ZEROPAGE, + /* Just install pte(s) with the existing page(s) in the page cache. */ + MCOPY_ATOMIC_CONTINUE, +}; + extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, bool *mmap_changing, __u64 mode); @@ -44,6 +60,8 @@ extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long len, bool *mmap_changing); +extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start, + unsigned long len, bool *mmap_changing); extern int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, bool *mmap_changing); -- cgit v1.2.3 From 202e35db5e719ee8af6028183403f475e243f82d Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 4 May 2021 18:36:04 -0700 Subject: mm/vmscan: replace implicit RECLAIM_ZONE checks with explicit checks RECLAIM_ZONE was assumed to be unused because it was never explicitly used in the kernel. However, there were a number of places where it was checked implicitly by checking 'node_reclaim_mode' for a zero value. These zero checks are not great because it is not obvious what a zero mode *means* in the code. Replace them with a helper which makes it more obvious: node_reclaim_enabled(). This helper also provides a handy place to explicitly check the RECLAIM_ZONE bit itself. Check it explicitly there to make it more obvious where the bit can affect behavior. This should have no functional impact. Link: https://lkml.kernel.org/r/20210219172559.BF589C44@viggo.jf.intel.com Signed-off-by: Dave Hansen Reviewed-by: Ben Widawsky Reviewed-by: Oscar Salvador Acked-by: Christoph Lameter Acked-by: David Rientjes Cc: Alex Shi Cc: "Tobin C. Harding" Cc: Huang Ying Cc: Dan Williams Cc: Qian Cai Cc: Daniel Wagner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 4cc6ec3bf0ab..42191da1bdc9 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -12,6 +12,7 @@ #include #include #include +#include #include struct notifier_block; @@ -378,6 +379,12 @@ extern int sysctl_min_slab_ratio; #define node_reclaim_mode 0 #endif +static inline bool node_reclaim_enabled(void) +{ + /* Is any node_reclaim_mode bit set? */ + return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); +} + extern void check_move_unevictable_pages(struct pagevec *pvec); extern int kswapd_run(int nid); -- cgit v1.2.3 From 2bfd36374edd9ed7f2ebf66cacebedf7273901cb Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 4 May 2021 18:36:11 -0700 Subject: mm: vmscan: consolidate shrinker_maps handling code The shrinker map management is not purely memcg specific, it is at the intersection between memory cgroup and shrinkers. It's allocation and assignment of a structure, and the only memcg bit is the map is being stored in a memcg structure. So move the shrinker_maps handling code into vmscan.c for tighter integration with shrinker code, and remove the "memcg_" prefix. There is no functional change. Link: https://lkml.kernel.org/r/20210311190845.9708-3-shy828301@gmail.com Signed-off-by: Yang Shi Acked-by: Vlastimil Babka Acked-by: Kirill Tkhai Acked-by: Roman Gushchin Reviewed-by: Shakeel Butt Cc: Dave Chinner Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5904716f29ba..7dbd2c9bad32 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1610,10 +1610,9 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) return false; } -extern int memcg_expand_shrinker_maps(int new_id); - -extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg, - int nid, int shrinker_id); +int alloc_shrinker_maps(struct mem_cgroup *memcg); +void free_shrinker_maps(struct mem_cgroup *memcg); +void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); #else #define mem_cgroup_sockets_enabled 0 static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; @@ -1623,8 +1622,8 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) return false; } -static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg, - int nid, int shrinker_id) +static inline void set_shrinker_bit(struct mem_cgroup *memcg, + int nid, int shrinker_id) { } #endif -- cgit v1.2.3 From e4262c4f51d6373447c9d89093f49ff6b1e607be Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 4 May 2021 18:36:23 -0700 Subject: mm: memcontrol: rename shrinker_map to shrinker_info The following patch is going to add nr_deferred into shrinker_map, the change will make shrinker_map not only include map anymore, so rename it to "memcg_shrinker_info". And this should make the patch adding nr_deferred cleaner and readable and make review easier. Also remove the "memcg_" prefix. Link: https://lkml.kernel.org/r/20210311190845.9708-7-shy828301@gmail.com Signed-off-by: Yang Shi Acked-by: Vlastimil Babka Acked-by: Kirill Tkhai Acked-by: Roman Gushchin Reviewed-by: Shakeel Butt Cc: Dave Chinner Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7dbd2c9bad32..6cd800fe9a67 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -117,7 +117,7 @@ struct batched_lruvec_stat { * Bitmap of shrinker::id corresponding to memcg-aware shrinkers, * which have elements charged to this memcg. */ -struct memcg_shrinker_map { +struct shrinker_info { struct rcu_head rcu; unsigned long map[]; }; @@ -145,7 +145,7 @@ struct mem_cgroup_per_node { struct mem_cgroup_reclaim_iter iter; - struct memcg_shrinker_map __rcu *shrinker_map; + struct shrinker_info __rcu *shrinker_info; struct rb_node tree_node; /* RB tree node */ unsigned long usage_in_excess;/* Set to the value by which */ @@ -1610,8 +1610,8 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) return false; } -int alloc_shrinker_maps(struct mem_cgroup *memcg); -void free_shrinker_maps(struct mem_cgroup *memcg); +int alloc_shrinker_info(struct mem_cgroup *memcg); +void free_shrinker_info(struct mem_cgroup *memcg); void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); #else #define mem_cgroup_sockets_enabled 0 -- cgit v1.2.3 From 41ca668a71e7b03743369a2c6d8b8edc1e943dc8 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 4 May 2021 18:36:29 -0700 Subject: mm: vmscan: use a new flag to indicate shrinker is registered Currently registered shrinker is indicated by non-NULL shrinker->nr_deferred. This approach is fine with nr_deferred at the shrinker level, but the following patches will move MEMCG_AWARE shrinkers' nr_deferred to memcg level, so their shrinker->nr_deferred would always be NULL. This would prevent the shrinkers from unregistering correctly. Remove SHRINKER_REGISTERING since we could check if shrinker is registered successfully by the new flag. Link: https://lkml.kernel.org/r/20210311190845.9708-9-shy828301@gmail.com Signed-off-by: Yang Shi Acked-by: Kirill Tkhai Acked-by: Vlastimil Babka Acked-by: Roman Gushchin Reviewed-by: Shakeel Butt Cc: Dave Chinner Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shrinker.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 0f80123650e2..1eac79ce57d4 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -79,13 +79,14 @@ struct shrinker { #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ /* Flags */ -#define SHRINKER_NUMA_AWARE (1 << 0) -#define SHRINKER_MEMCG_AWARE (1 << 1) +#define SHRINKER_REGISTERED (1 << 0) +#define SHRINKER_NUMA_AWARE (1 << 1) +#define SHRINKER_MEMCG_AWARE (1 << 2) /* * It just makes sense when the shrinker is also MEMCG_AWARE for now, * non-MEMCG_AWARE shrinker should not have this flag set. */ -#define SHRINKER_NONSLAB (1 << 2) +#define SHRINKER_NONSLAB (1 << 3) extern int prealloc_shrinker(struct shrinker *shrinker); extern void register_shrinker_prepared(struct shrinker *shrinker); -- cgit v1.2.3 From 3c6f17e6c5d048c8029578c475dd037dd5db58af Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 4 May 2021 18:36:33 -0700 Subject: mm: vmscan: add per memcg shrinker nr_deferred Currently the number of deferred objects are per shrinker, but some slabs, for example, vfs inode/dentry cache are per memcg, this would result in poor isolation among memcgs. The deferred objects typically are generated by __GFP_NOFS allocations, one memcg with excessive __GFP_NOFS allocations may blow up deferred objects, then other innocent memcgs may suffer from over shrink, excessive reclaim latency, etc. For example, two workloads run in memcgA and memcgB respectively, workload in B is vfs heavy workload. Workload in A generates excessive deferred objects, then B's vfs cache might be hit heavily (drop half of caches) by B's limit reclaim or global reclaim. We observed this hit in our production environment which was running vfs heavy workload shown as the below tracing log: <...>-409454 [016] .... 28286961.747146: mm_shrink_slab_start: super_cache_scan+0x0/0x1a0 ffff9a83046f3458: nid: 1 objects to shrink 3641681686040 gfp_flags GFP_HIGHUSER_MOVABLE|__GFP_ZERO pgs_scanned 1 lru_pgs 15721 cache items 246404277 delta 31345 total_scan 123202138 <...>-409454 [022] .... 28287105.928018: mm_shrink_slab_end: super_cache_scan+0x0/0x1a0 ffff9a83046f3458: nid: 1 unused scan count 3641681686040 new scan count 3641798379189 total_scan 602 last shrinker return val 123186855 The vfs cache and page cache ratio was 10:1 on this machine, and half of caches were dropped. This also resulted in significant amount of page caches were dropped due to inodes eviction. Make nr_deferred per memcg for memcg aware shrinkers would solve the unfairness and bring better isolation. The following patch will add nr_deferred to parent memcg when memcg offline. To preserve nr_deferred when reparenting memcgs to root, root memcg needs shrinker_info allocated too. When memcg is not enabled (!CONFIG_MEMCG or memcg disabled), the shrinker's nr_deferred would be used. And non memcg aware shrinkers use shrinker's nr_deferred all the time. Link: https://lkml.kernel.org/r/20210311190845.9708-10-shy828301@gmail.com Signed-off-by: Yang Shi Acked-by: Roman Gushchin Acked-by: Kirill Tkhai Reviewed-by: Shakeel Butt Cc: Dave Chinner Cc: Johannes Weiner Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6cd800fe9a67..32bd62047238 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -114,12 +114,13 @@ struct batched_lruvec_stat { }; /* - * Bitmap of shrinker::id corresponding to memcg-aware shrinkers, - * which have elements charged to this memcg. + * Bitmap and deferred work of shrinker::id corresponding to memcg-aware + * shrinkers, which have elements charged to this memcg. */ struct shrinker_info { struct rcu_head rcu; - unsigned long map[]; + atomic_long_t *nr_deferred; + unsigned long *map; }; /* -- cgit v1.2.3 From a178015cde69981cdcd8f109c5abc98703fead62 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 4 May 2021 18:36:42 -0700 Subject: mm: memcontrol: reparent nr_deferred when memcg offline Now shrinker's nr_deferred is per memcg for memcg aware shrinkers, add to parent's corresponding nr_deferred when memcg offline. Link: https://lkml.kernel.org/r/20210311190845.9708-13-shy828301@gmail.com Signed-off-by: Yang Shi Acked-by: Vlastimil Babka Acked-by: Kirill Tkhai Acked-by: Roman Gushchin Reviewed-by: Shakeel Butt Cc: Dave Chinner Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 32bd62047238..c193be760709 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1614,6 +1614,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) int alloc_shrinker_info(struct mem_cgroup *memcg); void free_shrinker_info(struct mem_cgroup *memcg); void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); +void reparent_shrinker_deferred(struct mem_cgroup *memcg); #else #define mem_cgroup_sockets_enabled 0 static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; -- cgit v1.2.3 From ef4984384172e93cc95e0e8cd102536d67e8a787 Mon Sep 17 00:00:00 2001 From: Pintu Kumar Date: Tue, 4 May 2021 18:36:48 -0700 Subject: mm/compaction: remove unused variable sysctl_compact_memory The sysctl_compact_memory is mostly unused in mm/compaction.c It just acts as a place holder for sysctl to store .data. But the .data itself is not needed here. So we can get ride of this variable completely and make .data as NULL. This will also eliminate the extern declaration from header file. No functionality is broken or changed this way. Link: https://lkml.kernel.org/r/1614852224-14671-1-git-send-email-pintu@codeaurora.org Signed-off-by: Pintu Kumar Signed-off-by: Pintu Agarwal Reviewed-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index ed4070ed41ef..4221888bdcd6 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -81,7 +81,6 @@ static inline unsigned long compact_gap(unsigned int order) } #ifdef CONFIG_COMPACTION -extern int sysctl_compact_memory; extern unsigned int sysctl_compaction_proactiveness; extern int sysctl_compaction_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); -- cgit v1.2.3 From d479960e44f27e0e52ba31b21740b703c538027c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 4 May 2021 18:36:54 -0700 Subject: mm: disable LRU pagevec during the migration temporarily LRU pagevec holds refcount of pages until the pagevec are drained. It could prevent migration since the refcount of the page is greater than the expection in migration logic. To mitigate the issue, callers of migrate_pages drains LRU pagevec via migrate_prep or lru_add_drain_all before migrate_pages call. However, it's not enough because pages coming into pagevec after the draining call still could stay at the pagevec so it could keep preventing page migration. Since some callers of migrate_pages have retrial logic with LRU draining, the page would migrate at next trail but it is still fragile in that it doesn't close the fundamental race between upcoming LRU pages into pagvec and migration so the migration failure could cause contiguous memory allocation failure in the end. To close the race, this patch disables lru caches(i.e, pagevec) during ongoing migration until migrate is done. Since it's really hard to reproduce, I measured how many times migrate_pages retried with force mode(it is about a fallback to a sync migration) with below debug code. int migrate_pages(struct list_head *from, new_page_t get_new_page, .. .. if (rc && reason == MR_CONTIG_RANGE && pass > 2) { printk(KERN_ERR, "pfn 0x%lx reason %d", page_to_pfn(page), rc); dump_page(page, "fail to migrate"); } The test was repeating android apps launching with cma allocation in background every five seconds. Total cma allocation count was about 500 during the testing. With this patch, the dump_page count was reduced from 400 to 30. The new interface is also useful for memory hotplug which currently drains lru pcp caches after each migration failure. This is rather suboptimal as it has to disrupt others running during the operation. With the new interface the operation happens only once. This is also in line with pcp allocator cache which are disabled for the offlining as well. Link: https://lkml.kernel.org/r/20210319175127.886124-1-minchan@kernel.org Signed-off-by: Minchan Kim Reviewed-by: Chris Goldsworthy Acked-by: Michal Hocko Cc: John Dias Cc: Suren Baghdasaryan Cc: Matthew Wilcox Cc: David Hildenbrand Cc: Vlastimil Babka Cc: Oliver Sang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 2 ++ include/linux/swap.h | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3a389633b68f..9e4a2dc8622c 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -46,6 +46,7 @@ extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void putback_movable_page(struct page *page); extern void migrate_prep(void); +extern void migrate_finish(void); extern void migrate_prep_local(void); extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); @@ -67,6 +68,7 @@ static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } static inline int migrate_prep(void) { return -ENOSYS; } +static inline int migrate_finish(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } static inline void migrate_page_states(struct page *newpage, struct page *page) diff --git a/include/linux/swap.h b/include/linux/swap.h index 42191da1bdc9..f69e0f67651d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -340,6 +340,20 @@ extern void lru_note_cost(struct lruvec *lruvec, bool file, extern void lru_note_cost_page(struct page *); extern void lru_cache_add(struct page *); extern void mark_page_accessed(struct page *); + +extern atomic_t lru_disable_count; + +static inline bool lru_cache_disabled(void) +{ + return atomic_read(&lru_disable_count); +} + +static inline void lru_cache_enable(void) +{ + atomic_dec(&lru_disable_count); +} + +extern void lru_cache_disable(void); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); -- cgit v1.2.3 From 361a2a229fa31ab7f2b236b5946e434964d00762 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 4 May 2021 18:36:57 -0700 Subject: mm: replace migrate_[prep|finish] with lru_cache_[disable|enable] Currently, migrate_[prep|finish] is merely a wrapper of lru_cache_[disable|enable]. There is not much to gain from having additional abstraction. Use lru_cache_[disable|enable] instead of migrate_[prep|finish], which would be more descriptive. note: migrate_prep_local in compaction.c changed into lru_add_drain to avoid CPU schedule cost with involving many other CPUs to keep old behavior. Link: https://lkml.kernel.org/r/20210319175127.886124-2-minchan@kernel.org Signed-off-by: Minchan Kim Acked-by: Michal Hocko Reviewed-by: David Hildenbrand Cc: Chris Goldsworthy Cc: John Dias Cc: Matthew Wilcox Cc: Oliver Sang Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 9e4a2dc8622c..6155d97ec76c 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -45,9 +45,6 @@ extern struct page *alloc_migration_target(struct page *page, unsigned long priv extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void putback_movable_page(struct page *page); -extern void migrate_prep(void); -extern void migrate_finish(void); -extern void migrate_prep_local(void); extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, @@ -67,10 +64,6 @@ static inline struct page *alloc_migration_target(struct page *page, static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } -static inline int migrate_prep(void) { return -ENOSYS; } -static inline int migrate_finish(void) { return -ENOSYS; } -static inline int migrate_prep_local(void) { return -ENOSYS; } - static inline void migrate_page_states(struct page *newpage, struct page *page) { } -- cgit v1.2.3 From 8cc621d2f45ddd3dc664024a647ee7adf48d79a5 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 4 May 2021 18:37:00 -0700 Subject: mm: fs: invalidate BH LRU during page migration Pages containing buffer_heads that are in one of the per-CPU buffer_head LRU caches will be pinned and thus cannot be migrated. This can prevent CMA allocations from succeeding, which are often used on platforms with co-processors (such as a DSP) that can only use physically contiguous memory. It can also prevent memory hot-unplugging from succeeding, which involves migrating at least MIN_MEMORY_BLOCK_SIZE bytes of memory, which ranges from 8 MiB to 1 GiB based on the architecture in use. Correspondingly, invalidate the BH LRU caches before a migration starts and stop any buffer_head from being cached in the LRU caches, until migration has finished. Link: https://lkml.kernel.org/r/20210319175127.886124-3-minchan@kernel.org Signed-off-by: Minchan Kim Reported-by: Chris Goldsworthy Reported-by: Laura Abbott Tested-by: Oliver Sang Cc: David Hildenbrand Cc: John Dias Cc: Matthew Wilcox Cc: Michal Hocko Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6b47f94378c5..e7e99da31349 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -194,6 +194,8 @@ void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); +void invalidate_bh_lrus_cpu(int cpu); +bool has_bh_in_lru(int cpu, void *dummy); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); @@ -406,6 +408,8 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } +static inline void invalidate_bh_lrus_cpu(int cpu) {} +static inline bool has_bh_in_lru(int cpu, void *dummy) { return 0; } #define buffer_heads_over_limit 0 #endif /* CONFIG_BLOCK */ -- cgit v1.2.3 From 606a6f71a25accfc960a5063c23717ff07aa43a3 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 4 May 2021 18:37:04 -0700 Subject: mm/migrate.c: make putback_movable_page() static Patch series "Cleanup and fixup for mm/migrate.c", v3. This series contains cleanups to remove unnecessary VM_BUG_ON_PAGE and rc != MIGRATEPAGE_SUCCESS check. Also use helper function to remove some duplicated codes. What's more, this fixes potential deadlock in NUMA balancing shared exec THP case and so on. More details can be found in the respective changelogs. This patch (of 5): The putback_movable_page() is just called by putback_movable_pages() and we know the page is locked and both PageMovable() and PageIsolated() is checked right before calling putback_movable_page(). So we make it static and remove all the 3 VM_BUG_ON_PAGE(). Link: https://lkml.kernel.org/r/20210325131524.48181-1-linmiaohe@huawei.com Link: https://lkml.kernel.org/r/20210325131524.48181-2-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: David Hildenbrand Reviewed-by: Yang Shi Cc: Jerome Glisse Cc: Rafael Aquini Cc: Alistair Popple Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 6155d97ec76c..175ef15ae9e8 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -43,7 +43,6 @@ extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); extern struct page *alloc_migration_target(struct page *page, unsigned long private); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); -extern void putback_movable_page(struct page *page); extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); -- cgit v1.2.3 From bbb269206f3c914d4f23e023de4ec020abea6d1b Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 4 May 2021 18:37:19 -0700 Subject: mm: vmstat: add cma statistics Since CMA is used more widely, it's worth to have CMA allocation statistics into vmstat. With it, we could know how agressively system uses cma allocation and how often it fails. Link: https://lkml.kernel.org/r/20210302183346.3707237-1-minchan@kernel.org Signed-off-by: Minchan Kim Reviewed-by: John Hubbard Cc: John Dias Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 18e75974d4e3..21d7c7f72f1c 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -70,6 +70,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #endif #ifdef CONFIG_HUGETLB_PAGE HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, +#endif +#ifdef CONFIG_CMA + CMA_ALLOC_SUCCESS, + CMA_ALLOC_FAIL, #endif UNEVICTABLE_PGCULLED, /* culled to noreclaim list */ UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */ -- cgit v1.2.3 From 78fa51503fdbe463c96eef4c3cf69ca54032647a Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 4 May 2021 18:37:34 -0700 Subject: mm: use proper type for cma_[alloc|release] size_t in cma_alloc is confusing since it makes people think it's byte count, not pages. Change it to unsigned long[1]. The unsigned int in cma_release is also not right so change it. Since we have unsigned long in cma_release, free_contig_range should also respect it. [1] 67a2e213e7e9, mm: cma: fix incorrect type conversion for size during dma allocation Link: https://lore.kernel.org/linux-mm/20210324043434.GP1719932@casper.infradead.org/ Link: https://lkml.kernel.org/r/20210331164018.710560-1-minchan@kernel.org Signed-off-by: Minchan Kim Reviewed-by: David Hildenbrand Cc: Matthew Wilcox Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cma.h | 4 ++-- include/linux/gfp.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cma.h b/include/linux/cma.h index 217999c8a762..53fd8c3cdbd0 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -44,9 +44,9 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, const char *name, struct cma **res_cma); -extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, +extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align, bool no_warn); -extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count); +extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count); extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); #endif diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 26f4d907254a..8a5f6c3d7dba 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -657,7 +657,7 @@ extern int alloc_contig_range(unsigned long start, unsigned long end, extern struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask, int nid, nodemask_t *nodemask); #endif -void free_contig_range(unsigned long pfn, unsigned int nr_pages); +void free_contig_range(unsigned long pfn, unsigned long nr_pages); #ifdef CONFIG_CMA /* CMA stuff */ -- cgit v1.2.3 From 575299ea18a8c0575d4c2ef6ad3fa4d41d529d1c Mon Sep 17 00:00:00 2001 From: Saravanan D Date: Tue, 4 May 2021 18:38:03 -0700 Subject: x86/mm: track linear mapping split events To help with debugging the sluggishness caused by TLB miss/reload, we introduce monotonic hugepage [direct mapped] split event counts since system state: SYSTEM_RUNNING to be displayed as part of /proc/vmstat in x86 servers The lifetime split event information will be displayed at the bottom of /proc/vmstat .... swap_ra 0 swap_ra_hit 0 direct_map_level2_splits 94 direct_map_level3_splits 4 nr_unstable 0 .... One of the many lasting sources of direct hugepage splits is kernel tracing (kprobes, tracepoints). Note that the kernel's code segment [512 MB] points to the same physical addresses that have been already mapped in the kernel's direct mapping range. Source : Documentation/x86/x86_64/mm.rst When we enable kernel tracing, the kernel has to modify attributes/permissions of the text segment hugepages that are direct mapped causing them to split. Kernel's direct mapped hugepages do not coalesce back after split and remain in place for the remainder of the lifetime. An instance of direct page splits when we turn on dynamic kernel tracing .... cat /proc/vmstat | grep -i direct_map_level direct_map_level2_splits 784 direct_map_level3_splits 12 bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ [pid, comm] = count(); }' cat /proc/vmstat | grep -i direct_map_level direct_map_level2_splits 789 direct_map_level3_splits 12 .... Link: https://lkml.kernel.org/r/20210218235744.1040634-1-saravanand@fb.com Signed-off-by: Saravanan D Acked-by: Tejun Heo Acked-by: Johannes Weiner Acked-by: Dave Hansen Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 21d7c7f72f1c..ae0dd1948c2b 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -124,6 +124,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, +#endif +#ifdef CONFIG_X86 + DIRECT_MAP_LEVEL2_SPLIT, + DIRECT_MAP_LEVEL3_SPLIT, #endif NR_VM_EVENT_ITEMS }; -- cgit v1.2.3 From 1a08ae36cf8b5f26d0c64ebfe46f8eb07ea0b678 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 4 May 2021 18:38:53 -0700 Subject: mm cma: rename PF_MEMALLOC_NOCMA to PF_MEMALLOC_PIN PF_MEMALLOC_NOCMA is used ot guarantee that the allocator will not return pages that might belong to CMA region. This is currently used for long term gup to make sure that such pins are not going to be done on any CMA pages. When PF_MEMALLOC_NOCMA has been introduced we haven't realized that it is focusing on CMA pages too much and that there is larger class of pages that need the same treatment. MOVABLE zone cannot contain any long term pins as well so it makes sense to reuse and redefine this flag for that usecase as well. Rename the flag to PF_MEMALLOC_PIN which defines an allocation context which can only get pages suitable for long-term pins. Also rename: memalloc_nocma_save()/memalloc_nocma_restore to memalloc_pin_save()/memalloc_pin_restore() and make the new functions common. [rppt@linux.ibm.com: fix renaming of PF_MEMALLOC_NOCMA to PF_MEMALLOC_PIN] Link: https://lkml.kernel.org/r/20210331163816.11517-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20210215161349.246722-6-pasha.tatashin@soleen.com Signed-off-by: Pavel Tatashin Reviewed-by: John Hubbard Acked-by: Michal Hocko Signed-off-by: Mike Rapoport Cc: Dan Williams Cc: David Hildenbrand Cc: David Rientjes Cc: Ingo Molnar Cc: Ira Weiny Cc: James Morris Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt (VMware) Cc: Tyler Hicks Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- include/linux/sched/mm.h | 21 +++++---------------- 2 files changed, 6 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c25c8e67030..d2c881384517 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1583,7 +1583,7 @@ extern struct pid *cad_pid; #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */ +#define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 90b2a0bce11c..ae654819e8aa 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -271,29 +271,18 @@ static inline void memalloc_noreclaim_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC) | flags; } -#ifdef CONFIG_CMA -static inline unsigned int memalloc_nocma_save(void) +static inline unsigned int memalloc_pin_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_NOCMA; + unsigned int flags = current->flags & PF_MEMALLOC_PIN; - current->flags |= PF_MEMALLOC_NOCMA; + current->flags |= PF_MEMALLOC_PIN; return flags; } -static inline void memalloc_nocma_restore(unsigned int flags) +static inline void memalloc_pin_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_NOCMA) | flags; + current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags; } -#else -static inline unsigned int memalloc_nocma_save(void) -{ - return 0; -} - -static inline void memalloc_nocma_restore(unsigned int flags) -{ -} -#endif #ifdef CONFIG_MEMCG DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg); -- cgit v1.2.3 From 8e3560d963d22ba41857f48e4114ce80373144ea Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 4 May 2021 18:39:00 -0700 Subject: mm: honor PF_MEMALLOC_PIN for all movable pages PF_MEMALLOC_PIN is only honored for CMA pages, extend this flag to work for any allocations from ZONE_MOVABLE by removing __GFP_MOVABLE from gfp_mask when this flag is passed in the current context. Add is_pinnable_page() to return true if page is in a pinnable page. A pinnable page is not in ZONE_MOVABLE and not of MIGRATE_CMA type. Link: https://lkml.kernel.org/r/20210215161349.246722-8-pasha.tatashin@soleen.com Signed-off-by: Pavel Tatashin Acked-by: Michal Hocko Cc: Dan Williams Cc: David Hildenbrand Cc: David Rientjes Cc: Ingo Molnar Cc: Ira Weiny Cc: James Morris Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt (VMware) Cc: Tyler Hicks Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 18 ++++++++++++++++++ include/linux/sched/mm.h | 6 +++++- 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1dbb53c44243..d0e628f511e4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1141,6 +1141,11 @@ static inline bool is_zone_device_page(const struct page *page) } #endif +static inline bool is_zone_movable_page(const struct page *page) +{ + return page_zonenum(page) == ZONE_MOVABLE; +} + #ifdef CONFIG_DEV_PAGEMAP_OPS void free_devmap_managed_page(struct page *page); DECLARE_STATIC_KEY_FALSE(devmap_managed_key); @@ -1550,6 +1555,19 @@ static inline unsigned long page_to_section(const struct page *page) } #endif +/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ +#ifdef CONFIG_MIGRATION +static inline bool is_pinnable_page(struct page *page) +{ + return !is_zone_movable_page(page) && !is_migrate_cma_page(page); +} +#else +static inline bool is_pinnable_page(struct page *page) +{ + return true; +} +#endif + static inline void set_page_zone(struct page *page, enum zone_type zone) { page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index ae654819e8aa..e24b1fe348e3 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -151,12 +151,13 @@ static inline bool in_vfork(struct task_struct *tsk) * Applies per-task gfp context to the given allocation flags. * PF_MEMALLOC_NOIO implies GFP_NOIO * PF_MEMALLOC_NOFS implies GFP_NOFS + * PF_MEMALLOC_PIN implies !GFP_MOVABLE */ static inline gfp_t current_gfp_context(gfp_t flags) { unsigned int pflags = READ_ONCE(current->flags); - if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS))) { + if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) { /* * NOIO implies both NOIO and NOFS and it is a weaker context * so always make sure it makes precedence @@ -165,6 +166,9 @@ static inline gfp_t current_gfp_context(gfp_t flags) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; + + if (pflags & PF_MEMALLOC_PIN) + flags &= ~__GFP_MOVABLE; } return flags; } -- cgit v1.2.3 From 9afaf30f7a1aab2022961715a66f644275b8daec Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 4 May 2021 18:39:04 -0700 Subject: mm/gup: do not migrate zero page On some platforms ZERO_PAGE(0) might end-up in a movable zone. Do not migrate zero page in gup during longterm pinning as migration of zero page is not allowed. For example, in x86 QEMU with 16G of memory and kernelcore=5G parameter, I see the following: Boot#1: zero_pfn 0x48a8d zero_pfn zone: ZONE_DMA32 Boot#2: zero_pfn 0x20168d zero_pfn zone: ZONE_MOVABLE On x86, empty_zero_page is declared in .bss and depending on the loader may end up in different physical locations during boots. Also, move is_zero_pfn() my_zero_pfn() functions under CONFIG_MMU, because zero_pfn that they are using is declared in memory.c which is compiled with CONFIG_MMU. Link: https://lkml.kernel.org/r/20210215161349.246722-9-pasha.tatashin@soleen.com Signed-off-by: Pavel Tatashin Cc: Dan Williams Cc: David Hildenbrand Cc: David Rientjes Cc: Ingo Molnar Cc: Ira Weiny Cc: James Morris Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt (VMware) Cc: Tyler Hicks Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 ++- include/linux/mmzone.h | 4 ++++ include/linux/pgtable.h | 12 ++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index d0e628f511e4..76e27ebb28a3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1559,7 +1559,8 @@ static inline unsigned long page_to_section(const struct page *page) #ifdef CONFIG_MIGRATION static inline bool is_pinnable_page(struct page *page) { - return !is_zone_movable_page(page) && !is_migrate_cma_page(page); + return !(is_zone_movable_page(page) || is_migrate_cma_page(page)) || + is_zero_pfn(page_to_pfn(page)); } #else static inline bool is_pinnable_page(struct page *page) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3b2205741048..92b44149d5b9 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -427,6 +427,10 @@ enum zone_type { * techniques might use alloc_contig_range() to hide previously * exposed pages from the buddy again (e.g., to implement some sort * of memory unplug in virtio-mem). + * 6. ZERO_PAGE(0), kernelcore/movablecore setups might create + * situations where ZERO_PAGE(0) which is allocated differently + * on different platforms may end up in a movable zone. ZERO_PAGE(0) + * cannot be migrated. * * In general, no unmovable allocations that degrade memory offlining * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range()) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 5e772392a379..2194a9cd885c 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1111,6 +1111,7 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, extern void untrack_pfn_moved(struct vm_area_struct *vma); #endif +#ifdef CONFIG_MMU #ifdef __HAVE_COLOR_ZERO_PAGE static inline int is_zero_pfn(unsigned long pfn) { @@ -1134,6 +1135,17 @@ static inline unsigned long my_zero_pfn(unsigned long addr) return zero_pfn; } #endif +#else +static inline int is_zero_pfn(unsigned long pfn) +{ + return 0; +} + +static inline unsigned long my_zero_pfn(unsigned long addr) +{ + return 0; +} +#endif /* CONFIG_MMU */ #ifdef CONFIG_MMU -- cgit v1.2.3 From d1e153fea2a8940273174fc17733c44323d35cd5 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 4 May 2021 18:39:08 -0700 Subject: mm/gup: migrate pinned pages out of movable zone We should not pin pages in ZONE_MOVABLE. Currently, we do not pin only movable CMA pages. Generalize the function that migrates CMA pages to migrate all movable pages. Use is_pinnable_page() to check which pages need to be migrated Link: https://lkml.kernel.org/r/20210215161349.246722-10-pasha.tatashin@soleen.com Signed-off-by: Pavel Tatashin Reviewed-by: John Hubbard Cc: Dan Williams Cc: David Hildenbrand Cc: David Rientjes Cc: Ingo Molnar Cc: Ira Weiny Cc: James Morris Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt (VMware) Cc: Tyler Hicks Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 1 + include/linux/mmzone.h | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 175ef15ae9e8..4bb4e519e3f5 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -27,6 +27,7 @@ enum migrate_reason { MR_MEMPOLICY_MBIND, MR_NUMA_MISPLACED, MR_CONTIG_RANGE, + MR_LONGTERM_PIN, MR_TYPES }; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 92b44149d5b9..e8922a67d1a4 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -407,8 +407,13 @@ enum zone_type { * to increase the number of THP/huge pages. Notable special cases are: * * 1. Pinned pages: (long-term) pinning of movable pages might - * essentially turn such pages unmovable. Memory offlining might - * retry a long time. + * essentially turn such pages unmovable. Therefore, we do not allow + * pinning long-term pages in ZONE_MOVABLE. When pages are pinned and + * faulted, they come from the right zone right away. However, it is + * still possible that address space already has pages in + * ZONE_MOVABLE at the time when pages are pinned (i.e. user has + * touches that memory before pinning). In such case we migrate them + * to a different zone. When migration fails - pinning fails. * 2. memblock allocations: kernelcore/movablecore setups might create * situations where ZONE_MOVABLE contains unmovable allocations * after boot. Memory offlining and allocations fail early. -- cgit v1.2.3 From a08a2ae3461383c2d50d0997dcc6cd1dd1fefb08 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 4 May 2021 18:39:42 -0700 Subject: mm,memory_hotplug: allocate memmap from the added memory range Physical memory hotadd has to allocate a memmap (struct page array) for the newly added memory section. Currently, alloc_pages_node() is used for those allocations. This has some disadvantages: a) an existing memory is consumed for that purpose (eg: ~2MB per 128MB memory section on x86_64) This can even lead to extreme cases where system goes OOM because the physically hotplugged memory depletes the available memory before it is onlined. b) if the whole node is movable then we have off-node struct pages which has performance drawbacks. c) It might be there are no PMD_ALIGNED chunks so memmap array gets populated with base pages. This can be improved when CONFIG_SPARSEMEM_VMEMMAP is enabled. Vmemap page tables can map arbitrary memory. That means that we can reserve a part of the physically hotadded memory to back vmemmap page tables. This implementation uses the beginning of the hotplugged memory for that purpose. There are some non-obviously things to consider though. Vmemmap pages are allocated/freed during the memory hotplug events (add_memory_resource(), try_remove_memory()) when the memory is added/removed. This means that the reserved physical range is not online although it is used. The most obvious side effect is that pfn_to_online_page() returns NULL for those pfns. The current design expects that this should be OK as the hotplugged memory is considered a garbage until it is onlined. For example hibernation wouldn't save the content of those vmmemmaps into the image so it wouldn't be restored on resume but this should be OK as there no real content to recover anyway while metadata is reachable from other data structures (e.g. vmemmap page tables). The reserved space is therefore (de)initialized during the {on,off}line events (mhp_{de}init_memmap_on_memory). That is done by extracting page allocator independent initialization from the regular onlining path. The primary reason to handle the reserved space outside of {on,off}line_pages is to make each initialization specific to the purpose rather than special case them in a single function. As per above, the functions that are introduced are: - mhp_init_memmap_on_memory: Initializes vmemmap pages by calling move_pfn_range_to_zone(), calls kasan_add_zero_shadow(), and onlines as many sections as vmemmap pages fully span. - mhp_deinit_memmap_on_memory: Offlines as many sections as vmemmap pages fully span, removes the range from zhe zone by remove_pfn_range_from_zone(), and calls kasan_remove_zero_shadow() for the range. The new function memory_block_online() calls mhp_init_memmap_on_memory() before doing the actual online_pages(). Should online_pages() fail, we clean up by calling mhp_deinit_memmap_on_memory(). Adjusting of present_pages is done at the end once we know that online_pages() succedeed. On offline, memory_block_offline() needs to unaccount vmemmap pages from present_pages() before calling offline_pages(). This is necessary because offline_pages() tears down some structures based on the fact whether the node or the zone become empty. If offline_pages() fails, we account back vmemmap pages. If it succeeds, we call mhp_deinit_memmap_on_memory(). Hot-remove: We need to be careful when removing memory, as adding and removing memory needs to be done with the same granularity. To check that this assumption is not violated, we check the memory range we want to remove and if a) any memory block has vmemmap pages and b) the range spans more than a single memory block, we scream out loud and refuse to proceed. If all is good and the range was using memmap on memory (aka vmemmap pages), we construct an altmap structure so free_hugepage_table does the right thing and calls vmem_altmap_free instead of free_pagetable. Link: https://lkml.kernel.org/r/20210421102701.25051-5-osalvador@suse.de Signed-off-by: Oscar Salvador Reviewed-by: David Hildenbrand Acked-by: Michal Hocko Cc: Anshuman Khandual Cc: Pavel Tatashin Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 8 +++++++- include/linux/memory_hotplug.h | 15 ++++++++++++++- include/linux/memremap.h | 2 +- include/linux/mmzone.h | 7 +++++-- 4 files changed, 27 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 4da95e684e20..97e92e8b556a 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -29,6 +29,11 @@ struct memory_block { int online_type; /* for passing data to online routine */ int nid; /* NID for this memory block */ struct device dev; + /* + * Number of vmemmap pages. These pages + * lay at the beginning of the memory block. + */ + unsigned long nr_vmemmap_pages; }; int arch_get_memory_phys_device(unsigned long start_pfn); @@ -80,7 +85,8 @@ static inline int memory_notify(unsigned long val, void *v) #else extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); -int create_memory_block_devices(unsigned long start, unsigned long size); +int create_memory_block_devices(unsigned long start, unsigned long size, + unsigned long vmemmap_pages); void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 7288aa5ef73b..28f32fd00fe9 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -55,6 +55,14 @@ typedef int __bitwise mhp_t; */ #define MHP_MERGE_RESOURCE ((__force mhp_t)BIT(0)) +/* + * We want memmap (struct page array) to be self contained. + * To do so, we will use the beginning of the hot-added range to build + * the page tables for the memmap array that describes the entire range. + * Only selected architectures support it with SPARSE_VMEMMAP. + */ +#define MHP_MEMMAP_ON_MEMORY ((__force mhp_t)BIT(1)) + /* * Extended parameters for memory hotplug: * altmap: alternative allocator for memmap array (optional) @@ -99,9 +107,13 @@ static inline void zone_seqlock_init(struct zone *zone) extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages); extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); +extern void adjust_present_page_count(struct zone *zone, long nr_pages); /* VM interface that may be used by firmware interface */ +extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, + struct zone *zone); +extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages); extern int online_pages(unsigned long pfn, unsigned long nr_pages, - int online_type, int nid); + struct zone *zone); extern struct zone *test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn); extern void __offline_isolated_pages(unsigned long start_pfn, @@ -359,6 +371,7 @@ extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_ extern int arch_create_linear_mapping(int nid, u64 start, u64 size, struct mhp_params *params); void arch_remove_linear_mapping(u64 start, u64 size); +extern bool mhp_supports_memmap_on_memory(unsigned long size); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/memremap.h b/include/linux/memremap.h index f5b464daeeca..45a79da89c5f 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -17,7 +17,7 @@ struct device; * @alloc: track pages consumed, private to vmemmap_populate() */ struct vmem_altmap { - const unsigned long base_pfn; + unsigned long base_pfn; const unsigned long end_pfn; const unsigned long reserve; unsigned long free; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e8922a67d1a4..917bd6c604d5 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -436,6 +436,11 @@ enum zone_type { * situations where ZERO_PAGE(0) which is allocated differently * on different platforms may end up in a movable zone. ZERO_PAGE(0) * cannot be migrated. + * 7. Memory-hotplug: when using memmap_on_memory and onlining the + * memory to the MOVABLE zone, the vmemmap pages are also placed in + * such zone. Such pages cannot be really moved around as they are + * self-stored in the range, but they are treated as movable when + * the range they describe is about to be offlined. * * In general, no unmovable allocations that degrade memory offlining * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range()) @@ -1392,10 +1397,8 @@ static inline int online_section_nr(unsigned long nr) #ifdef CONFIG_MEMORY_HOTPLUG void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn); -#ifdef CONFIG_MEMORY_HOTREMOVE void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn); #endif -#endif static inline struct mem_section *__pfn_to_section(unsigned long pfn) { -- cgit v1.2.3 From 28961998f858114e51d2ae862065b858afcfa2b2 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 4 May 2021 18:40:03 -0700 Subject: iov_iter: lift memzero_page() to highmem.h Patch series "btrfs: Convert kmap/memset/kunmap to memzero_user()". Lifting memzero_user(), convert it to kmap_local_page() and then use it in btrfs. This patch (of 3): memzero_page() can replace the kmap/memset/kunmap pattern in other places in the code. While zero_user() has the same interface it is not the same call and its use should be limited and some of those calls may be better converted from zero_user() to memzero_page().[1] But that is not addressed in this series. Lift memzero_page() to highmem. [1] https://lore.kernel.org/lkml/CAHk-=wijdojzo56FzYqE5TOYw2Vws7ik3LEMGj9SPQaJJ+Z73Q@mail.gmail.com/ Link: https://lkml.kernel.org/r/20210309212137.2610186-1-ira.weiny@intel.com Link: https://lkml.kernel.org/r/20210309212137.2610186-2-ira.weiny@intel.com Signed-off-by: Ira Weiny Cc: Alexander Viro Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: Chaitanya Kulkarni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 44170f312ae7..832b49b50c7b 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -332,4 +332,11 @@ static inline void memcpy_to_page(struct page *page, size_t offset, kunmap_local(to); } +static inline void memzero_page(struct page *page, size_t offset, size_t len) +{ + char *addr = kmap_atomic(page); + memset(addr + offset, 0, len); + kunmap_atomic(addr); +} + #endif /* _LINUX_HIGHMEM_H */ -- cgit v1.2.3 From 866a6dadbb027b2955a7ae00bab9705d382def12 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 4 May 2021 17:27:28 -0700 Subject: context_tracking: Move guest exit context tracking to separate helpers Provide separate context tracking helpers for guest exit, the standalone helpers will be called separately by KVM x86 in later patches to fix tick-based accounting. Suggested-by: Thomas Gleixner Signed-off-by: Wanpeng Li Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210505002735.1684165-2-seanjc@google.com --- include/linux/context_tracking.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index bceb06498521..b8c7313495a7 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -131,10 +131,15 @@ static __always_inline void guest_enter_irqoff(void) } } -static __always_inline void guest_exit_irqoff(void) +static __always_inline void context_tracking_guest_exit(void) { if (context_tracking_enabled()) __context_tracking_exit(CONTEXT_GUEST); +} + +static __always_inline void guest_exit_irqoff(void) +{ + context_tracking_guest_exit(); instrumentation_begin(); if (vtime_accounting_enabled_this_cpu()) @@ -159,6 +164,8 @@ static __always_inline void guest_enter_irqoff(void) instrumentation_end(); } +static __always_inline void context_tracking_guest_exit(void) { } + static __always_inline void guest_exit_irqoff(void) { instrumentation_begin(); -- cgit v1.2.3 From 88d8220bbf06dd8045b2ac4be1046290eaa7773a Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 4 May 2021 17:27:29 -0700 Subject: context_tracking: Move guest exit vtime accounting to separate helpers Provide separate vtime accounting functions for guest exit instead of open coding the logic within the context tracking code. This will allow KVM x86 to handle vtime accounting slightly differently when using tick-based accounting. Suggested-by: Thomas Gleixner Signed-off-by: Wanpeng Li Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20210505002735.1684165-3-seanjc@google.com --- include/linux/context_tracking.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index b8c7313495a7..4f4556232dcf 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -137,15 +137,20 @@ static __always_inline void context_tracking_guest_exit(void) __context_tracking_exit(CONTEXT_GUEST); } -static __always_inline void guest_exit_irqoff(void) +static __always_inline void vtime_account_guest_exit(void) { - context_tracking_guest_exit(); - - instrumentation_begin(); if (vtime_accounting_enabled_this_cpu()) vtime_guest_exit(current); else current->flags &= ~PF_VCPU; +} + +static __always_inline void guest_exit_irqoff(void) +{ + context_tracking_guest_exit(); + + instrumentation_begin(); + vtime_account_guest_exit(); instrumentation_end(); } @@ -166,12 +171,17 @@ static __always_inline void guest_enter_irqoff(void) static __always_inline void context_tracking_guest_exit(void) { } +static __always_inline void vtime_account_guest_exit(void) +{ + vtime_account_kernel(current); + current->flags &= ~PF_VCPU; +} + static __always_inline void guest_exit_irqoff(void) { instrumentation_begin(); /* Flush the guest cputime we spent on the guest */ - vtime_account_kernel(current); - current->flags &= ~PF_VCPU; + vtime_account_guest_exit(); instrumentation_end(); } #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ -- cgit v1.2.3 From b41c723b203e19480c26f2ec8f04eedc03d34b34 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 17:27:31 -0700 Subject: sched/vtime: Move vtime accounting external declarations above inlines Move the blob of external declarations (and their stubs) above the set of inline definitions (and their stubs) for vtime accounting. This will allow a future patch to bring in more inline definitions without also having to shuffle large chunks of code. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20210505002735.1684165-5-seanjc@google.com --- include/linux/vtime.h | 74 +++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 041d6524d144..6a4317560539 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -10,6 +10,43 @@ struct task_struct; +/* + * Common vtime APIs + */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +extern void vtime_account_kernel(struct task_struct *tsk); +extern void vtime_account_idle(struct task_struct *tsk); +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ +static inline void vtime_account_kernel(struct task_struct *tsk) { } +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void arch_vtime_task_switch(struct task_struct *tsk); +extern void vtime_user_enter(struct task_struct *tsk); +extern void vtime_user_exit(struct task_struct *tsk); +extern void vtime_guest_enter(struct task_struct *tsk); +extern void vtime_guest_exit(struct task_struct *tsk); +extern void vtime_init_idle(struct task_struct *tsk, int cpu); +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */ +static inline void vtime_user_enter(struct task_struct *tsk) { } +static inline void vtime_user_exit(struct task_struct *tsk) { } +static inline void vtime_guest_enter(struct task_struct *tsk) { } +static inline void vtime_guest_exit(struct task_struct *tsk) { } +static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { } +#endif + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +extern void vtime_account_irq(struct task_struct *tsk, unsigned int offset); +extern void vtime_account_softirq(struct task_struct *tsk); +extern void vtime_account_hardirq(struct task_struct *tsk); +extern void vtime_flush(struct task_struct *tsk); +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ +static inline void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { } +static inline void vtime_account_softirq(struct task_struct *tsk) { } +static inline void vtime_account_hardirq(struct task_struct *tsk) { } +static inline void vtime_flush(struct task_struct *tsk) { } +#endif + /* * vtime_accounting_enabled_this_cpu() definitions/declarations */ @@ -57,43 +94,6 @@ static inline void vtime_task_switch(struct task_struct *prev) { } #endif -/* - * Common vtime APIs - */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -extern void vtime_account_kernel(struct task_struct *tsk); -extern void vtime_account_idle(struct task_struct *tsk); -#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ -static inline void vtime_account_kernel(struct task_struct *tsk) { } -#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void arch_vtime_task_switch(struct task_struct *tsk); -extern void vtime_user_enter(struct task_struct *tsk); -extern void vtime_user_exit(struct task_struct *tsk); -extern void vtime_guest_enter(struct task_struct *tsk); -extern void vtime_guest_exit(struct task_struct *tsk); -extern void vtime_init_idle(struct task_struct *tsk, int cpu); -#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */ -static inline void vtime_user_enter(struct task_struct *tsk) { } -static inline void vtime_user_exit(struct task_struct *tsk) { } -static inline void vtime_guest_enter(struct task_struct *tsk) { } -static inline void vtime_guest_exit(struct task_struct *tsk) { } -static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { } -#endif - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -extern void vtime_account_irq(struct task_struct *tsk, unsigned int offset); -extern void vtime_account_softirq(struct task_struct *tsk); -extern void vtime_account_hardirq(struct task_struct *tsk); -extern void vtime_flush(struct task_struct *tsk); -#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -static inline void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { } -static inline void vtime_account_softirq(struct task_struct *tsk) { } -static inline void vtime_account_hardirq(struct task_struct *tsk) { } -static inline void vtime_flush(struct task_struct *tsk) { } -#endif - #ifdef CONFIG_IRQ_TIME_ACCOUNTING extern void irqtime_account_irq(struct task_struct *tsk, unsigned int offset); -- cgit v1.2.3 From 6f922b89e5518143920b10e3643e556d9df58d94 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 17:27:32 -0700 Subject: sched/vtime: Move guest enter/exit vtime accounting to vtime.h Provide separate helpers for guest enter vtime accounting (in addition to the existing guest exit helpers), and move all vtime accounting helpers to vtime.h where the existing #ifdef infrastructure can be leveraged to better delineate the different types of accounting. This will also allow future cleanups via deduplication of context tracking code. Opportunstically delete the vtime_account_kernel() stub now that all callers are wrapped with CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210505002735.1684165-6-seanjc@google.com --- include/linux/context_tracking.h | 17 +-------------- include/linux/vtime.h | 46 ++++++++++++++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 4f4556232dcf..56c648bdbde8 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -137,14 +137,6 @@ static __always_inline void context_tracking_guest_exit(void) __context_tracking_exit(CONTEXT_GUEST); } -static __always_inline void vtime_account_guest_exit(void) -{ - if (vtime_accounting_enabled_this_cpu()) - vtime_guest_exit(current); - else - current->flags &= ~PF_VCPU; -} - static __always_inline void guest_exit_irqoff(void) { context_tracking_guest_exit(); @@ -163,20 +155,13 @@ static __always_inline void guest_enter_irqoff(void) * to flush. */ instrumentation_begin(); - vtime_account_kernel(current); - current->flags |= PF_VCPU; + vtime_account_guest_enter(); rcu_virt_note_context_switch(smp_processor_id()); instrumentation_end(); } static __always_inline void context_tracking_guest_exit(void) { } -static __always_inline void vtime_account_guest_exit(void) -{ - vtime_account_kernel(current); - current->flags &= ~PF_VCPU; -} - static __always_inline void guest_exit_irqoff(void) { instrumentation_begin(); diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 6a4317560539..3684487d01e1 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -3,21 +3,18 @@ #define _LINUX_KERNEL_VTIME_H #include +#include + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include #endif - -struct task_struct; - /* * Common vtime APIs */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING extern void vtime_account_kernel(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); -#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ -static inline void vtime_account_kernel(struct task_struct *tsk) { } #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN @@ -55,6 +52,18 @@ static inline void vtime_flush(struct task_struct *tsk) { } static inline bool vtime_accounting_enabled_this_cpu(void) { return true; } extern void vtime_task_switch(struct task_struct *prev); +static __always_inline void vtime_account_guest_enter(void) +{ + vtime_account_kernel(current); + current->flags |= PF_VCPU; +} + +static __always_inline void vtime_account_guest_exit(void) +{ + vtime_account_kernel(current); + current->flags &= ~PF_VCPU; +} + #elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN) /* @@ -86,12 +95,37 @@ static inline void vtime_task_switch(struct task_struct *prev) vtime_task_switch_generic(prev); } +static __always_inline void vtime_account_guest_enter(void) +{ + if (vtime_accounting_enabled_this_cpu()) + vtime_guest_enter(current); + else + current->flags |= PF_VCPU; +} + +static __always_inline void vtime_account_guest_exit(void) +{ + if (vtime_accounting_enabled_this_cpu()) + vtime_guest_exit(current); + else + current->flags &= ~PF_VCPU; +} + #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ -static inline bool vtime_accounting_enabled_cpu(int cpu) {return false; } static inline bool vtime_accounting_enabled_this_cpu(void) { return false; } static inline void vtime_task_switch(struct task_struct *prev) { } +static __always_inline void vtime_account_guest_enter(void) +{ + current->flags |= PF_VCPU; +} + +static __always_inline void vtime_account_guest_exit(void) +{ + current->flags &= ~PF_VCPU; +} + #endif -- cgit v1.2.3 From 14296e0c447885d6c7b326e059fb528eb00526ed Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 17:27:33 -0700 Subject: context_tracking: Consolidate guest enter/exit wrappers Consolidate the guest enter/exit wrappers, providing and tweaking stubs as needed. This will allow moving the wrappers under KVM without having to bleed #ifdefs into the soon-to-be KVM code. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210505002735.1684165-7-seanjc@google.com --- include/linux/context_tracking.h | 65 +++++++++++++++------------------------- 1 file changed, 24 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 56c648bdbde8..aa58c2ac67ca 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -71,6 +71,19 @@ static inline void exception_exit(enum ctx_state prev_ctx) } } +static __always_inline bool context_tracking_guest_enter(void) +{ + if (context_tracking_enabled()) + __context_tracking_enter(CONTEXT_GUEST); + + return context_tracking_enabled_this_cpu(); +} + +static __always_inline void context_tracking_guest_exit(void) +{ + if (context_tracking_enabled()) + __context_tracking_exit(CONTEXT_GUEST); +} /** * ct_state() - return the current context tracking state if known @@ -92,6 +105,9 @@ static inline void user_exit_irqoff(void) { } static inline enum ctx_state exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } +static inline bool context_tracking_guest_enter(void) { return false; } +static inline void context_tracking_guest_exit(void) { } + #endif /* !CONFIG_CONTEXT_TRACKING */ #define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond)) @@ -102,74 +118,41 @@ extern void context_tracking_init(void); static inline void context_tracking_init(void) { } #endif /* CONFIG_CONTEXT_TRACKING_FORCE */ - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN /* must be called with irqs disabled */ static __always_inline void guest_enter_irqoff(void) { + /* + * This is running in ioctl context so its safe to assume that it's the + * stime pending cputime to flush. + */ instrumentation_begin(); - if (vtime_accounting_enabled_this_cpu()) - vtime_guest_enter(current); - else - current->flags |= PF_VCPU; + vtime_account_guest_enter(); instrumentation_end(); - if (context_tracking_enabled()) - __context_tracking_enter(CONTEXT_GUEST); - - /* KVM does not hold any references to rcu protected data when it + /* + * KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode * is very similar to exiting to userspace from rcu point of view. In * addition CPU may stay in a guest mode for quite a long time (up to * one time slice). Lets treat guest mode as quiescent state, just like * we do with user-mode execution. */ - if (!context_tracking_enabled_this_cpu()) { + if (!context_tracking_guest_enter()) { instrumentation_begin(); rcu_virt_note_context_switch(smp_processor_id()); instrumentation_end(); } } -static __always_inline void context_tracking_guest_exit(void) -{ - if (context_tracking_enabled()) - __context_tracking_exit(CONTEXT_GUEST); -} - static __always_inline void guest_exit_irqoff(void) { context_tracking_guest_exit(); - instrumentation_begin(); - vtime_account_guest_exit(); - instrumentation_end(); -} - -#else -static __always_inline void guest_enter_irqoff(void) -{ - /* - * This is running in ioctl context so its safe - * to assume that it's the stime pending cputime - * to flush. - */ - instrumentation_begin(); - vtime_account_guest_enter(); - rcu_virt_note_context_switch(smp_processor_id()); - instrumentation_end(); -} - -static __always_inline void context_tracking_guest_exit(void) { } - -static __always_inline void guest_exit_irqoff(void) -{ instrumentation_begin(); /* Flush the guest cputime we spent on the guest */ vtime_account_guest_exit(); instrumentation_end(); } -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ static inline void guest_exit(void) { -- cgit v1.2.3 From 1ca0016c149be35fe19a6b75fce95c25807b7159 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 17:27:34 -0700 Subject: context_tracking: KVM: Move guest enter/exit wrappers to KVM's domain Move the guest enter/exit wrappers to kvm_host.h so that KVM can manage its context tracking vs. vtime accounting without bleeding too many KVM details into the context tracking code. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210505002735.1684165-8-seanjc@google.com --- include/linux/context_tracking.h | 45 ---------------------------------------- include/linux/kvm_host.h | 45 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index aa58c2ac67ca..4d7fced3a39f 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -118,49 +118,4 @@ extern void context_tracking_init(void); static inline void context_tracking_init(void) { } #endif /* CONFIG_CONTEXT_TRACKING_FORCE */ -/* must be called with irqs disabled */ -static __always_inline void guest_enter_irqoff(void) -{ - /* - * This is running in ioctl context so its safe to assume that it's the - * stime pending cputime to flush. - */ - instrumentation_begin(); - vtime_account_guest_enter(); - instrumentation_end(); - - /* - * KVM does not hold any references to rcu protected data when it - * switches CPU into a guest mode. In fact switching to a guest mode - * is very similar to exiting to userspace from rcu point of view. In - * addition CPU may stay in a guest mode for quite a long time (up to - * one time slice). Lets treat guest mode as quiescent state, just like - * we do with user-mode execution. - */ - if (!context_tracking_guest_enter()) { - instrumentation_begin(); - rcu_virt_note_context_switch(smp_processor_id()); - instrumentation_end(); - } -} - -static __always_inline void guest_exit_irqoff(void) -{ - context_tracking_guest_exit(); - - instrumentation_begin(); - /* Flush the guest cputime we spent on the guest */ - vtime_account_guest_exit(); - instrumentation_end(); -} - -static inline void guest_exit(void) -{ - unsigned long flags; - - local_irq_save(flags); - guest_exit_irqoff(); - local_irq_restore(flags); -} - #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8895b95b6a22..2f34487e21f2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -338,6 +338,51 @@ struct kvm_vcpu { struct kvm_dirty_ring dirty_ring; }; +/* must be called with irqs disabled */ +static __always_inline void guest_enter_irqoff(void) +{ + /* + * This is running in ioctl context so its safe to assume that it's the + * stime pending cputime to flush. + */ + instrumentation_begin(); + vtime_account_guest_enter(); + instrumentation_end(); + + /* + * KVM does not hold any references to rcu protected data when it + * switches CPU into a guest mode. In fact switching to a guest mode + * is very similar to exiting to userspace from rcu point of view. In + * addition CPU may stay in a guest mode for quite a long time (up to + * one time slice). Lets treat guest mode as quiescent state, just like + * we do with user-mode execution. + */ + if (!context_tracking_guest_enter()) { + instrumentation_begin(); + rcu_virt_note_context_switch(smp_processor_id()); + instrumentation_end(); + } +} + +static __always_inline void guest_exit_irqoff(void) +{ + context_tracking_guest_exit(); + + instrumentation_begin(); + /* Flush the guest cputime we spent on the guest */ + vtime_account_guest_exit(); + instrumentation_end(); +} + +static inline void guest_exit(void) +{ + unsigned long flags; + + local_irq_save(flags); + guest_exit_irqoff(); + local_irq_restore(flags); +} + static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { /* -- cgit v1.2.3 From 1139aeb1c521eb4a050920ce6c64c36c4f2a3ab7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 5 May 2021 23:12:42 +0200 Subject: smp: Fix smp_call_function_single_async prototype As of commit 966a967116e6 ("smp: Avoid using two cache lines for struct call_single_data"), the smp code prefers 32-byte aligned call_single_data objects for performance reasons, but the block layer includes an instance of this structure in the main 'struct request' that is more senstive to size than to performance here, see 4ccafe032005 ("block: unalign call_single_data in struct request"). The result is a violation of the calling conventions that clang correctly points out: block/blk-mq.c:630:39: warning: passing 8-byte aligned argument to 32-byte aligned parameter 2 of 'smp_call_function_single_async' may result in an unaligned pointer access [-Walign-mismatch] smp_call_function_single_async(cpu, &rq->csd); It does seem that the usage of the call_single_data without cache line alignment should still be allowed by the smp code, so just change the function prototype so it accepts both, but leave the default alignment unchanged for the other users. This seems better to me than adding a local hack to shut up an otherwise correct warning in the caller. Signed-off-by: Arnd Bergmann Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jens Axboe Link: https://lkml.kernel.org/r/20210505211300.3174456-1-arnd@kernel.org --- include/linux/smp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 84a0b4828f66..f0d3ef654207 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -53,7 +53,7 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, void *info, bool wait, const struct cpumask *mask); -int smp_call_function_single_async(int cpu, call_single_data_t *csd); +int smp_call_function_single_async(int cpu, struct __call_single_data *csd); /* * Call a function on all processors -- cgit v1.2.3 From d4455faccd6cbe11ddfdbe28723a2122453b4f4e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 6 May 2021 18:02:16 -0700 Subject: proc: mandate ->proc_lseek in "struct proc_ops" Now that proc_ops are separate from file_operations and other operations it easy to check all instances to have ->proc_lseek hook and remove check in main code. Note: nonseekable_open() files naturally don't require ->proc_lseek. Garbage collect pde_lseek() function. [adobriyan@gmail.com: smoke test lseek()] Link: https://lkml.kernel.org/r/YG4OIhChOrVTPgdN@localhost.localdomain Link: https://lkml.kernel.org/r/YFYX0Bzwxlc7aBa/@localhost.localdomain Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 000cc0533c33..069c7fd95396 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -32,6 +32,7 @@ struct proc_ops { ssize_t (*proc_read)(struct file *, char __user *, size_t, loff_t *); ssize_t (*proc_read_iter)(struct kiocb *, struct iov_iter *); ssize_t (*proc_write)(struct file *, const char __user *, size_t, loff_t *); + /* mandatory unless nonseekable_open() or equivalent is used */ loff_t (*proc_lseek)(struct file *, loff_t, int); int (*proc_release)(struct inode *, struct file *); __poll_t (*proc_poll)(struct file *, struct poll_table_struct *); -- cgit v1.2.3 From 4ee60ec156d91c315d1f62dfc1bc5799dcc6b473 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 6 May 2021 18:02:27 -0700 Subject: include: remove pagemap.h from blkdev.h My UEK-derived config has 1030 files depending on pagemap.h before this change. Afterwards, just 326 files need to be rebuilt when I touch pagemap.h. I think blkdev.h is probably included too widely, but untangling that dependency is harder and this solves my problem. x86 allmodconfig builds, but there may be implicit include problems on other architectures. Link: https://lkml.kernel.org/r/20210309195747.283796-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Dan Williams [nvdimm] Acked-by: Jens Axboe [block] Reviewed-by: Christoph Hellwig Acked-by: Coly Li [bcache] Acked-by: Martin K. Petersen [scsi] Reviewed-by: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 1 - include/linux/swap.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b91ba6207365..1255823b2bc0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/swap.h b/include/linux/swap.h index f69e0f67651d..144727041e78 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 08c5188ef40ff82aed559123dc0ab2d2254b1b1c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 6 May 2021 18:02:30 -0700 Subject: kernel.h: drop inclusion in bitmap.h The bitmap.h header is used in a lot of code around the kernel. Besides that it includes kernel.h which sometimes makes a loop. The problem here is many unneeded loops that make header hell dependencies. For example, how may you move bitmap_zalloc() from C-file to the header? Currently it's impossible. And bitmap.h here is only the tip of an iceberg. kerne.h is a dump of everything that even has nothing in common at all. We may still have it, but in my new code I prefer to include only the headers that I want to use, without the bulk of unneeded kernel code. Break the loop by introducing align.h, including it in kernel.h and bitmap.h followed by replacing kernel.h with limits.h. Link: https://lkml.kernel.org/r/20210326170347.37441-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Yury Norov Cc: Rasmus Villemoes Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/align.h | 15 +++++++++++++++ include/linux/bitmap.h | 3 ++- include/linux/kernel.h | 9 +-------- 3 files changed, 18 insertions(+), 9 deletions(-) create mode 100644 include/linux/align.h (limited to 'include/linux') diff --git a/include/linux/align.h b/include/linux/align.h new file mode 100644 index 000000000000..2b4acec7b95a --- /dev/null +++ b/include/linux/align.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ALIGN_H +#define _LINUX_ALIGN_H + +#include + +/* @a is a power of 2 value */ +#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) +#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a))) +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +#endif /* _LINUX_ALIGN_H */ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 70a932470b2d..6cbcd9d9edd2 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -4,10 +4,11 @@ #ifndef __ASSEMBLY__ +#include #include #include +#include #include -#include /* * bitmaps provide bit arrays that consume one or more unsigned diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5b7ed6dc99ac..09035ac67d4b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -3,6 +3,7 @@ #define _LINUX_KERNEL_H #include +#include #include #include #include @@ -30,14 +31,6 @@ */ #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) -/* @a is a power of 2 value */ -#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) -#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) -#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) -#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) -#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a))) -#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) - /* generic data direction definitions */ #define READ 0 #define WRITE 1 -- cgit v1.2.3 From 112dfce8f29798192eb0be8066b54f4a68f4eb36 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Thu, 6 May 2021 18:02:33 -0700 Subject: linux/profile.h: remove unnecessary declaration Declaring struct pt_regs is unnecessary. On the one hand, there is no function using it; on the other hand, struct pt_regs has been declared in linux/kernel.h. Remove them. Link: https://lkml.kernel.org/r/20210401104834.1009157-1-wanjiabing@vivo.com Signed-off-by: Wan Jiabing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/profile.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/profile.h b/include/linux/profile.h index bad18ca43150..fd18ca96f557 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -15,7 +15,6 @@ #define KVM_PROFILING 4 struct proc_dir_entry; -struct pt_regs; struct notifier_block; #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) @@ -84,8 +83,6 @@ int task_handoff_unregister(struct notifier_block * n); int profile_event_register(enum profile_type, struct notifier_block * n); int profile_event_unregister(enum profile_type, struct notifier_block * n); -struct pt_regs; - #else #define prof_on 0 -- cgit v1.2.3 From 32c93976ac2ee7ecb4b09cc032efe1445d37bd7e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 6 May 2021 18:02:39 -0700 Subject: kernel/cred.c: make init_groups static init_groups is declared in both cred.h and init_task.h, but it is not actually referenced anywhere outside of cred.c where it is defined. So make it static and remove the declarations. Link: https://lkml.kernel.org/r/20210310220102.2484201-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cred.h | 1 - include/linux/init_task.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index ac0e5f97d7d8..14971322e1a0 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -53,7 +53,6 @@ do { \ groups_free(group_info); \ } while (0) -extern struct group_info init_groups; #ifdef CONFIG_MULTIUSER extern struct group_info *groups_alloc(int); extern void groups_free(struct group_info *); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index b2412b4d4c20..40fc5813cf93 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,7 +25,6 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; extern struct nsproxy init_nsproxy; -extern struct group_info init_groups; extern struct cred init_cred; #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -- cgit v1.2.3 From 586eaebea5988302c5a8b018096dd6c6f4564940 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 6 May 2021 18:02:56 -0700 Subject: lib: extend the scope of small_const_nbits() macro find_bit would also benefit from small_const_nbits() optimizations. The detailed comment is provided by Rasmus Villemoes. Link: https://lkml.kernel.org/r/20210401003153.97325-6-yury.norov@gmail.com Suggested-by: Rasmus Villemoes Signed-off-by: Yury Norov Acked-by: Rasmus Villemoes Acked-by: Andy Shevchenko Cc: Alexey Klimov Cc: Arnd Bergmann Cc: David Sterba Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Jianpeng Ma Cc: Joe Perches Cc: John Paul Adrian Glaubitz Cc: Josh Poimboeuf Cc: Rich Felker Cc: Stefano Brivio Cc: Wei Yang Cc: Wolfram Sang Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 6cbcd9d9edd2..f57f4473bbe4 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -223,14 +223,6 @@ extern int bitmap_print_to_pagebuf(bool list, char *buf, #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) -/* - * The static inlines below do not handle constant nbits==0 correctly, - * so make such users (should any ever turn up) call the out-of-line - * versions. - */ -#define small_const_nbits(nbits) \ - (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) - static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); -- cgit v1.2.3 From 2cc7b6a44ac21d31b398b03f4845c53152070416 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 6 May 2021 18:03:14 -0700 Subject: lib: add fast path for find_first_*_bit() and find_last_bit() Similarly to bitmap functions, users would benefit if we'll handle a case of small-size bitmaps that fit into a single word. While here, move the find_last_bit() declaration to bitops/find.h where other find_*_bit() functions sit. Link: https://lkml.kernel.org/r/20210401003153.97325-11-yury.norov@gmail.com Signed-off-by: Yury Norov Acked-by: Rasmus Villemoes Acked-by: Andy Shevchenko Cc: Alexey Klimov Cc: Arnd Bergmann Cc: David Sterba Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Jianpeng Ma Cc: Joe Perches Cc: John Paul Adrian Glaubitz Cc: Josh Poimboeuf Cc: Rich Felker Cc: Stefano Brivio Cc: Wei Yang Cc: Wolfram Sang Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a5a48303b0f1..26bf15e6cd35 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -286,17 +286,5 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, }) #endif -#ifndef find_last_bit -/** - * find_last_bit - find the last set bit in a memory region - * @addr: The address to start the search at - * @size: The number of bits to search - * - * Returns the bit number of the last set bit, or size. - */ -extern unsigned long find_last_bit(const unsigned long *addr, - unsigned long size); -#endif - #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From e18baa7cc3598999317d6c2fe255756f6b3b7562 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 6 May 2021 18:03:37 -0700 Subject: lib: crc8: pointer to data block should be const crc8() does not change the data passed to it, so the pointer argument should be declared const. This avoids callers that receive const data having to cast it to a non-const pointer to call crc8(). Link: https://lkml.kernel.org/r/20210329122409.3291-1-rf@opensource.cirrus.com Signed-off-by: Richard Fitzgerald Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crc8.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crc8.h b/include/linux/crc8.h index 13c8dabb0441..674045c59a04 100644 --- a/include/linux/crc8.h +++ b/include/linux/crc8.h @@ -96,6 +96,6 @@ void crc8_populate_msb(u8 table[CRC8_TABLE_SIZE], u8 polynomial); * Williams, Ross N., rossross.net * (see URL http://www.ross.net/crc/download/crc_v3.txt). */ -u8 crc8(const u8 table[CRC8_TABLE_SIZE], u8 *pdata, size_t nbytes, u8 crc); +u8 crc8(const u8 table[CRC8_TABLE_SIZE], const u8 *pdata, size_t nbytes, u8 crc); #endif /* __CRC8_H_ */ -- cgit v1.2.3 From e13d04ec45b07388d3c38c0e18a4d0aa4841b0c3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 6 May 2021 18:03:52 -0700 Subject: include/linux/compat.h: remove unneeded declaration from COMPAT_SYSCALL_DEFINEx() compat_sys##name is declared twice, just one line below. With this removal SYSCALL_DEFINEx() (defined in ) and COMPAT_SYSCALL_DEFINEx() look symmetrical. Link: https://lkml.kernel.org/r/20210223114924.854794-1-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index f0d2dd35d408..cfac4ec9a7df 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -75,7 +75,6 @@ __diag_push(); \ __diag_ignore(GCC, 8, "-Wattribute-alias", \ "Type aliasing is used to sanitize syscall arguments");\ - asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_compat_sys##name)))); \ ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \ -- cgit v1.2.3 From 6f1f942cd5fbbe308f912fc84e3f10fbc8113a68 Mon Sep 17 00:00:00 2001 From: He Ying Date: Thu, 6 May 2021 18:04:57 -0700 Subject: smp: kernel/panic.c - silence warnings We found these warnings in kernel/panic.c by using sparse tool: warning: symbol 'panic_smp_self_stop' was not declared. warning: symbol 'nmi_panic_self_stop' was not declared. warning: symbol 'crash_smp_send_stop' was not declared. To avoid them, add declarations for these three functions in include/linux/smp.h. Link: https://lkml.kernel.org/r/20210316084150.75201-1-heying24@huawei.com Signed-off-by: He Ying Reported-by: Hulk Robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 84a0b4828f66..669e35c03be2 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -55,6 +55,14 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, int smp_call_function_single_async(int cpu, call_single_data_t *csd); +/* + * Cpus stopping functions in panic. All have default weak definitions. + * Architecture-dependent code may override them. + */ +void panic_smp_self_stop(void); +void nmi_panic_self_stop(struct pt_regs *regs); +void crash_smp_send_stop(void); + /* * Call a function on all processors */ -- cgit v1.2.3 From 3d1c7fd97e4c5e54034231cd11319079dfaed60e Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Thu, 6 May 2021 18:05:00 -0700 Subject: delayacct: clear right task's flag after blkio completes When I was implementing a latency analyzer tool by using task->delays and other things, I found an issue in delayacct. The issue is it should clear the target's flag instead of current's in delayacct_blkio_end(). When I git blame delayacct, I found there're some similar issues we have fixed in delayacct_blkio_end(). - Commit c96f5471ce7d ("delayacct: Account blkio completion on the correct task") fixed the issue that it should account blkio completion on the target task instead of current. - Commit b512719f771a ("delayacct: fix crash in delayacct_blkio_end() after delayacct init failure") fixed the issue that it should check target task's delays instead of current task'. It seems that delayacct_blkio_{begin, end} are error prone. So I introduce a new paratmeter - the target task 'p' - to these helpers. After that change, the callsite will specifilly set the right task, which should make it less error prone. Link: https://lkml.kernel.org/r/20210414083720.24083-1-laoar.shao@gmail.com Signed-off-by: Yafang Shao Cc: Tejun Heo Cc: Josh Snyder Cc: Jens Axboe Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delayacct.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 2d3bdcccf5eb..21651f946751 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -82,16 +82,16 @@ static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) return 0; } -static inline void delayacct_set_flag(int flag) +static inline void delayacct_set_flag(struct task_struct *p, int flag) { - if (current->delays) - current->delays->flags |= flag; + if (p->delays) + p->delays->flags |= flag; } -static inline void delayacct_clear_flag(int flag) +static inline void delayacct_clear_flag(struct task_struct *p, int flag) { - if (current->delays) - current->delays->flags &= ~flag; + if (p->delays) + p->delays->flags &= ~flag; } static inline void delayacct_tsk_init(struct task_struct *tsk) @@ -114,7 +114,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk) static inline void delayacct_blkio_start(void) { - delayacct_set_flag(DELAYACCT_PF_BLKIO); + delayacct_set_flag(current, DELAYACCT_PF_BLKIO); if (current->delays) __delayacct_blkio_start(); } @@ -123,7 +123,7 @@ static inline void delayacct_blkio_end(struct task_struct *p) { if (p->delays) __delayacct_blkio_end(p); - delayacct_clear_flag(DELAYACCT_PF_BLKIO); + delayacct_clear_flag(p, DELAYACCT_PF_BLKIO); } static inline int delayacct_add_tsk(struct taskstats *d, @@ -166,9 +166,9 @@ static inline void delayacct_thrashing_end(void) } #else -static inline void delayacct_set_flag(int flag) +static inline void delayacct_set_flag(struct task_struct *p, int flag) {} -static inline void delayacct_clear_flag(int flag) +static inline void delayacct_clear_flag(struct task_struct *p, int flag) {} static inline void delayacct_init(void) {} -- cgit v1.2.3 From a065c0faacb1e472cd4e048986407d1b177373a2 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 6 May 2021 18:05:39 -0700 Subject: kernel/async.c: remove async_unregister_domain() No callers in the tree. Link: https://lkml.kernel.org/r/20210309151723.1907838-2-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/async.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/async.h b/include/linux/async.h index 0a17cd27f348..cce4ad31e8fc 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -112,7 +112,6 @@ async_schedule_dev_domain(async_func_t func, struct device *dev, return async_schedule_node_domain(func, dev, dev_to_node(dev), domain); } -void async_unregister_domain(struct async_domain *domain); extern void async_synchronize_full(void); extern void async_synchronize_full_domain(struct async_domain *domain); extern void async_synchronize_cookie(async_cookie_t cookie); -- cgit v1.2.3 From e7cb072eb988e46295512617c39d004f9e1c26f8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 6 May 2021 18:05:42 -0700 Subject: init/initramfs.c: do unpacking asynchronously Patch series "background initramfs unpacking, and CONFIG_MODPROBE_PATH", v3. These two patches are independent, but better-together. The second is a rather trivial patch that simply allows the developer to change "/sbin/modprobe" to something else - e.g. the empty string, so that all request_module() during early boot return -ENOENT early, without even spawning a usermode helper, needlessly synchronizing with the initramfs unpacking. The first patch delegates decompressing the initramfs to a worker thread, allowing do_initcalls() in main.c to proceed to the device_ and late_ initcalls without waiting for that decompression (and populating of rootfs) to finish. Obviously, some of those later calls may rely on the initramfs being available, so I've added synchronization points in the firmware loader and usermodehelper paths - there might be other places that would need this, but so far no one has been able to think of any places I have missed. There's not much to win if most of the functionality needed during boot is only available as modules. But systems with a custom-made .config and initramfs can boot faster, partly due to utilizing more than one cpu earlier, partly by avoiding known-futile modprobe calls (which would still trigger synchronization with the initramfs unpacking, thus eliminating most of the first benefit). This patch (of 2): Most of the boot process doesn't actually need anything from the initramfs, until of course PID1 is to be executed. So instead of doing the decompressing and populating of the initramfs synchronously in populate_rootfs() itself, push that off to a worker thread. This is primarily motivated by an embedded ppc target, where unpacking even the rather modest sized initramfs takes 0.6 seconds, which is long enough that the external watchdog becomes unhappy that it doesn't get attention soon enough. By doing the initramfs decompression in a worker thread, we get to do the device_initcalls and hence start petting the watchdog much sooner. Normal desktops might benefit as well. On my mostly stock Ubuntu kernel, my initramfs is a 26M xz-compressed blob, decompressing to around 126M. That takes almost two seconds: [ 0.201454] Trying to unpack rootfs image as initramfs... [ 1.976633] Freeing initrd memory: 29416K Before this patch, these lines occur consecutively in dmesg. With this patch, the timestamps on these two lines is roughly the same as above, but with 172 lines inbetween - so more than one cpu has been kept busy doing work that would otherwise only happen after the populate_rootfs() finished. Should one of the initcalls done after rootfs_initcall time (i.e., device_ and late_ initcalls) need something from the initramfs (say, a kernel module or a firmware blob), it will simply wait for the initramfs unpacking to be done before proceeding, which should in theory make this completely safe. But if some driver pokes around in the filesystem directly and not via one of the official kernel interfaces (i.e. request_firmware*(), call_usermodehelper*) that theory may not hold - also, I certainly might have missed a spot when sprinkling wait_for_initramfs(). So there is an escape hatch in the form of an initramfs_async= command line parameter. Link: https://lkml.kernel.org/r/20210313212528.2956377-1-linux@rasmusvillemoes.dk Link: https://lkml.kernel.org/r/20210313212528.2956377-2-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Reviewed-by: Luis Chamberlain Cc: Jessica Yu Cc: Borislav Petkov Cc: Jonathan Corbet Cc: Greg Kroah-Hartman Cc: Nick Desaulniers Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/initrd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/initrd.h b/include/linux/initrd.h index 85c15717af34..1bbe9af48dc3 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -20,8 +20,10 @@ extern void free_initrd_mem(unsigned long, unsigned long); #ifdef CONFIG_BLK_DEV_INITRD extern void __init reserve_initrd_mem(void); +extern void wait_for_initramfs(void); #else static inline void __init reserve_initrd_mem(void) {} +static inline void wait_for_initramfs(void) {} #endif extern phys_addr_t phys_initrd_start; -- cgit v1.2.3 From cb152a1a95606aadd81df7a537dde9ef16da4b80 Mon Sep 17 00:00:00 2001 From: Shijie Luo Date: Thu, 6 May 2021 18:05:51 -0700 Subject: mm: fix some typos and code style problems fix some typos and code style problems in mm. gfp.h: s/MAXNODES/MAX_NUMNODES mmzone.h: s/then/than rmap.c: s/__vma_split()/__vma_adjust() swap.c: s/__mod_zone_page_stat/__mod_zone_page_state, s/is is/is swap_state.c: s/whoes/whose z3fold.c: code style problem fix in z3fold_unregister_migration zsmalloc.c: s/of/or, s/give/given Link: https://lkml.kernel.org/r/20210419083057.64820-1-luoshijie1@huawei.com Signed-off-by: Shijie Luo Signed-off-by: Miaohe Lin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- include/linux/mmzone.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 8a5f6c3d7dba..11da8af06704 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -490,7 +490,7 @@ static inline int gfp_zonelist(gfp_t flags) /* * We get the zone list from the current node and the gfp_mask. - * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones. + * This zone list contains a maximum of MAX_NUMNODES*MAX_NR_ZONES zones. * There are two zonelists per node, one for all zones with memory and * one containing just zones from the node the zonelist belongs to. * diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 917bd6c604d5..0d53eba1c383 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -55,7 +55,7 @@ enum migratetype { * pageblocks to MIGRATE_CMA which can be done by * __free_pageblock_cma() function. What is important though * is that a range of pageblocks must be aligned to - * MAX_ORDER_NR_PAGES should biggest page be bigger then + * MAX_ORDER_NR_PAGES should biggest page be bigger than * a single pageblock. */ MIGRATE_CMA, -- cgit v1.2.3 From bbcd53c960713507ae764bf81970651b5577b95a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 6 May 2021 18:05:55 -0700 Subject: drivers/char: remove /dev/kmem for good Patch series "drivers/char: remove /dev/kmem for good". Exploring /dev/kmem and /dev/mem in the context of memory hot(un)plug and memory ballooning, I started questioning the existence of /dev/kmem. Comparing it with the /proc/kcore implementation, it does not seem to be able to deal with things like a) Pages unmapped from the direct mapping (e.g., to be used by secretmem) -> kern_addr_valid(). virt_addr_valid() is not sufficient. b) Special cases like gart aperture memory that is not to be touched -> mem_pfn_is_ram() Unless I am missing something, it's at least broken in some cases and might fault/crash the machine. Looks like its existence has been questioned before in 2005 and 2010 [1], after ~11 additional years, it might make sense to revive the discussion. CONFIG_DEVKMEM is only enabled in a single defconfig (on purpose or by mistake?). All distributions disable it: in Ubuntu it has been disabled for more than 10 years, in Debian since 2.6.31, in Fedora at least starting with FC3, in RHEL starting with RHEL4, in SUSE starting from 15sp2, and OpenSUSE has it disabled as well. 1) /dev/kmem was popular for rootkits [2] before it got disabled basically everywhere. Ubuntu documents [3] "There is no modern user of /dev/kmem any more beyond attackers using it to load kernel rootkits.". RHEL documents in a BZ [5] "it served no practical purpose other than to serve as a potential security problem or to enable binary module drivers to access structures/functions they shouldn't be touching" 2) /proc/kcore is a decent interface to have a controlled way to read kernel memory for debugging puposes. (will need some extensions to deal with memory offlining/unplug, memory ballooning, and poisoned pages, though) 3) It might be useful for corner case debugging [1]. KDB/KGDB might be a better fit, especially, to write random memory; harder to shoot yourself into the foot. 4) "Kernel Memory Editor" [4] hasn't seen any updates since 2000 and seems to be incompatible with 64bit [1]. For educational purposes, /proc/kcore might be used to monitor value updates -- or older kernels can be used. 5) It's broken on arm64, and therefore, completely disabled there. Looks like it's essentially unused and has been replaced by better suited interfaces for individual tasks (/proc/kcore, KDB/KGDB). Let's just remove it. [1] https://lwn.net/Articles/147901/ [2] https://www.linuxjournal.com/article/10505 [3] https://wiki.ubuntu.com/Security/Features#A.2Fdev.2Fkmem_disabled [4] https://sourceforge.net/projects/kme/ [5] https://bugzilla.redhat.com/show_bug.cgi?id=154796 Link: https://lkml.kernel.org/r/20210324102351.6932-1-david@redhat.com Link: https://lkml.kernel.org/r/20210324102351.6932-2-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Kees Cook Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: "Alexander A. Klimov" Cc: Alexander Viro Cc: Alexandre Belloni Cc: Andrew Lunn Cc: Andrey Zhizhikin Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Brian Cain Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Chris Zankel Cc: Corentin Labbe Cc: "David S. Miller" Cc: "Eric W. Biederman" Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Greentime Hu Cc: Gregory Clement Cc: Heiko Carstens Cc: Helge Deller Cc: Hillf Danton Cc: huang ying Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: "James E.J. Bottomley" Cc: James Troup Cc: Jiaxun Yang Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Kairui Song Cc: Krzysztof Kozlowski Cc: Kuninori Morimoto Cc: Liviu Dudau Cc: Lorenzo Pieralisi Cc: Luc Van Oostenryck Cc: Luis Chamberlain Cc: Matthew Wilcox Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Mike Rapoport Cc: Mikulas Patocka Cc: Minchan Kim Cc: Niklas Schnelle Cc: Oleksiy Avramchenko Cc: openrisc@lists.librecores.org Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: "Pavel Machek (CIP)" Cc: Pavel Machek Cc: "Peter Zijlstra (Intel)" Cc: Pierre Morel Cc: Randy Dunlap Cc: Richard Henderson Cc: Rich Felker Cc: Robert Richter Cc: Rob Herring Cc: Russell King Cc: Sam Ravnborg Cc: Sebastian Andrzej Siewior Cc: Sebastian Hesselbarth Cc: sparclinux@vger.kernel.org Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Steven Rostedt Cc: Sudeep Holla Cc: Theodore Dubois Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Viresh Kumar Cc: William Cohen Cc: Xiaoming Ni Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- include/linux/vmalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index acef282b97c6..c3c88fdb9b2a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -145,7 +145,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)0x1000) -/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ +/* File is huge (eg. /dev/mem): treat loff_t as unsigned */ #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) /* File is opened with O_PATH; almost nothing can be done with it */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 394d03cc0e92..f31ba59fb1ef 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -227,7 +227,7 @@ static inline void set_vm_flush_reset_perms(void *addr) } #endif -/* for /dev/kmem */ +/* for /proc/kcore */ extern long vread(char *buf, char *addr, unsigned long count); extern long vwrite(char *buf, char *addr, unsigned long count); -- cgit v1.2.3 From f7c8ce44ebb113b83135ada6e496db33d8a535e3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 6 May 2021 18:06:06 -0700 Subject: mm/vmalloc: remove vwrite() The last user (/dev/kmem) is gone. Let's drop it. Link: https://lkml.kernel.org/r/20210324102351.6932-4-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Michal Hocko Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Hillf Danton Cc: Matthew Wilcox Cc: Oleksiy Avramchenko Cc: Steven Rostedt Cc: Minchan Kim Cc: huang ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index f31ba59fb1ef..b6ff16393bf6 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -229,7 +229,6 @@ static inline void set_vm_flush_reset_perms(void *addr) /* for /proc/kcore */ extern long vread(char *buf, char *addr, unsigned long count); -extern long vwrite(char *buf, char *addr, unsigned long count); /* * Internals. Dont't use.. -- cgit v1.2.3 From 2eb70aab25dd9b0013a0035b416dbe0e81e6ad48 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Thu, 6 May 2021 18:06:24 -0700 Subject: include/linux/pgtable.h: few spelling fixes Few spelling fixes throughout the file. Link: https://lkml.kernel.org/r/20210318201404.6380-1-unixbhaskar@gmail.com Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pgtable.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2194a9cd885c..46b13780c2c8 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -426,7 +426,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres /* * On some architectures hardware does not set page access bit when accessing - * memory page, it is responsibilty of software setting this bit. It brings + * memory page, it is responsibility of software setting this bit. It brings * out extra page fault penalty to track page access bit. For optimization page * access bit can be set during all page fault flow on these arches. * To be differentiate with macro pte_mkyoung, this macro is used on platforms @@ -519,7 +519,7 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); /* * This is an implementation of pmdp_establish() that is only suitable for an * architecture that doesn't have hardware dirty/accessed bits. In this case we - * can't race with CPU which sets these bits and non-atomic aproach is fine. + * can't race with CPU which sets these bits and non-atomic approach is fine. */ static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) @@ -852,7 +852,7 @@ static inline void __ptep_modify_prot_commit(struct vm_area_struct *vma, * updates, but to prevent any updates it may make from being lost. * * This does not protect against other software modifications of the - * pte; the appropriate pte lock must be held over the transation. + * pte; the appropriate pte lock must be held over the transaction. * * Note that this interface is intended to be batchable, meaning that * ptep_modify_prot_commit may not actually update the pte, but merely @@ -1281,13 +1281,13 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) * * The complete check uses is_pmd_migration_entry() in linux/swapops.h * But using that requires moving current function and pmd_trans_unstable() - * to linux/swapops.h to resovle dependency, which is too much code move. + * to linux/swapops.h to resolve dependency, which is too much code move. * * !pmd_present() is equivalent to is_pmd_migration_entry() currently, * because !pmd_present() pages can only be under migration not swapped * out. * - * pmd_none() is preseved for future condition checks on pmd migration + * pmd_none() is preserved for future condition checks on pmd migration * entries and not confusing with this function name, although it is * redundant with !pmd_present(). */ -- cgit v1.2.3 From fa60ce2cb4506701c43bd4cf3ca23d970daf1b9c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 6 May 2021 18:06:44 -0700 Subject: treewide: remove editor modelines and cruft The section "19) Editor modelines and other cruft" in Documentation/process/coding-style.rst clearly says, "Do not include any of these in source files." I recently receive a patch to explicitly add a new one. Let's do treewide cleanups, otherwise some people follow the existing code and attempt to upstream their favoriate editor setups. It is even nicer if scripts/checkpatch.pl can check it. If we like to impose coding style in an editor-independent manner, I think editorconfig (patch [1]) is a saner solution. [1] https://lore.kernel.org/lkml/20200703073143.423557-1-danny@kdrag0n.dev/ Link: https://lkml.kernel.org/r/20210324054457.1477489-1-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Acked-by: Geert Uytterhoeven Reviewed-by: Miguel Ojeda [auxdisplay] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/configfs.h | 4 +--- include/linux/genl_magic_func.h | 1 - include/linux/genl_magic_struct.h | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 2e8c69b43c64..97cfd13bae51 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * +/* * configfs.h - definitions for the device driver filesystem * * Based on sysfs: diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 6cb82301d8e9..939b1a8f571b 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -404,4 +404,3 @@ s_fields \ /* }}}1 */ #endif /* GENL_MAGIC_FUNC_H */ -/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 35d21fddaf2d..f81d48987528 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -283,4 +283,3 @@ enum { \ /* }}}1 */ #endif /* GENL_MAGIC_STRUCT_H */ -/* vim: set foldmethod=marker nofoldenable : */ -- cgit v1.2.3 From f0953a1bbaca71e1ebbcb9864eb1b273156157ed Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 6 May 2021 18:06:47 -0700 Subject: mm: fix typos in comments Fix ~94 single-word typos in locking code comments, plus a few very obvious grammar mistakes. Link: https://lkml.kernel.org/r/20210322212624.GA1963421@gmail.com Link: https://lore.kernel.org/r/20210322205203.GB1959563@gmail.com Signed-off-by: Ingo Molnar Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Randy Dunlap Cc: Bhaskar Chowdhury Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- include/linux/vmalloc.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 76e27ebb28a3..322ec61d0da7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -106,7 +106,7 @@ extern int mmap_rnd_compat_bits __read_mostly; * embedding these tags into addresses that point to these memory regions, and * checking that the memory and the pointer tags match on memory accesses) * redefine this macro to strip tags from pointers. - * It's defined as noop for arcitectures that don't support memory tagging. + * It's defined as noop for architectures that don't support memory tagging. */ #ifndef untagged_addr #define untagged_addr(addr) (addr) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b6ff16393bf6..4d668abb6391 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -33,7 +33,7 @@ struct notifier_block; /* in notifier.h */ * * If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after * shadow memory has been mapped. It's used to handle allocation errors so that - * we don't try to poision shadow on free if it was never allocated. + * we don't try to poison shadow on free if it was never allocated. * * Otherwise, VM_KASAN is set for kasan_module_alloc() allocations and used to * determine which allocations need the module shadow freed. @@ -43,7 +43,7 @@ struct notifier_block; /* in notifier.h */ /* * Maximum alignment for ioremap() regions. - * Can be overriden by arch-specific value. + * Can be overridden by arch-specific value. */ #ifndef IOREMAP_MAX_ORDER #define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */ -- cgit v1.2.3 From 0ab1438bad43d95877f848b7df551bd431680270 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 6 May 2021 02:45:15 +0900 Subject: linux/kconfig.h: replace IF_ENABLED() with PTR_IF() in is included from all the kernel-space source files, including C, assembly, linker scripts. It is intended to contain a minimal set of macros to evaluate CONFIG options. IF_ENABLED() is an intruder here because (x ? y : z) is C code, which should not be included from assembly files or linker scripts. Also, is no longer self-contained because NULL is defined in . Move IF_ENABLED() out to as PTR_IF(). PTF_IF() takes the general boolean expression instead of a CONFIG option so that it fits better in . Signed-off-by: Masahiro Yamada Reviewed-by: Kees Cook --- include/linux/kconfig.h | 6 ------ include/linux/kernel.h | 2 ++ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index 24a59cb06963..cc8fa109cfa3 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -70,10 +70,4 @@ */ #define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option)) -/* - * IF_ENABLED(CONFIG_FOO, ptr) evaluates to (ptr) if CONFIG_FOO is set to 'y' - * or 'm', NULL otherwise. - */ -#define IF_ENABLED(option, ptr) (IS_ENABLED(option) ? (ptr) : NULL) - #endif /* __LINUX_KCONFIG_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5b7ed6dc99ac..2f9d15410c93 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -48,6 +48,8 @@ */ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) +#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) + #define u64_to_user_ptr(x) ( \ { \ typecheck(u64, (x)); \ -- cgit v1.2.3 From 35c820e71565d1fa835b82499359218b219828ac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 8 May 2021 21:49:48 -0600 Subject: Revert "bio: limit bio max size" This reverts commit cd2c7545ae1beac3b6aae033c7f31193b3255946. Alex reports that the commit causes corruption with LUKS on ext4. Revert it for now so that this can be investigated properly. Link: https://lore.kernel.org/linux-block/1620493841.bxdq8r5haw.none@localhost/ Reported-by: Alex Xu (Hello71) Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 +--- include/linux/blkdev.h | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index f1a99f0a240c..a0b4cfdf62a4 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -106,8 +106,6 @@ static inline void *bio_data(struct bio *bio) return NULL; } -extern unsigned int bio_max_size(struct bio *bio); - /** * bio_full - check if the bio is full * @bio: bio to check @@ -121,7 +119,7 @@ static inline bool bio_full(struct bio *bio, unsigned len) if (bio->bi_vcnt >= bio->bi_max_vecs) return true; - if (bio->bi_iter.bi_size > bio_max_size(bio) - len) + if (bio->bi_iter.bi_size > UINT_MAX - len) return true; return false; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 40c7c4d87aa1..b91ba6207365 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -327,8 +327,6 @@ enum blk_bounce { }; struct queue_limits { - unsigned int bio_max_bytes; - enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; -- cgit v1.2.3